aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-06-11 13:45:14 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-06-11 13:45:14 -0400
commit 2840c566e95599cd60c7143762ca8b49d9395050 (patch)
tree e2bc9e5a65e613c00ac9fca63d83d26458355bdf
parent 859862ddd2b6b8dee00498c015ab37f02474b442 (diff)
parent 19ef1229bc2e2468bdf4ea594a57e4287ffa1e6b (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull reiserfs and ext3 changes from Jan Kara:
 "Big reiserfs cleanup from Jeff, an ext3 deadlock fix, and some small
  cleanups"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (34 commits)
  reiserfs: Fix compilation breakage with CONFIG_REISERFS_CHECK
  ext3: Fix deadlock in data=journal mode when fs is frozen
  reiserfs: call truncate_setsize under tailpack mutex
  fs/jbd/revoke.c: replace shift loop by ilog2
  reiserfs: remove obsolete __constant_cpu_to_le32
  reiserfs: balance_leaf refactor, split up balance_leaf_when_delete
  reiserfs: balance_leaf refactor, format balance_leaf_finish_node
  reiserfs: balance_leaf refactor, format balance_leaf_new_nodes_paste
  reiserfs: balance_leaf refactor, format balance_leaf_paste_right
  reiserfs: balance_leaf refactor, format balance_leaf_insert_right
  reiserfs: balance_leaf refactor, format balance_leaf_paste_left
  reiserfs: balance_leaf refactor, format balance_leaf_insert_left
  reiserfs: balance_leaf refactor, pull out balance_leaf{left, right, new_nodes, finish_node}
  reiserfs: balance_leaf refactor, pull out balance_leaf_finish_node_paste
  reiserfs: balance_leaf refactor pull out balance_leaf_finish_node_insert
  reiserfs: balance_leaf refactor, pull out balance_leaf_new_nodes_paste
  reiserfs: balance_leaf refactor, pull out balance_leaf_new_nodes_insert
  reiserfs: balance_leaf refactor, pull out balance_leaf_paste_right
  reiserfs: balance_leaf refactor, pull out balance_leaf_insert_right
  reiserfs: balance_leaf refactor, pull out balance_leaf_paste_left
  ...
-rw-r--r-- fs/ext3/inode.c 33
-rw-r--r-- fs/jbd/revoke.c 12
-rw-r--r-- fs/reiserfs/bitmap.c 259
-rw-r--r-- fs/reiserfs/dir.c 156
-rw-r--r-- fs/reiserfs/do_balan.c 2449
-rw-r--r-- fs/reiserfs/file.c 90
-rw-r--r-- fs/reiserfs/fix_node.c 1008
-rw-r--r-- fs/reiserfs/hashes.c 15
-rw-r--r-- fs/reiserfs/ibalance.c 271
-rw-r--r-- fs/reiserfs/inode.c 1206
-rw-r--r-- fs/reiserfs/ioctl.c 27
-rw-r--r-- fs/reiserfs/item_ops.c 108
-rw-r--r-- fs/reiserfs/journal.c 1339
-rw-r--r-- fs/reiserfs/lbalance.c 501
-rw-r--r-- fs/reiserfs/namei.c 513
-rw-r--r-- fs/reiserfs/objectid.c 101
-rw-r--r-- fs/reiserfs/prints.c 176
-rw-r--r-- fs/reiserfs/reiserfs.h 1921
-rw-r--r-- fs/reiserfs/resize.c 75
-rw-r--r-- fs/reiserfs/stree.c 884
-rw-r--r-- fs/reiserfs/super.c 552
-rw-r--r-- fs/reiserfs/tail_conversion.c 161
-rw-r--r-- fs/reiserfs/xattr.c 70
-rw-r--r-- fs/reiserfs/xattr.h 3
-rw-r--r-- fs/reiserfs/xattr_acl.c 38
25 files changed, 7052 insertions, 4916 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5157d0d1b43..695abe738a24 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1716,17 +1716,17 @@ static int ext3_journalled_writepage(struct page *page,
1716 WARN_ON_ONCE(IS_RDONLY(inode) && 1716 WARN_ON_ONCE(IS_RDONLY(inode) &&
1717 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); 1717 !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
1718 1718
1719 if (ext3_journal_current_handle())
1720 goto no_write;
1721
1722 trace_ext3_journalled_writepage(page); 1719 trace_ext3_journalled_writepage(page);
1723 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1724 if (IS_ERR(handle)) {
1725 ret = PTR_ERR(handle);
1726 goto no_write;
1727 }
1728
1729 if (!page_has_buffers(page) || PageChecked(page)) { 1720 if (!page_has_buffers(page) || PageChecked(page)) {
1721 if (ext3_journal_current_handle())
1722 goto no_write;
1723
1724 handle = ext3_journal_start(inode,
1725 ext3_writepage_trans_blocks(inode));
1726 if (IS_ERR(handle)) {
1727 ret = PTR_ERR(handle);
1728 goto no_write;
1729 }
1730 /* 1730 /*
1731 * It's mmapped pagecache. Add buffers and journal it. There 1731 * It's mmapped pagecache. Add buffers and journal it. There
1732 * doesn't seem much point in redirtying the page here. 1732 * doesn't seem much point in redirtying the page here.
@@ -1749,17 +1749,18 @@ static int ext3_journalled_writepage(struct page *page,
1749 atomic_set(&EXT3_I(inode)->i_datasync_tid, 1749 atomic_set(&EXT3_I(inode)->i_datasync_tid,
1750 handle->h_transaction->t_tid); 1750 handle->h_transaction->t_tid);
1751 unlock_page(page); 1751 unlock_page(page);
1752 err = ext3_journal_stop(handle);
1753 if (!ret)
1754 ret = err;
1752 } else { 1755 } else {
1753 /* 1756 /*
1754 * It may be a page full of checkpoint-mode buffers. We don't 1757 * It is a page full of checkpoint-mode buffers. Go and write
1755 * really know unless we go poke around in the buffer_heads. 1758 * them. They should have been already mapped when they went
1756 * But block_write_full_page will do the right thing. 1759 * to the journal so provide NULL get_block function to catch
1760 * errors.
1757 */ 1761 */
1758 ret = block_write_full_page(page, ext3_get_block, wbc); 1762 ret = block_write_full_page(page, NULL, wbc);
1759 } 1763 }
1760 err = ext3_journal_stop(handle);
1761 if (!ret)
1762 ret = err;
1763out: 1764out:
1764 return ret; 1765 return ret;
1765 1766
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 25c713e7071c..8898bbd2b61e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -231,19 +231,15 @@ record_cache_failure:
231 231
232static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size) 232static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
233{ 233{
234 int shift = 0; 234 int i;
235 int tmp = hash_size;
236 struct jbd_revoke_table_s *table; 235 struct jbd_revoke_table_s *table;
237 236
238 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 237 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
239 if (!table) 238 if (!table)
240 goto out; 239 goto out;
241 240
242 while((tmp >>= 1UL) != 0UL)
243 shift++;
244
245 table->hash_size = hash_size; 241 table->hash_size = hash_size;
246 table->hash_shift = shift; 242 table->hash_shift = ilog2(hash_size);
247 table->hash_table = 243 table->hash_table =
248 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 244 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
249 if (!table->hash_table) { 245 if (!table->hash_table) {
@@ -252,8 +248,8 @@ static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
252 goto out; 248 goto out;
253 } 249 }
254 250
255 for (tmp = 0; tmp < hash_size; tmp++) 251 for (i = 0; i < hash_size; i++)
256 INIT_LIST_HEAD(&table->hash_table[tmp]); 252 INIT_LIST_HEAD(&table->hash_table[i]);
257 253
258out: 254out:
259 return table; 255 return table;
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 1bcffeab713c..dc198bc64c61 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -50,8 +50,10 @@ static inline void get_bit_address(struct super_block *s,
50 unsigned int *bmap_nr, 50 unsigned int *bmap_nr,
51 unsigned int *offset) 51 unsigned int *offset)
52{ 52{
53 /* It is in the bitmap block number equal to the block 53 /*
54 * number divided by the number of bits in a block. */ 54 * It is in the bitmap block number equal to the block
55 * number divided by the number of bits in a block.
56 */
55 *bmap_nr = block >> (s->s_blocksize_bits + 3); 57 *bmap_nr = block >> (s->s_blocksize_bits + 3);
56 /* Within that bitmap block it is located at bit offset *offset. */ 58 /* Within that bitmap block it is located at bit offset *offset. */
57 *offset = block & ((s->s_blocksize << 3) - 1); 59 *offset = block & ((s->s_blocksize << 3) - 1);
@@ -71,10 +73,12 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
71 73
72 get_bit_address(s, block, &bmap, &offset); 74 get_bit_address(s, block, &bmap, &offset);
73 75
74 /* Old format filesystem? Unlikely, but the bitmaps are all up front so 76 /*
75 * we need to account for it. */ 77 * Old format filesystem? Unlikely, but the bitmaps are all
78 * up front so we need to account for it.
79 */
76 if (unlikely(test_bit(REISERFS_OLD_FORMAT, 80 if (unlikely(test_bit(REISERFS_OLD_FORMAT,
77 &(REISERFS_SB(s)->s_properties)))) { 81 &REISERFS_SB(s)->s_properties))) {
78 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; 82 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
79 if (block >= bmap1 && 83 if (block >= bmap1 &&
80 block <= bmap1 + bmap_count) { 84 block <= bmap1 + bmap_count) {
@@ -108,8 +112,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
108 return 1; 112 return 1;
109} 113}
110 114
111/* searches in journal structures for a given block number (bmap, off). If block 115/*
112 is found in reiserfs journal it suggests next free block candidate to test. */ 116 * Searches in journal structures for a given block number (bmap, off).
117 * If block is found in reiserfs journal it suggests next free block
118 * candidate to test.
119 */
113static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, 120static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
114 int off, int *next) 121 int off, int *next)
115{ 122{
@@ -120,7 +127,7 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
120 *next = tmp; 127 *next = tmp;
121 PROC_INFO_INC(s, scan_bitmap.in_journal_hint); 128 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
122 } else { 129 } else {
123 (*next) = off + 1; /* inc offset to avoid looping. */ 130 (*next) = off + 1; /* inc offset to avoid looping. */
124 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); 131 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
125 } 132 }
126 PROC_INFO_INC(s, scan_bitmap.retry); 133 PROC_INFO_INC(s, scan_bitmap.retry);
@@ -129,8 +136,10 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
129 return 0; 136 return 0;
130} 137}
131 138
132/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap 139/*
133 * block; */ 140 * Searches for a window of zero bits with given minimum and maximum
141 * lengths in one bitmap block
142 */
134static int scan_bitmap_block(struct reiserfs_transaction_handle *th, 143static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
135 unsigned int bmap_n, int *beg, int boundary, 144 unsigned int bmap_n, int *beg, int boundary,
136 int min, int max, int unfm) 145 int min, int max, int unfm)
@@ -145,10 +154,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
145 RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " 154 RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
146 "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); 155 "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
147 PROC_INFO_INC(s, scan_bitmap.bmap); 156 PROC_INFO_INC(s, scan_bitmap.bmap);
148/* this is unclear and lacks comments, explain how journal bitmaps
149 work here for the reader. Convey a sense of the design here. What
150 is a window? */
151/* - I mean `a window of zero bits' as in description of this function - Zam. */
152 157
153 if (!bi) { 158 if (!bi) {
154 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " 159 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
@@ -161,18 +166,21 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
161 return 0; 166 return 0;
162 167
163 while (1) { 168 while (1) {
164 cont: 169cont:
165 if (bi->free_count < min) { 170 if (bi->free_count < min) {
166 brelse(bh); 171 brelse(bh);
167 return 0; // No free blocks in this bitmap 172 return 0; /* No free blocks in this bitmap */
168 } 173 }
169 174
170 /* search for a first zero bit -- beginning of a window */ 175 /* search for a first zero bit -- beginning of a window */
171 *beg = reiserfs_find_next_zero_le_bit 176 *beg = reiserfs_find_next_zero_le_bit
172 ((unsigned long *)(bh->b_data), boundary, *beg); 177 ((unsigned long *)(bh->b_data), boundary, *beg);
173 178
174 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block 179 /*
175 * cannot contain a zero window of minimum size */ 180 * search for a zero bit fails or the rest of bitmap block
181 * cannot contain a zero window of minimum size
182 */
183 if (*beg + min > boundary) {
176 brelse(bh); 184 brelse(bh);
177 return 0; 185 return 0;
178 } 186 }
@@ -186,49 +194,75 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
186 next = end; 194 next = end;
187 break; 195 break;
188 } 196 }
189 /* finding the other end of zero bit window requires looking into journal structures (in 197
190 * case of searching for free blocks for unformatted nodes) */ 198 /*
199 * finding the other end of zero bit window requires
200 * looking into journal structures (in case of
201 * searching for free blocks for unformatted nodes)
202 */
191 if (unfm && is_block_in_journal(s, bmap_n, end, &next)) 203 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
192 break; 204 break;
193 } 205 }
194 206
195 /* now (*beg) points to beginning of zero bits window, 207 /*
196 * (end) points to one bit after the window end */ 208 * now (*beg) points to beginning of zero bits window,
197 if (end - *beg >= min) { /* it seems we have found window of proper size */ 209 * (end) points to one bit after the window end
210 */
211
212 /* found window of proper size */
213 if (end - *beg >= min) {
198 int i; 214 int i;
199 reiserfs_prepare_for_journal(s, bh, 1); 215 reiserfs_prepare_for_journal(s, bh, 1);
200 /* try to set all blocks used checking are they still free */ 216 /*
217 * try to set all blocks used checking are
218 * they still free
219 */
201 for (i = *beg; i < end; i++) { 220 for (i = *beg; i < end; i++) {
202 /* It seems that we should not check in journal again. */ 221 /* Don't check in journal again. */
203 if (reiserfs_test_and_set_le_bit 222 if (reiserfs_test_and_set_le_bit
204 (i, bh->b_data)) { 223 (i, bh->b_data)) {
205 /* bit was set by another process 224 /*
206 * while we slept in prepare_for_journal() */ 225 * bit was set by another process while
226 * we slept in prepare_for_journal()
227 */
207 PROC_INFO_INC(s, scan_bitmap.stolen); 228 PROC_INFO_INC(s, scan_bitmap.stolen);
208 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, 229
209 * if length of this set is more or equal to `min' */ 230 /*
231 * we can continue with smaller set
232 * of allocated blocks, if length of
233 * this set is more or equal to `min'
234 */
235 if (i >= *beg + min) {
210 end = i; 236 end = i;
211 break; 237 break;
212 } 238 }
213 /* otherwise we clear all bit were set ... */ 239
240 /*
241 * otherwise we clear all bit
242 * were set ...
243 */
214 while (--i >= *beg) 244 while (--i >= *beg)
215 reiserfs_clear_le_bit 245 reiserfs_clear_le_bit
216 (i, bh->b_data); 246 (i, bh->b_data);
217 reiserfs_restore_prepared_buffer(s, bh); 247 reiserfs_restore_prepared_buffer(s, bh);
218 *beg = org; 248 *beg = org;
219 /* ... and search again in current block from beginning */ 249
250 /*
251 * Search again in current block
252 * from beginning
253 */
220 goto cont; 254 goto cont;
221 } 255 }
222 } 256 }
223 bi->free_count -= (end - *beg); 257 bi->free_count -= (end - *beg);
224 journal_mark_dirty(th, s, bh); 258 journal_mark_dirty(th, bh);
225 brelse(bh); 259 brelse(bh);
226 260
227 /* free block count calculation */ 261 /* free block count calculation */
228 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 262 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
229 1); 263 1);
230 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); 264 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
231 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); 265 journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
232 266
233 return end - (*beg); 267 return end - (*beg);
234 } else { 268 } else {
@@ -267,11 +301,13 @@ static inline int block_group_used(struct super_block *s, u32 id)
267 int bm = bmap_hash_id(s, id); 301 int bm = bmap_hash_id(s, id);
268 struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; 302 struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
269 303
270 /* If we don't have cached information on this bitmap block, we're 304 /*
305 * If we don't have cached information on this bitmap block, we're
271 * going to have to load it later anyway. Loading it here allows us 306 * going to have to load it later anyway. Loading it here allows us
272 * to make a better decision. This favors long-term performance gain 307 * to make a better decision. This favors long-term performance gain
273 * with a better on-disk layout vs. a short term gain of skipping the 308 * with a better on-disk layout vs. a short term gain of skipping the
274 * read and potentially having a bad placement. */ 309 * read and potentially having a bad placement.
310 */
275 if (info->free_count == UINT_MAX) { 311 if (info->free_count == UINT_MAX) {
276 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); 312 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
277 brelse(bh); 313 brelse(bh);
@@ -304,25 +340,26 @@ __le32 reiserfs_choose_packing(struct inode * dir)
304 return packing; 340 return packing;
305} 341}
306 342
307/* Tries to find contiguous zero bit window (given size) in given region of 343/*
308 * bitmap and place new blocks there. Returns number of allocated blocks. */ 344 * Tries to find contiguous zero bit window (given size) in given region of
345 * bitmap and place new blocks there. Returns number of allocated blocks.
346 */
309static int scan_bitmap(struct reiserfs_transaction_handle *th, 347static int scan_bitmap(struct reiserfs_transaction_handle *th,
310 b_blocknr_t * start, b_blocknr_t finish, 348 b_blocknr_t * start, b_blocknr_t finish,
311 int min, int max, int unfm, sector_t file_block) 349 int min, int max, int unfm, sector_t file_block)
312{ 350{
313 int nr_allocated = 0; 351 int nr_allocated = 0;
314 struct super_block *s = th->t_super; 352 struct super_block *s = th->t_super;
315 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
316 * - Hans, it is not a block number - Zam. */
317
318 unsigned int bm, off; 353 unsigned int bm, off;
319 unsigned int end_bm, end_off; 354 unsigned int end_bm, end_off;
320 unsigned int off_max = s->s_blocksize << 3; 355 unsigned int off_max = s->s_blocksize << 3;
321 356
322 BUG_ON(!th->t_trans_id); 357 BUG_ON(!th->t_trans_id);
323 PROC_INFO_INC(s, scan_bitmap.call); 358 PROC_INFO_INC(s, scan_bitmap.call);
359
360 /* No point in looking for more free blocks */
324 if (SB_FREE_BLOCKS(s) <= 0) 361 if (SB_FREE_BLOCKS(s) <= 0)
325 return 0; // No point in looking for more free blocks 362 return 0;
326 363
327 get_bit_address(s, *start, &bm, &off); 364 get_bit_address(s, *start, &bm, &off);
328 get_bit_address(s, finish, &end_bm, &end_off); 365 get_bit_address(s, finish, &end_bm, &end_off);
@@ -331,7 +368,8 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
331 if (end_bm > reiserfs_bmap_count(s)) 368 if (end_bm > reiserfs_bmap_count(s))
332 end_bm = reiserfs_bmap_count(s); 369 end_bm = reiserfs_bmap_count(s);
333 370
334 /* When the bitmap is more than 10% free, anyone can allocate. 371 /*
372 * When the bitmap is more than 10% free, anyone can allocate.
335 * When it's less than 10% free, only files that already use the 373 * When it's less than 10% free, only files that already use the
336 * bitmap are allowed. Once we pass 80% full, this restriction 374 * bitmap are allowed. Once we pass 80% full, this restriction
337 * is lifted. 375 * is lifted.
@@ -369,7 +407,7 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
369 nr_allocated = 407 nr_allocated =
370 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); 408 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
371 409
372 ret: 410ret:
373 *start = bm * off_max + off; 411 *start = bm * off_max + off;
374 return nr_allocated; 412 return nr_allocated;
375 413
@@ -411,14 +449,14 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
411 "block %lu: bit already cleared", block); 449 "block %lu: bit already cleared", block);
412 } 450 }
413 apbi[nr].free_count++; 451 apbi[nr].free_count++;
414 journal_mark_dirty(th, s, bmbh); 452 journal_mark_dirty(th, bmbh);
415 brelse(bmbh); 453 brelse(bmbh);
416 454
417 reiserfs_prepare_for_journal(s, sbh, 1); 455 reiserfs_prepare_for_journal(s, sbh, 1);
418 /* update super block */ 456 /* update super block */
419 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); 457 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
420 458
421 journal_mark_dirty(th, s, sbh); 459 journal_mark_dirty(th, sbh);
422 if (for_unformatted) { 460 if (for_unformatted) {
423 int depth = reiserfs_write_unlock_nested(s); 461 int depth = reiserfs_write_unlock_nested(s);
424 dquot_free_block_nodirty(inode, 1); 462 dquot_free_block_nodirty(inode, 1);
@@ -483,7 +521,7 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
483 if (dirty) 521 if (dirty)
484 reiserfs_update_sd(th, inode); 522 reiserfs_update_sd(th, inode);
485 ei->i_prealloc_block = save; 523 ei->i_prealloc_block = save;
486 list_del_init(&(ei->i_prealloc_list)); 524 list_del_init(&ei->i_prealloc_list);
487} 525}
488 526
489/* FIXME: It should be inline function */ 527/* FIXME: It should be inline function */
@@ -529,7 +567,8 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
529{ 567{
530 char *this_char, *value; 568 char *this_char, *value;
531 569
532 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ 570 /* clear default settings */
571 REISERFS_SB(s)->s_alloc_options.bits = 0;
533 572
534 while ((this_char = strsep(&options, ":")) != NULL) { 573 while ((this_char = strsep(&options, ":")) != NULL) {
535 if ((value = strchr(this_char, '=')) != NULL) 574 if ((value = strchr(this_char, '=')) != NULL)
@@ -731,7 +770,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
731 hash_in = (char *)&hint->key.k_dir_id; 770 hash_in = (char *)&hint->key.k_dir_id;
732 } else { 771 } else {
733 if (!hint->inode) { 772 if (!hint->inode) {
734 //hint->search_start = hint->beg; 773 /*hint->search_start = hint->beg;*/
735 hash_in = (char *)&hint->key.k_dir_id; 774 hash_in = (char *)&hint->key.k_dir_id;
736 } else 775 } else
737 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 776 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
@@ -785,7 +824,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
785 824
786 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); 825 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
787 826
788 /* keep the root dir and it's first set of subdirs close to 827 /*
828 * keep the root dir and it's first set of subdirs close to
789 * the start of the disk 829 * the start of the disk
790 */ 830 */
791 if (dirid <= 2) 831 if (dirid <= 2)
@@ -799,7 +839,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
799 } 839 }
800} 840}
801 841
802/* returns 1 if it finds an indirect item and gets valid hint info 842/*
843 * returns 1 if it finds an indirect item and gets valid hint info
803 * from it, otherwise 0 844 * from it, otherwise 0
804 */ 845 */
805static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) 846static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
@@ -811,25 +852,29 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
811 __le32 *item; 852 __le32 *item;
812 int ret = 0; 853 int ret = 0;
813 854
814 if (!hint->path) /* reiserfs code can call this function w/o pointer to path 855 /*
815 * structure supplied; then we rely on supplied search_start */ 856 * reiserfs code can call this function w/o pointer to path
857 * structure supplied; then we rely on supplied search_start
858 */
859 if (!hint->path)
816 return 0; 860 return 0;
817 861
818 path = hint->path; 862 path = hint->path;
819 bh = get_last_bh(path); 863 bh = get_last_bh(path);
820 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor"); 864 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
821 ih = get_ih(path); 865 ih = tp_item_head(path);
822 pos_in_item = path->pos_in_item; 866 pos_in_item = path->pos_in_item;
823 item = get_item(path); 867 item = tp_item_body(path);
824 868
825 hint->search_start = bh->b_blocknr; 869 hint->search_start = bh->b_blocknr;
826 870
871 /*
872 * for indirect item: go to left and look for the first non-hole entry
873 * in the indirect item
874 */
827 if (!hint->formatted_node && is_indirect_le_ih(ih)) { 875 if (!hint->formatted_node && is_indirect_le_ih(ih)) {
828 /* for indirect item: go to left and look for the first non-hole entry
829 in the indirect item */
830 if (pos_in_item == I_UNFM_NUM(ih)) 876 if (pos_in_item == I_UNFM_NUM(ih))
831 pos_in_item--; 877 pos_in_item--;
832// pos_in_item = I_UNFM_NUM (ih) - 1;
833 while (pos_in_item >= 0) { 878 while (pos_in_item >= 0) {
834 int t = get_block_num(item, pos_in_item); 879 int t = get_block_num(item, pos_in_item);
835 if (t) { 880 if (t) {
@@ -845,10 +890,12 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
845 return ret; 890 return ret;
846} 891}
847 892
848/* should be, if formatted node, then try to put on first part of the device 893/*
849 specified as number of percent with mount option device, else try to put 894 * should be, if formatted node, then try to put on first part of the device
850 on last of device. This is not to say it is good code to do so, 895 * specified as number of percent with mount option device, else try to put
851 but the effect should be measured. */ 896 * on last of device. This is not to say it is good code to do so,
897 * but the effect should be measured.
898 */
852static inline void set_border_in_hint(struct super_block *s, 899static inline void set_border_in_hint(struct super_block *s,
853 reiserfs_blocknr_hint_t * hint) 900 reiserfs_blocknr_hint_t * hint)
854{ 901{
@@ -974,21 +1021,27 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
974 set_border_in_hint(s, hint); 1021 set_border_in_hint(s, hint);
975 1022
976#ifdef DISPLACE_NEW_PACKING_LOCALITIES 1023#ifdef DISPLACE_NEW_PACKING_LOCALITIES
977 /* whenever we create a new directory, we displace it. At first we will 1024 /*
978 hash for location, later we might look for a moderately empty place for 1025 * whenever we create a new directory, we displace it. At first
979 it */ 1026 * we will hash for location, later we might look for a moderately
1027 * empty place for it
1028 */
980 if (displacing_new_packing_localities(s) 1029 if (displacing_new_packing_localities(s)
981 && hint->th->displace_new_blocks) { 1030 && hint->th->displace_new_blocks) {
982 displace_new_packing_locality(hint); 1031 displace_new_packing_locality(hint);
983 1032
984 /* we do not continue determine_search_start, 1033 /*
985 * if new packing locality is being displaced */ 1034 * we do not continue determine_search_start,
1035 * if new packing locality is being displaced
1036 */
986 return; 1037 return;
987 } 1038 }
988#endif 1039#endif
989 1040
990 /* all persons should feel encouraged to add more special cases here and 1041 /*
991 * test them */ 1042 * all persons should feel encouraged to add more special cases
1043 * here and test them
1044 */
992 1045
993 if (displacing_large_files(s) && !hint->formatted_node 1046 if (displacing_large_files(s) && !hint->formatted_node
994 && this_blocknr_allocation_would_make_it_a_large_file(hint)) { 1047 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
@@ -996,8 +1049,10 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
996 return; 1049 return;
997 } 1050 }
998 1051
999 /* if none of our special cases is relevant, use the left neighbor in the 1052 /*
1000 tree order of the new node we are allocating for */ 1053 * if none of our special cases is relevant, use the left
1054 * neighbor in the tree order of the new node we are allocating for
1055 */
1001 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { 1056 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
1002 hash_formatted_node(hint); 1057 hash_formatted_node(hint);
1003 return; 1058 return;
@@ -1005,10 +1060,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1005 1060
1006 unfm_hint = get_left_neighbor(hint); 1061 unfm_hint = get_left_neighbor(hint);
1007 1062
1008 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, 1063 /*
1009 new blocks are displaced based on directory ID. Also, if suggested search_start 1064 * Mimic old block allocator behaviour, that is if VFS allowed for
1010 is less than last preallocated block, we start searching from it, assuming that 1065 * preallocation, new blocks are displaced based on directory ID.
1011 HDD dataflow is faster in forward direction */ 1066 * Also, if suggested search_start is less than last preallocated
1067 * block, we start searching from it, assuming that HDD dataflow
1068 * is faster in forward direction
1069 */
1012 if (TEST_OPTION(old_way, s)) { 1070 if (TEST_OPTION(old_way, s)) {
1013 if (!hint->formatted_node) { 1071 if (!hint->formatted_node) {
1014 if (!reiserfs_hashed_relocation(s)) 1072 if (!reiserfs_hashed_relocation(s))
@@ -1037,11 +1095,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
1037 TEST_OPTION(old_hashed_relocation, s)) { 1095 TEST_OPTION(old_hashed_relocation, s)) {
1038 old_hashed_relocation(hint); 1096 old_hashed_relocation(hint);
1039 } 1097 }
1098
1040 /* new_hashed_relocation works with both formatted/unformatted nodes */ 1099 /* new_hashed_relocation works with both formatted/unformatted nodes */
1041 if ((!unfm_hint || hint->formatted_node) && 1100 if ((!unfm_hint || hint->formatted_node) &&
1042 TEST_OPTION(new_hashed_relocation, s)) { 1101 TEST_OPTION(new_hashed_relocation, s)) {
1043 new_hashed_relocation(hint); 1102 new_hashed_relocation(hint);
1044 } 1103 }
1104
1045 /* dirid grouping works only on unformatted nodes */ 1105 /* dirid grouping works only on unformatted nodes */
1046 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { 1106 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
1047 dirid_groups(hint); 1107 dirid_groups(hint);
@@ -1079,8 +1139,6 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
1079 return CARRY_ON; 1139 return CARRY_ON;
1080} 1140}
1081 1141
1082/* XXX I know it could be merged with upper-level function;
1083 but may be result function would be too complex. */
1084static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, 1142static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1085 b_blocknr_t * new_blocknrs, 1143 b_blocknr_t * new_blocknrs,
1086 b_blocknr_t start, 1144 b_blocknr_t start,
@@ -1108,7 +1166,10 @@ static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
1108 1166
1109 /* do we have something to fill prealloc. array also ? */ 1167 /* do we have something to fill prealloc. array also ? */
1110 if (nr_allocated > 0) { 1168 if (nr_allocated > 0) {
1111 /* it means prealloc_size was greater that 0 and we do preallocation */ 1169 /*
1170 * it means prealloc_size was greater that 0 and
1171 * we do preallocation
1172 */
1112 list_add(&REISERFS_I(hint->inode)->i_prealloc_list, 1173 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1113 &SB_JOURNAL(hint->th->t_super)-> 1174 &SB_JOURNAL(hint->th->t_super)->
1114 j_prealloc_list); 1175 j_prealloc_list);
@@ -1176,7 +1237,8 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
1176 start = 0; 1237 start = 0;
1177 finish = hint->beg; 1238 finish = hint->beg;
1178 break; 1239 break;
1179 default: /* We've tried searching everywhere, not enough space */ 1240 default:
1241 /* We've tried searching everywhere, not enough space */
1180 /* Free the blocks */ 1242 /* Free the blocks */
1181 if (!hint->formatted_node) { 1243 if (!hint->formatted_node) {
1182#ifdef REISERQUOTA_DEBUG 1244#ifdef REISERQUOTA_DEBUG
@@ -1261,8 +1323,11 @@ static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1261 return amount_needed; 1323 return amount_needed;
1262} 1324}
1263 1325
1264int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have 1326int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
1265 already reserved */ ) 1327 b_blocknr_t *new_blocknrs,
1328 int amount_needed,
1329 /* Amount of blocks we have already reserved */
1330 int reserved_by_us)
1266{ 1331{
1267 int initial_amount_needed = amount_needed; 1332 int initial_amount_needed = amount_needed;
1268 int ret; 1333 int ret;
@@ -1274,15 +1339,21 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
1274 return NO_DISK_SPACE; 1339 return NO_DISK_SPACE;
1275 /* should this be if !hint->inode && hint->preallocate? */ 1340 /* should this be if !hint->inode && hint->preallocate? */
1276 /* do you mean hint->formatted_node can be removed ? - Zam */ 1341 /* do you mean hint->formatted_node can be removed ? - Zam */
1277 /* hint->formatted_node cannot be removed because we try to access 1342 /*
1278 inode information here, and there is often no inode assotiated with 1343 * hint->formatted_node cannot be removed because we try to access
1279 metadata allocations - green */ 1344 * inode information here, and there is often no inode associated with
1345 * metadata allocations - green
1346 */
1280 1347
1281 if (!hint->formatted_node && hint->preallocate) { 1348 if (!hint->formatted_node && hint->preallocate) {
1282 amount_needed = use_preallocated_list_if_available 1349 amount_needed = use_preallocated_list_if_available
1283 (hint, new_blocknrs, amount_needed); 1350 (hint, new_blocknrs, amount_needed);
1284 if (amount_needed == 0) /* all blocknrs we need we got from 1351
1285 prealloc. list */ 1352 /*
1353 * We have all the block numbers we need from the
1354 * prealloc list
1355 */
1356 if (amount_needed == 0)
1286 return CARRY_ON; 1357 return CARRY_ON;
1287 new_blocknrs += (initial_amount_needed - amount_needed); 1358 new_blocknrs += (initial_amount_needed - amount_needed);
1288 } 1359 }
@@ -1296,10 +1367,12 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
1296 ret = blocknrs_and_prealloc_arrays_from_search_start 1367 ret = blocknrs_and_prealloc_arrays_from_search_start
1297 (hint, new_blocknrs, amount_needed); 1368 (hint, new_blocknrs, amount_needed);
1298 1369
1299 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we 1370 /*
1300 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second 1371 * We used prealloc. list to fill (partially) new_blocknrs array.
1301 * variant) */ 1372 * If final allocation fails we need to return blocks back to
1302 1373 * prealloc. list or just free them. -- Zam (I chose second
1374 * variant)
1375 */
1303 if (ret != CARRY_ON) { 1376 if (ret != CARRY_ON) {
1304 while (amount_needed++ < initial_amount_needed) { 1377 while (amount_needed++ < initial_amount_needed) {
1305 reiserfs_free_block(hint->th, hint->inode, 1378 reiserfs_free_block(hint->th, hint->inode,
@@ -1338,10 +1411,12 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
1338 struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; 1411 struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
1339 struct buffer_head *bh; 1412 struct buffer_head *bh;
1340 1413
1341 /* Way old format filesystems had the bitmaps packed up front. 1414 /*
1342 * I doubt there are any of these left, but just in case... */ 1415 * Way old format filesystems had the bitmaps packed up front.
1416 * I doubt there are any of these left, but just in case...
1417 */
1343 if (unlikely(test_bit(REISERFS_OLD_FORMAT, 1418 if (unlikely(test_bit(REISERFS_OLD_FORMAT,
1344 &(REISERFS_SB(sb)->s_properties)))) 1419 &REISERFS_SB(sb)->s_properties)))
1345 block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; 1420 block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
1346 else if (bitmap == 0) 1421 else if (bitmap == 0)
1347 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; 1422 block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index af677353a3f5..d9f5a60dd59b 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -59,7 +59,10 @@ static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *d
59 59
60int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) 60int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
61{ 61{
62 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 62
63 /* key of current position in the directory (key of directory entry) */
64 struct cpu_key pos_key;
65
63 INITIALIZE_PATH(path_to_entry); 66 INITIALIZE_PATH(path_to_entry);
64 struct buffer_head *bh; 67 struct buffer_head *bh;
65 int item_num, entry_num; 68 int item_num, entry_num;
@@ -77,21 +80,28 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
77 80
78 reiserfs_check_lock_depth(inode->i_sb, "readdir"); 81 reiserfs_check_lock_depth(inode->i_sb, "readdir");
79 82
80 /* form key for search the next directory entry using f_pos field of 83 /*
81 file structure */ 84 * form key for search the next directory entry using
85 * f_pos field of file structure
86 */
82 make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); 87 make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
83 next_pos = cpu_key_k_offset(&pos_key); 88 next_pos = cpu_key_k_offset(&pos_key);
84 89
85 path_to_entry.reada = PATH_READA; 90 path_to_entry.reada = PATH_READA;
86 while (1) { 91 while (1) {
87 research: 92research:
88 /* search the directory item, containing entry with specified key */ 93 /*
94 * search the directory item, containing entry with
95 * specified key
96 */
89 search_res = 97 search_res =
90 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, 98 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
91 &de); 99 &de);
92 if (search_res == IO_ERROR) { 100 if (search_res == IO_ERROR) {
93 // FIXME: we could just skip part of directory which could 101 /*
94 // not be read 102 * FIXME: we could just skip part of directory
103 * which could not be read
104 */
95 ret = -EIO; 105 ret = -EIO;
96 goto out; 106 goto out;
97 } 107 }
@@ -102,41 +112,49 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
102 store_ih(&tmp_ih, ih); 112 store_ih(&tmp_ih, ih);
103 113
104 /* we must have found item, that is item of this directory, */ 114 /* we must have found item, that is item of this directory, */
105 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key), 115 RFALSE(COMP_SHORT_KEYS(&ih->ih_key, &pos_key),
106 "vs-9000: found item %h does not match to dir we readdir %K", 116 "vs-9000: found item %h does not match to dir we readdir %K",
107 ih, &pos_key); 117 ih, &pos_key);
108 RFALSE(item_num > B_NR_ITEMS(bh) - 1, 118 RFALSE(item_num > B_NR_ITEMS(bh) - 1,
109 "vs-9005 item_num == %d, item amount == %d", 119 "vs-9005 item_num == %d, item amount == %d",
110 item_num, B_NR_ITEMS(bh)); 120 item_num, B_NR_ITEMS(bh));
111 121
112 /* and entry must be not more than number of entries in the item */ 122 /*
113 RFALSE(I_ENTRY_COUNT(ih) < entry_num, 123 * and entry must be not more than number of entries
124 * in the item
125 */
126 RFALSE(ih_entry_count(ih) < entry_num,
114 "vs-9010: entry number is too big %d (%d)", 127 "vs-9010: entry number is too big %d (%d)",
115 entry_num, I_ENTRY_COUNT(ih)); 128 entry_num, ih_entry_count(ih));
116 129
130 /*
131 * go through all entries in the directory item beginning
132 * from the entry, that has been found
133 */
117 if (search_res == POSITION_FOUND 134 if (search_res == POSITION_FOUND
118 || entry_num < I_ENTRY_COUNT(ih)) { 135 || entry_num < ih_entry_count(ih)) {
119 /* go through all entries in the directory item beginning from the entry, that has been found */
120 struct reiserfs_de_head *deh = 136 struct reiserfs_de_head *deh =
121 B_I_DEH(bh, ih) + entry_num; 137 B_I_DEH(bh, ih) + entry_num;
122 138
123 for (; entry_num < I_ENTRY_COUNT(ih); 139 for (; entry_num < ih_entry_count(ih);
124 entry_num++, deh++) { 140 entry_num++, deh++) {
125 int d_reclen; 141 int d_reclen;
126 char *d_name; 142 char *d_name;
127 ino_t d_ino; 143 ino_t d_ino;
128 loff_t cur_pos = deh_offset(deh); 144 loff_t cur_pos = deh_offset(deh);
129 145
146 /* it is hidden entry */
130 if (!de_visible(deh)) 147 if (!de_visible(deh))
131 /* it is hidden entry */
132 continue; 148 continue;
133 d_reclen = entry_length(bh, ih, entry_num); 149 d_reclen = entry_length(bh, ih, entry_num);
134 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); 150 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
135 151
136 if (d_reclen <= 0 || 152 if (d_reclen <= 0 ||
137 d_name + d_reclen > bh->b_data + bh->b_size) { 153 d_name + d_reclen > bh->b_data + bh->b_size) {
138 /* There is corrupted data in entry, 154 /*
139 * We'd better stop here */ 155 * There is corrupted data in entry,
156 * We'd better stop here
157 */
140 pathrelse(&path_to_entry); 158 pathrelse(&path_to_entry);
141 ret = -EIO; 159 ret = -EIO;
142 goto out; 160 goto out;
@@ -145,10 +163,10 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
145 if (!d_name[d_reclen - 1]) 163 if (!d_name[d_reclen - 1])
146 d_reclen = strlen(d_name); 164 d_reclen = strlen(d_name);
147 165
166 /* too big to send back to VFS */
148 if (d_reclen > 167 if (d_reclen >
149 REISERFS_MAX_NAME(inode->i_sb-> 168 REISERFS_MAX_NAME(inode->i_sb->
150 s_blocksize)) { 169 s_blocksize)) {
151 /* too big to send back to VFS */
152 continue; 170 continue;
153 } 171 }
154 172
@@ -173,10 +191,14 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
173 goto research; 191 goto research;
174 } 192 }
175 } 193 }
176 // Note, that we copy name to user space via temporary 194
177 // buffer (local_buf) because filldir will block if 195 /*
178 // user space buffer is swapped out. At that time 196 * Note, that we copy name to user space via
179 // entry can move to somewhere else 197 * temporary buffer (local_buf) because
198 * filldir will block if user space buffer is
199 * swapped out. At that time entry can move to
200 * somewhere else
201 */
180 memcpy(local_buf, d_name, d_reclen); 202 memcpy(local_buf, d_name, d_reclen);
181 203
182 /* 204 /*
@@ -209,22 +231,26 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
209 } /* for */ 231 } /* for */
210 } 232 }
211 233
234 /* end of directory has been reached */
212 if (item_num != B_NR_ITEMS(bh) - 1) 235 if (item_num != B_NR_ITEMS(bh) - 1)
213 // end of directory has been reached
214 goto end; 236 goto end;
215 237
216 /* item we went through is last item of node. Using right 238 /*
217 delimiting key check is it directory end */ 239 * item we went through is last item of node. Using right
240 * delimiting key check is it directory end
241 */
218 rkey = get_rkey(&path_to_entry, inode->i_sb); 242 rkey = get_rkey(&path_to_entry, inode->i_sb);
219 if (!comp_le_keys(rkey, &MIN_KEY)) { 243 if (!comp_le_keys(rkey, &MIN_KEY)) {
220 /* set pos_key to key, that is the smallest and greater 244 /*
221 that key of the last entry in the item */ 245 * set pos_key to key, that is the smallest and greater
246 * that key of the last entry in the item
247 */
222 set_cpu_key_k_offset(&pos_key, next_pos); 248 set_cpu_key_k_offset(&pos_key, next_pos);
223 continue; 249 continue;
224 } 250 }
225 251
252 /* end of directory has been reached */
226 if (COMP_SHORT_KEYS(rkey, &pos_key)) { 253 if (COMP_SHORT_KEYS(rkey, &pos_key)) {
227 // end of directory has been reached
228 goto end; 254 goto end;
229 } 255 }
230 256
@@ -248,71 +274,73 @@ static int reiserfs_readdir(struct file *file, struct dir_context *ctx)
248 return reiserfs_readdir_inode(file_inode(file), ctx); 274 return reiserfs_readdir_inode(file_inode(file), ctx);
249} 275}
250 276
251/* compose directory item containing "." and ".." entries (entries are 277/*
252 not aligned to 4 byte boundary) */ 278 * compose directory item containing "." and ".." entries (entries are
253/* the last four params are LE */ 279 * not aligned to 4 byte boundary)
280 */
254void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, 281void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
255 __le32 par_dirid, __le32 par_objid) 282 __le32 par_dirid, __le32 par_objid)
256{ 283{
257 struct reiserfs_de_head *deh; 284 struct reiserfs_de_head *dot, *dotdot;
258 285
259 memset(body, 0, EMPTY_DIR_SIZE_V1); 286 memset(body, 0, EMPTY_DIR_SIZE_V1);
260 deh = (struct reiserfs_de_head *)body; 287 dot = (struct reiserfs_de_head *)body;
288 dotdot = dot + 1;
261 289
262 /* direntry header of "." */ 290 /* direntry header of "." */
263 put_deh_offset(&(deh[0]), DOT_OFFSET); 291 put_deh_offset(dot, DOT_OFFSET);
264 /* these two are from make_le_item_head, and are are LE */ 292 /* these two are from make_le_item_head, and are are LE */
265 deh[0].deh_dir_id = dirid; 293 dot->deh_dir_id = dirid;
266 deh[0].deh_objectid = objid; 294 dot->deh_objectid = objid;
267 deh[0].deh_state = 0; /* Endian safe if 0 */ 295 dot->deh_state = 0; /* Endian safe if 0 */
268 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen(".")); 296 put_deh_location(dot, EMPTY_DIR_SIZE_V1 - strlen("."));
269 mark_de_visible(&(deh[0])); 297 mark_de_visible(dot);
270 298
271 /* direntry header of ".." */ 299 /* direntry header of ".." */
272 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); 300 put_deh_offset(dotdot, DOT_DOT_OFFSET);
273 /* key of ".." for the root directory */ 301 /* key of ".." for the root directory */
274 /* these two are from the inode, and are are LE */ 302 /* these two are from the inode, and are are LE */
275 deh[1].deh_dir_id = par_dirid; 303 dotdot->deh_dir_id = par_dirid;
276 deh[1].deh_objectid = par_objid; 304 dotdot->deh_objectid = par_objid;
277 deh[1].deh_state = 0; /* Endian safe if 0 */ 305 dotdot->deh_state = 0; /* Endian safe if 0 */
278 put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen("..")); 306 put_deh_location(dotdot, deh_location(dot) - strlen(".."));
279 mark_de_visible(&(deh[1])); 307 mark_de_visible(dotdot);
280 308
281 /* copy ".." and "." */ 309 /* copy ".." and "." */
282 memcpy(body + deh_location(&(deh[0])), ".", 1); 310 memcpy(body + deh_location(dot), ".", 1);
283 memcpy(body + deh_location(&(deh[1])), "..", 2); 311 memcpy(body + deh_location(dotdot), "..", 2);
284} 312}
285 313
286/* compose directory item containing "." and ".." entries */ 314/* compose directory item containing "." and ".." entries */
287void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, 315void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
288 __le32 par_dirid, __le32 par_objid) 316 __le32 par_dirid, __le32 par_objid)
289{ 317{
290 struct reiserfs_de_head *deh; 318 struct reiserfs_de_head *dot, *dotdot;
291 319
292 memset(body, 0, EMPTY_DIR_SIZE); 320 memset(body, 0, EMPTY_DIR_SIZE);
293 deh = (struct reiserfs_de_head *)body; 321 dot = (struct reiserfs_de_head *)body;
322 dotdot = dot + 1;
294 323
295 /* direntry header of "." */ 324 /* direntry header of "." */
296 put_deh_offset(&(deh[0]), DOT_OFFSET); 325 put_deh_offset(dot, DOT_OFFSET);
297 /* these two are from make_le_item_head, and are are LE */ 326 /* these two are from make_le_item_head, and are are LE */
298 deh[0].deh_dir_id = dirid; 327 dot->deh_dir_id = dirid;
299 deh[0].deh_objectid = objid; 328 dot->deh_objectid = objid;
300 deh[0].deh_state = 0; /* Endian safe if 0 */ 329 dot->deh_state = 0; /* Endian safe if 0 */
301 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen("."))); 330 put_deh_location(dot, EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
302 mark_de_visible(&(deh[0])); 331 mark_de_visible(dot);
303 332
304 /* direntry header of ".." */ 333 /* direntry header of ".." */
305 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET); 334 put_deh_offset(dotdot, DOT_DOT_OFFSET);
306 /* key of ".." for the root directory */ 335 /* key of ".." for the root directory */
307 /* these two are from the inode, and are are LE */ 336 /* these two are from the inode, and are are LE */
308 deh[1].deh_dir_id = par_dirid; 337 dotdot->deh_dir_id = par_dirid;
309 deh[1].deh_objectid = par_objid; 338 dotdot->deh_objectid = par_objid;
310 deh[1].deh_state = 0; /* Endian safe if 0 */ 339 dotdot->deh_state = 0; /* Endian safe if 0 */
311 put_deh_location(&(deh[1]), 340 put_deh_location(dotdot, deh_location(dot) - ROUND_UP(strlen("..")));
312 deh_location(&(deh[0])) - ROUND_UP(strlen(".."))); 341 mark_de_visible(dotdot);
313 mark_de_visible(&(deh[1]));
314 342
315 /* copy ".." and "." */ 343 /* copy ".." and "." */
316 memcpy(body + deh_location(&(deh[0])), ".", 1); 344 memcpy(body + deh_location(dot), ".", 1);
317 memcpy(body + deh_location(&(deh[1])), "..", 2); 345 memcpy(body + deh_location(dotdot), "..", 2);
318} 346}
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 9a3c68cf6026..54fdf196bfb2 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -2,18 +2,13 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/* Now we have all buffers that must be used in balancing of the tree */ 5/*
6/* Further calculations can not cause schedule(), and thus the buffer */ 6 * Now we have all buffers that must be used in balancing of the tree
7/* tree will be stable until the balancing will be finished */ 7 * Further calculations can not cause schedule(), and thus the buffer
8/* balance the tree according to the analysis made before, */ 8 * tree will be stable until the balancing will be finished
9/* and using buffers obtained after all above. */ 9 * balance the tree according to the analysis made before,
10 10 * and using buffers obtained after all above.
11/** 11 */
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17 12
18#include <asm/uaccess.h> 13#include <asm/uaccess.h>
19#include <linux/time.h> 14#include <linux/time.h>
@@ -61,48 +56,190 @@ static inline void buffer_info_init_bh(struct tree_balance *tb,
61inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 56inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62 struct buffer_head *bh, int flag) 57 struct buffer_head *bh, int flag)
63{ 58{
64 journal_mark_dirty(tb->transaction_handle, 59 journal_mark_dirty(tb->transaction_handle, bh);
65 tb->transaction_handle->t_super, bh);
66} 60}
67 61
68#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 62#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 63#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70 64
71/* summary: 65/*
72 if deleting something ( tb->insert_size[0] < 0 ) 66 * summary:
73 return(balance_leaf_when_delete()); (flag d handled here) 67 * if deleting something ( tb->insert_size[0] < 0 )
74 else 68 * return(balance_leaf_when_delete()); (flag d handled here)
75 if lnum is larger than 0 we put items into the left node 69 * else
76 if rnum is larger than 0 we put items into the right node 70 * if lnum is larger than 0 we put items into the left node
77 if snum1 is larger than 0 we put items into the new node s1 71 * if rnum is larger than 0 we put items into the right node
78 if snum2 is larger than 0 we put items into the new node s2 72 * if snum1 is larger than 0 we put items into the new node s1
79Note that all *num* count new items being created. 73 * if snum2 is larger than 0 we put items into the new node s2
80 74 * Note that all *num* count new items being created.
81It would be easier to read balance_leaf() if each of these summary 75 */
82lines was a separate procedure rather than being inlined. I think 76
83that there are many passages here and in balance_leaf_when_delete() in 77static void balance_leaf_when_delete_del(struct tree_balance *tb)
84which two calls to one procedure can replace two passages, and it 78{
85might save cache space and improve software maintenance costs to do so. 79 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
86 80 int item_pos = PATH_LAST_POSITION(tb->tb_path);
87Vladimir made the perceptive comment that we should offload most of 81 struct buffer_info bi;
88the decision making in this function into fix_nodes/check_balance, and 82#ifdef CONFIG_REISERFS_CHECK
89then create some sort of structure in tb that says what actions should 83 struct item_head *ih = item_head(tbS0, item_pos);
90be performed by do_balance. 84#endif
91 85
92-Hans */ 86 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
93 87 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
94/* Balance leaf node in case of delete or cut: insert_size[0] < 0 88 -tb->insert_size[0], ih);
89
90 buffer_info_init_tbS0(tb, &bi);
91 leaf_delete_items(&bi, 0, item_pos, 1, -1);
92
93 if (!item_pos && tb->CFL[0]) {
94 if (B_NR_ITEMS(tbS0)) {
95 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
96 } else {
97 if (!PATH_H_POSITION(tb->tb_path, 1))
98 replace_key(tb, tb->CFL[0], tb->lkey[0],
99 PATH_H_PPARENT(tb->tb_path, 0), 0);
100 }
101 }
102
103 RFALSE(!item_pos && !tb->CFL[0],
104 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
105 tb->L[0]);
106}
107
108/* cut item in S[0] */
109static void balance_leaf_when_delete_cut(struct tree_balance *tb)
110{
111 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
112 int item_pos = PATH_LAST_POSITION(tb->tb_path);
113 struct item_head *ih = item_head(tbS0, item_pos);
114 int pos_in_item = tb->tb_path->pos_in_item;
115 struct buffer_info bi;
116 buffer_info_init_tbS0(tb, &bi);
117
118 if (is_direntry_le_ih(ih)) {
119 /*
120 * UFS unlink semantics are such that you can only
121 * delete one directory entry at a time.
122 *
123 * when we cut a directory tb->insert_size[0] means
124 * number of entries to be cut (always 1)
125 */
126 tb->insert_size[0] = -1;
127 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
128 -tb->insert_size[0]);
129
130 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
131 "PAP-12030: can not change delimiting key. CFL[0]=%p",
132 tb->CFL[0]);
133
134 if (!item_pos && !pos_in_item && tb->CFL[0])
135 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
136 } else {
137 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
138 -tb->insert_size[0]);
139
140 RFALSE(!ih_item_len(ih),
141 "PAP-12035: cut must leave non-zero dynamic "
142 "length of item");
143 }
144}
145
146static int balance_leaf_when_delete_left(struct tree_balance *tb)
147{
148 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
149 int n = B_NR_ITEMS(tbS0);
150
151 /* L[0] must be joined with S[0] */
152 if (tb->lnum[0] == -1) {
153 /* R[0] must be also joined with S[0] */
154 if (tb->rnum[0] == -1) {
155 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
156 /*
157 * all contents of all the
158 * 3 buffers will be in L[0]
159 */
160 if (PATH_H_POSITION(tb->tb_path, 1) == 0 &&
161 1 < B_NR_ITEMS(tb->FR[0]))
162 replace_key(tb, tb->CFL[0],
163 tb->lkey[0], tb->FR[0], 1);
164
165 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, -1,
166 NULL);
167 leaf_move_items(LEAF_FROM_R_TO_L, tb,
168 B_NR_ITEMS(tb->R[0]), -1,
169 NULL);
170
171 reiserfs_invalidate_buffer(tb, tbS0);
172 reiserfs_invalidate_buffer(tb, tb->R[0]);
173
174 return 0;
175 }
176
177 /* all contents of all the 3 buffers will be in R[0] */
178 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, NULL);
179 leaf_move_items(LEAF_FROM_L_TO_R, tb,
180 B_NR_ITEMS(tb->L[0]), -1, NULL);
181
182 /* right_delimiting_key is correct in R[0] */
183 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
184
185 reiserfs_invalidate_buffer(tb, tbS0);
186 reiserfs_invalidate_buffer(tb, tb->L[0]);
187
188 return -1;
189 }
190
191 RFALSE(tb->rnum[0] != 0,
192 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
193 /* all contents of L[0] and S[0] will be in L[0] */
194 leaf_shift_left(tb, n, -1);
195
196 reiserfs_invalidate_buffer(tb, tbS0);
197
198 return 0;
199 }
200
201 /*
202 * a part of contents of S[0] will be in L[0] and
203 * the rest part of S[0] will be in R[0]
204 */
205
206 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
207 (tb->lnum[0] + tb->rnum[0] > n + 1),
208 "PAP-12050: rnum(%d) and lnum(%d) and item "
209 "number(%d) in S[0] are not consistent",
210 tb->rnum[0], tb->lnum[0], n);
211 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
212 (tb->lbytes != -1 || tb->rbytes != -1),
213 "PAP-12055: bad rbytes (%d)/lbytes (%d) "
214 "parameters when items are not split",
215 tb->rbytes, tb->lbytes);
216 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
217 (tb->lbytes < 1 || tb->rbytes != -1),
218 "PAP-12060: bad rbytes (%d)/lbytes (%d) "
219 "parameters when items are split",
220 tb->rbytes, tb->lbytes);
221
222 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
223 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
224
225 reiserfs_invalidate_buffer(tb, tbS0);
226
227 return 0;
228}
229
230/*
231 * Balance leaf node in case of delete or cut: insert_size[0] < 0
95 * 232 *
96 * lnum, rnum can have values >= -1 233 * lnum, rnum can have values >= -1
97 * -1 means that the neighbor must be joined with S 234 * -1 means that the neighbor must be joined with S
98 * 0 means that nothing should be done with the neighbor 235 * 0 means that nothing should be done with the neighbor
99 * >0 means to shift entirely or partly the specified number of items to the neighbor 236 * >0 means to shift entirely or partly the specified number of items
237 * to the neighbor
100 */ 238 */
101static int balance_leaf_when_delete(struct tree_balance *tb, int flag) 239static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102{ 240{
103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 241 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104 int item_pos = PATH_LAST_POSITION(tb->tb_path); 242 int item_pos = PATH_LAST_POSITION(tb->tb_path);
105 int pos_in_item = tb->tb_path->pos_in_item;
106 struct buffer_info bi; 243 struct buffer_info bi;
107 int n; 244 int n;
108 struct item_head *ih; 245 struct item_head *ih;
@@ -114,1022 +251,1202 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), 251 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115 "PAP-12010: tree can not be empty"); 252 "PAP-12010: tree can not be empty");
116 253
117 ih = B_N_PITEM_HEAD(tbS0, item_pos); 254 ih = item_head(tbS0, item_pos);
118 buffer_info_init_tbS0(tb, &bi); 255 buffer_info_init_tbS0(tb, &bi);
119 256
120 /* Delete or truncate the item */ 257 /* Delete or truncate the item */
121 258
122 switch (flag) { 259 BUG_ON(flag != M_DELETE && flag != M_CUT);
123 case M_DELETE: /* delete item in S[0] */ 260 if (flag == M_DELETE)
261 balance_leaf_when_delete_del(tb);
262 else /* M_CUT */
263 balance_leaf_when_delete_cut(tb);
124 264
125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127 -tb->insert_size[0], ih);
128 265
129 leaf_delete_items(&bi, 0, item_pos, 1, -1); 266 /*
267 * the rule is that no shifting occurs unless by shifting
268 * a node can be freed
269 */
270 n = B_NR_ITEMS(tbS0);
130 271
131 if (!item_pos && tb->CFL[0]) {
132 if (B_NR_ITEMS(tbS0)) {
133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134 0);
135 } else {
136 if (!PATH_H_POSITION(tb->tb_path, 1))
137 replace_key(tb, tb->CFL[0], tb->lkey[0],
138 PATH_H_PPARENT(tb->tb_path,
139 0), 0);
140 }
141 }
142 272
143 RFALSE(!item_pos && !tb->CFL[0], 273 /* L[0] takes part in balancing */
144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], 274 if (tb->lnum[0])
145 tb->L[0]); 275 return balance_leaf_when_delete_left(tb);
146 276
147 break; 277 if (tb->rnum[0] == -1) {
278 /* all contents of R[0] and S[0] will be in R[0] */
279 leaf_shift_right(tb, n, -1);
280 reiserfs_invalidate_buffer(tb, tbS0);
281 return 0;
282 }
148 283
149 case M_CUT:{ /* cut item in S[0] */ 284 RFALSE(tb->rnum[0],
150 if (is_direntry_le_ih(ih)) { 285 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
286 return 0;
287}
151 288
152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 289static void balance_leaf_insert_left(struct tree_balance *tb,
153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 290 struct item_head *ih, const char *body)
154 tb->insert_size[0] = -1; 291{
155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 292 int ret;
156 -tb->insert_size[0]); 293 struct buffer_info bi;
294 int n = B_NR_ITEMS(tb->L[0]);
295
296 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
297 /* part of new item falls into L[0] */
298 int new_item_len, shift;
299 int version;
300
301 ret = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
302
303 /* Calculate item length to insert to S[0] */
304 new_item_len = ih_item_len(ih) - tb->lbytes;
305
306 /* Calculate and check item length to insert to L[0] */
307 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
308
309 RFALSE(ih_item_len(ih) <= 0,
310 "PAP-12080: there is nothing to insert into L[0]: "
311 "ih_item_len=%d", ih_item_len(ih));
312
313 /* Insert new item into L[0] */
314 buffer_info_init_left(tb, &bi);
315 leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
316 min_t(int, tb->zeroes_num, ih_item_len(ih)));
317
318 version = ih_version(ih);
319
320 /*
321 * Calculate key component, item length and body to
322 * insert into S[0]
323 */
324 shift = 0;
325 if (is_indirect_le_ih(ih))
326 shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
327
328 add_le_ih_k_offset(ih, tb->lbytes << shift);
329
330 put_ih_item_len(ih, new_item_len);
331 if (tb->lbytes > tb->zeroes_num) {
332 body += (tb->lbytes - tb->zeroes_num);
333 tb->zeroes_num = 0;
334 } else
335 tb->zeroes_num -= tb->lbytes;
336
337 RFALSE(ih_item_len(ih) <= 0,
338 "PAP-12085: there is nothing to insert into S[0]: "
339 "ih_item_len=%d", ih_item_len(ih));
340 } else {
341 /* new item in whole falls into L[0] */
342 /* Shift lnum[0]-1 items to L[0] */
343 ret = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
344
345 /* Insert new item into L[0] */
346 buffer_info_init_left(tb, &bi);
347 leaf_insert_into_buf(&bi, n + tb->item_pos - ret, ih, body,
348 tb->zeroes_num);
349 tb->insert_size[0] = 0;
350 tb->zeroes_num = 0;
351 }
352}
157 353
158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], 354static void balance_leaf_paste_left_shift_dirent(struct tree_balance *tb,
159 "PAP-12030: can not change delimiting key. CFL[0]=%p", 355 struct item_head *ih,
160 tb->CFL[0]); 356 const char *body)
357{
358 int n = B_NR_ITEMS(tb->L[0]);
359 struct buffer_info bi;
161 360
162 if (!item_pos && !pos_in_item && tb->CFL[0]) { 361 RFALSE(tb->zeroes_num,
163 replace_key(tb, tb->CFL[0], tb->lkey[0], 362 "PAP-12090: invalid parameter in case of a directory");
164 tbS0, 0); 363
165 } 364 /* directory item */
166 } else { 365 if (tb->lbytes > tb->pos_in_item) {
167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 366 /* new directory entry falls into L[0] */
168 -tb->insert_size[0]); 367 struct item_head *pasted;
368 int ret, l_pos_in_item = tb->pos_in_item;
369
370 /*
371 * Shift lnum[0] - 1 items in whole.
372 * Shift lbytes - 1 entries from given directory item
373 */
374 ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
375 if (ret && !tb->item_pos) {
376 pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
377 l_pos_in_item += ih_entry_count(pasted) -
378 (tb->lbytes - 1);
379 }
169 380
170 RFALSE(!ih_item_len(ih), 381 /* Append given directory entry to directory item */
171 "PAP-12035: cut must leave non-zero dynamic length of item"); 382 buffer_info_init_left(tb, &bi);
172 } 383 leaf_paste_in_buffer(&bi, n + tb->item_pos - ret,
173 break; 384 l_pos_in_item, tb->insert_size[0],
385 body, tb->zeroes_num);
386
387 /*
388 * previous string prepared space for pasting new entry,
389 * following string pastes this entry
390 */
391
392 /*
393 * when we have merge directory item, pos_in_item
394 * has been changed too
395 */
396
397 /* paste new directory entry. 1 is entry number */
398 leaf_paste_entries(&bi, n + tb->item_pos - ret,
399 l_pos_in_item, 1,
400 (struct reiserfs_de_head *) body,
401 body + DEH_SIZE, tb->insert_size[0]);
402 tb->insert_size[0] = 0;
403 } else {
404 /* new directory item doesn't fall into L[0] */
405 /*
406 * Shift lnum[0]-1 items in whole. Shift lbytes
407 * directory entries from directory item number lnum[0]
408 */
409 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
410 }
411
412 /* Calculate new position to append in item body */
413 tb->pos_in_item -= tb->lbytes;
414}
415
416static void balance_leaf_paste_left_shift(struct tree_balance *tb,
417 struct item_head *ih,
418 const char *body)
419{
420 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
421 int n = B_NR_ITEMS(tb->L[0]);
422 struct buffer_info bi;
423
424 if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
425 balance_leaf_paste_left_shift_dirent(tb, ih, body);
426 return;
427 }
428
429 RFALSE(tb->lbytes <= 0,
430 "PAP-12095: there is nothing to shift to L[0]. "
431 "lbytes=%d", tb->lbytes);
432 RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)),
433 "PAP-12100: incorrect position to paste: "
434 "item_len=%d, pos_in_item=%d",
435 ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item);
436
437 /* appended item will be in L[0] in whole */
438 if (tb->lbytes >= tb->pos_in_item) {
439 struct item_head *tbS0_pos_ih, *tbL0_ih;
440 struct item_head *tbS0_0_ih;
441 struct reiserfs_key *left_delim_key;
442 int ret, l_n, version, temp_l;
443
444 tbS0_pos_ih = item_head(tbS0, tb->item_pos);
445 tbS0_0_ih = item_head(tbS0, 0);
446
447 /*
448 * this bytes number must be appended
449 * to the last item of L[h]
450 */
451 l_n = tb->lbytes - tb->pos_in_item;
452
453 /* Calculate new insert_size[0] */
454 tb->insert_size[0] -= l_n;
455
456 RFALSE(tb->insert_size[0] <= 0,
457 "PAP-12105: there is nothing to paste into "
458 "L[0]. insert_size=%d", tb->insert_size[0]);
459
460 ret = leaf_shift_left(tb, tb->lnum[0],
461 ih_item_len(tbS0_pos_ih));
462
463 tbL0_ih = item_head(tb->L[0], n + tb->item_pos - ret);
464
465 /* Append to body of item in L[0] */
466 buffer_info_init_left(tb, &bi);
467 leaf_paste_in_buffer(&bi, n + tb->item_pos - ret,
468 ih_item_len(tbL0_ih), l_n, body,
469 min_t(int, l_n, tb->zeroes_num));
470
471 /*
472 * 0-th item in S0 can be only of DIRECT type
473 * when l_n != 0
474 */
475 temp_l = l_n;
476
477 RFALSE(ih_item_len(tbS0_0_ih),
478 "PAP-12106: item length must be 0");
479 RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key,
480 leaf_key(tb->L[0], n + tb->item_pos - ret)),
481 "PAP-12107: items must be of the same file");
482
483 if (is_indirect_le_ih(tbL0_ih)) {
484 int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
485 temp_l = l_n << shift;
174 } 486 }
487 /* update key of first item in S0 */
488 version = ih_version(tbS0_0_ih);
489 add_le_key_k_offset(version, &tbS0_0_ih->ih_key, temp_l);
490
491 /* update left delimiting key */
492 left_delim_key = internal_key(tb->CFL[0], tb->lkey[0]);
493 add_le_key_k_offset(version, left_delim_key, temp_l);
494
495 /*
496 * Calculate new body, position in item and
497 * insert_size[0]
498 */
499 if (l_n > tb->zeroes_num) {
500 body += (l_n - tb->zeroes_num);
501 tb->zeroes_num = 0;
502 } else
503 tb->zeroes_num -= l_n;
504 tb->pos_in_item = 0;
505
506 RFALSE(comp_short_le_keys(&tbS0_0_ih->ih_key,
507 leaf_key(tb->L[0],
508 B_NR_ITEMS(tb->L[0]) - 1)) ||
509 !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size) ||
510 !op_is_left_mergeable(left_delim_key, tbS0->b_size),
511 "PAP-12120: item must be merge-able with left "
512 "neighboring item");
513 } else {
514 /* only part of the appended item will be in L[0] */
515
516 /* Calculate position in item for append in S[0] */
517 tb->pos_in_item -= tb->lbytes;
518
519 RFALSE(tb->pos_in_item <= 0,
520 "PAP-12125: no place for paste. pos_in_item=%d",
521 tb->pos_in_item);
522
523 /*
524 * Shift lnum[0] - 1 items in whole.
525 * Shift lbytes - 1 byte from item number lnum[0]
526 */
527 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
528 }
529}
175 530
176 default: 531
177 print_cur_tb("12040"); 532/* appended item will be in L[0] in whole */
178 reiserfs_panic(tb->tb_sb, "PAP-12040", 533static void balance_leaf_paste_left_whole(struct tree_balance *tb,
179 "unexpected mode: %s(%d)", 534 struct item_head *ih,
180 (flag == 535 const char *body)
181 M_PASTE) ? "PASTE" : ((flag == 536{
182 M_INSERT) ? "INSERT" : 537 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
183 "UNKNOWN"), flag); 538 int n = B_NR_ITEMS(tb->L[0]);
539 struct buffer_info bi;
540 struct item_head *pasted;
541 int ret;
542
543 /* if we paste into first item of S[0] and it is left mergable */
544 if (!tb->item_pos &&
545 op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) {
546 /*
547 * then increment pos_in_item by the size of the
548 * last item in L[0]
549 */
550 pasted = item_head(tb->L[0], n - 1);
551 if (is_direntry_le_ih(pasted))
552 tb->pos_in_item += ih_entry_count(pasted);
553 else
554 tb->pos_in_item += ih_item_len(pasted);
184 } 555 }
185 556
186 /* the rule is that no shifting occurs unless by shifting a node can be freed */ 557 /*
187 n = B_NR_ITEMS(tbS0); 558 * Shift lnum[0] - 1 items in whole.
188 if (tb->lnum[0]) { /* L[0] takes part in balancing */ 559 * Shift lbytes - 1 byte from item number lnum[0]
189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ 560 */
190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ 561 ret = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { 562
192 /* all contents of all the 3 buffers will be in L[0] */ 563 /* Append to body of item in L[0] */
193 if (PATH_H_POSITION(tb->tb_path, 1) == 0 564 buffer_info_init_left(tb, &bi);
194 && 1 < B_NR_ITEMS(tb->FR[0])) 565 leaf_paste_in_buffer(&bi, n + tb->item_pos - ret, tb->pos_in_item,
195 replace_key(tb, tb->CFL[0], 566 tb->insert_size[0], body, tb->zeroes_num);
196 tb->lkey[0], 567
197 tb->FR[0], 1); 568 /* if appended item is directory, paste entry */
198 569 pasted = item_head(tb->L[0], n + tb->item_pos - ret);
199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, 570 if (is_direntry_le_ih(pasted))
200 -1, NULL); 571 leaf_paste_entries(&bi, n + tb->item_pos - ret,
201 leaf_move_items(LEAF_FROM_R_TO_L, tb, 572 tb->pos_in_item, 1,
202 B_NR_ITEMS(tb->R[0]), 573 (struct reiserfs_de_head *)body,
203 -1, NULL); 574 body + DEH_SIZE, tb->insert_size[0]);
204 575
205 reiserfs_invalidate_buffer(tb, tbS0); 576 /*
206 reiserfs_invalidate_buffer(tb, 577 * if appended item is indirect item, put unformatted node
207 tb->R[0]); 578 * into un list
208 579 */
209 return 0; 580 if (is_indirect_le_ih(pasted))
210 } 581 set_ih_free_space(pasted, 0);
211 /* all contents of all the 3 buffers will be in R[0] */
212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213 NULL);
214 leaf_move_items(LEAF_FROM_L_TO_R, tb,
215 B_NR_ITEMS(tb->L[0]), -1, NULL);
216 582
217 /* right_delimiting_key is correct in R[0] */ 583 tb->insert_size[0] = 0;
218 replace_key(tb, tb->CFR[0], tb->rkey[0], 584 tb->zeroes_num = 0;
219 tb->R[0], 0); 585}
220 586
221 reiserfs_invalidate_buffer(tb, tbS0); 587static void balance_leaf_paste_left(struct tree_balance *tb,
222 reiserfs_invalidate_buffer(tb, tb->L[0]); 588 struct item_head *ih, const char *body)
589{
590 /* we must shift the part of the appended item */
591 if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1)
592 balance_leaf_paste_left_shift(tb, ih, body);
593 else
594 balance_leaf_paste_left_whole(tb, ih, body);
595}
223 596
224 return -1; 597/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
225 } 598static void balance_leaf_left(struct tree_balance *tb, struct item_head *ih,
599 const char *body, int flag)
600{
601 if (tb->lnum[0] <= 0)
602 return;
226 603
227 RFALSE(tb->rnum[0] != 0, 604 /* new item or it part falls to L[0], shift it too */
228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 605 if (tb->item_pos < tb->lnum[0]) {
229 /* all contents of L[0] and S[0] will be in L[0] */ 606 BUG_ON(flag != M_INSERT && flag != M_PASTE);
230 leaf_shift_left(tb, n, -1); 607
608 if (flag == M_INSERT)
609 balance_leaf_insert_left(tb, ih, body);
610 else /* M_PASTE */
611 balance_leaf_paste_left(tb, ih, body);
612 } else
613 /* new item doesn't fall into L[0] */
614 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
615}
231 616
232 reiserfs_invalidate_buffer(tb, tbS0);
233 617
234 return 0; 618static void balance_leaf_insert_right(struct tree_balance *tb,
619 struct item_head *ih, const char *body)
620{
621
622 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
623 int n = B_NR_ITEMS(tbS0);
624 struct buffer_info bi;
625 int ret;
626
627 /* new item or part of it doesn't fall into R[0] */
628 if (n - tb->rnum[0] >= tb->item_pos) {
629 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
630 return;
631 }
632
633 /* new item or its part falls to R[0] */
634
635 /* part of new item falls into R[0] */
636 if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {
637 loff_t old_key_comp, old_len, r_zeroes_number;
638 const char *r_body;
639 int version, shift;
640 loff_t offset;
641
642 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
643
644 version = ih_version(ih);
645
646 /* Remember key component and item length */
647 old_key_comp = le_ih_k_offset(ih);
648 old_len = ih_item_len(ih);
649
650 /*
651 * Calculate key component and item length to insert
652 * into R[0]
653 */
654 shift = 0;
655 if (is_indirect_le_ih(ih))
656 shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
657 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << shift);
658 set_le_ih_k_offset(ih, offset);
659 put_ih_item_len(ih, tb->rbytes);
660
661 /* Insert part of the item into R[0] */
662 buffer_info_init_right(tb, &bi);
663 if ((old_len - tb->rbytes) > tb->zeroes_num) {
664 r_zeroes_number = 0;
665 r_body = body + (old_len - tb->rbytes) - tb->zeroes_num;
666 } else {
667 r_body = body;
668 r_zeroes_number = tb->zeroes_num -
669 (old_len - tb->rbytes);
670 tb->zeroes_num -= r_zeroes_number;
235 } 671 }
236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237
238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239 (tb->lnum[0] + tb->rnum[0] > n + 1),
240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241 tb->rnum[0], tb->lnum[0], n);
242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243 (tb->lbytes != -1 || tb->rbytes != -1),
244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245 tb->rbytes, tb->lbytes);
246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247 (tb->lbytes < 1 || tb->rbytes != -1),
248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249 tb->rbytes, tb->lbytes);
250 672
251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 673 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number);
674
675 /* Replace right delimiting key by first key in R[0] */
676 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
677
678 /*
679 * Calculate key component and item length to
680 * insert into S[0]
681 */
682 set_le_ih_k_offset(ih, old_key_comp);
683 put_ih_item_len(ih, old_len - tb->rbytes);
684
685 tb->insert_size[0] -= tb->rbytes;
686
687 } else {
688 /* whole new item falls into R[0] */
689
690 /* Shift rnum[0]-1 items to R[0] */
691 ret = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
692
693 /* Insert new item into R[0] */
694 buffer_info_init_right(tb, &bi);
695 leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1,
696 ih, body, tb->zeroes_num);
697
698 if (tb->item_pos - n + tb->rnum[0] - 1 == 0)
699 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
700
701 tb->zeroes_num = tb->insert_size[0] = 0;
702 }
703}
704
705
706static void balance_leaf_paste_right_shift_dirent(struct tree_balance *tb,
707 struct item_head *ih, const char *body)
708{
709 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
710 struct buffer_info bi;
711 int entry_count;
712
713 RFALSE(tb->zeroes_num,
714 "PAP-12145: invalid parameter in case of a directory");
715 entry_count = ih_entry_count(item_head(tbS0, tb->item_pos));
716
717 /* new directory entry falls into R[0] */
718 if (entry_count - tb->rbytes < tb->pos_in_item) {
719 int paste_entry_position;
720
721 RFALSE(tb->rbytes - 1 >= entry_count || !tb->insert_size[0],
722 "PAP-12150: no enough of entries to shift to R[0]: "
723 "rbytes=%d, entry_count=%d", tb->rbytes, entry_count);
724
725 /*
726 * Shift rnum[0]-1 items in whole.
727 * Shift rbytes-1 directory entries from directory
728 * item number rnum[0]
729 */
730 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
731
732 /* Paste given directory entry to directory item */
733 paste_entry_position = tb->pos_in_item - entry_count +
734 tb->rbytes - 1;
735 buffer_info_init_right(tb, &bi);
736 leaf_paste_in_buffer(&bi, 0, paste_entry_position,
737 tb->insert_size[0], body, tb->zeroes_num);
738
739 /* paste entry */
740 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
741 (struct reiserfs_de_head *) body,
742 body + DEH_SIZE, tb->insert_size[0]);
743
744 /* change delimiting keys */
745 if (paste_entry_position == 0)
746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
747
748 tb->insert_size[0] = 0;
749 tb->pos_in_item++;
750 } else {
751 /* new directory entry doesn't fall into R[0] */
252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 752 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
753 }
754}
253 755
254 reiserfs_invalidate_buffer(tb, tbS0); 756static void balance_leaf_paste_right_shift(struct tree_balance *tb,
757 struct item_head *ih, const char *body)
758{
759 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
760 int n_shift, n_rem, r_zeroes_number, version;
761 unsigned long temp_rem;
762 const char *r_body;
763 struct buffer_info bi;
255 764
256 return 0; 765 /* we append to directory item */
766 if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
767 balance_leaf_paste_right_shift_dirent(tb, ih, body);
768 return;
257 } 769 }
258 770
259 if (tb->rnum[0] == -1) { 771 /* regular object */
260 /* all contents of R[0] and S[0] will be in R[0] */ 772
261 leaf_shift_right(tb, n, -1); 773 /*
262 reiserfs_invalidate_buffer(tb, tbS0); 774 * Calculate number of bytes which must be shifted
263 return 0; 775 * from appended item
776 */
777 n_shift = tb->rbytes - tb->insert_size[0];
778 if (n_shift < 0)
779 n_shift = 0;
780
781 RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)),
782 "PAP-12155: invalid position to paste. ih_item_len=%d, "
783 "pos_in_item=%d", tb->pos_in_item,
784 ih_item_len(item_head(tbS0, tb->item_pos)));
785
786 leaf_shift_right(tb, tb->rnum[0], n_shift);
787
788 /*
789 * Calculate number of bytes which must remain in body
790 * after appending to R[0]
791 */
792 n_rem = tb->insert_size[0] - tb->rbytes;
793 if (n_rem < 0)
794 n_rem = 0;
795
796 temp_rem = n_rem;
797
798 version = ih_version(item_head(tb->R[0], 0));
799
800 if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
801 int shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
802 temp_rem = n_rem << shift;
264 } 803 }
265 804
266 RFALSE(tb->rnum[0], 805 add_le_key_k_offset(version, leaf_key(tb->R[0], 0), temp_rem);
267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); 806 add_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
268 return 0; 807 temp_rem);
808
809 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
810
811 /* Append part of body into R[0] */
812 buffer_info_init_right(tb, &bi);
813 if (n_rem > tb->zeroes_num) {
814 r_zeroes_number = 0;
815 r_body = body + n_rem - tb->zeroes_num;
816 } else {
817 r_body = body;
818 r_zeroes_number = tb->zeroes_num - n_rem;
819 tb->zeroes_num -= r_zeroes_number;
820 }
821
822 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem,
823 r_body, r_zeroes_number);
824
825 if (is_indirect_le_ih(item_head(tb->R[0], 0)))
826 set_ih_free_space(item_head(tb->R[0], 0), 0);
827
828 tb->insert_size[0] = n_rem;
829 if (!n_rem)
830 tb->pos_in_item++;
269} 831}
270 832
271static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ 833static void balance_leaf_paste_right_whole(struct tree_balance *tb,
272 const char *body, /* body of inserted item or bytes to paste */ 834 struct item_head *ih, const char *body)
273 int flag, /* i - insert, d - delete, c - cut, p - paste
274 (see comment to do_balance) */
275 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
276 must be inserted into the next higher level. This insertion
277 consists of a key or two keys and their corresponding
278 pointers */
279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
280 )
281{ 835{
282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 836 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 837 int n = B_NR_ITEMS(tbS0);
284 of the affected item */ 838 struct item_head *pasted;
285 struct buffer_info bi; 839 struct buffer_info bi;
286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
287 int snum[2]; /* number of items that will be placed
288 into S_new (includes partially shifted
289 items) */
290 int sbytes[2]; /* if an item is partially shifted into S_new then
291 if it is a directory item
292 it is the number of entries from the item that are shifted into S_new
293 else
294 it is the number of bytes from the item that are shifted into S_new
295 */
296 int n, i;
297 int ret_val;
298 int pos_in_item;
299 int zeros_num;
300 840
301 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 841 buffer_info_init_right(tb, &bi);
842 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
843
844 /* append item in R[0] */
845 if (tb->pos_in_item >= 0) {
846 buffer_info_init_right(tb, &bi);
847 leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0],
848 tb->pos_in_item, tb->insert_size[0], body,
849 tb->zeroes_num);
850 }
302 851
303 /* Make balance in case insert_size[0] < 0 */ 852 /* paste new entry, if item is directory item */
304 if (tb->insert_size[0] < 0) 853 pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]);
305 return balance_leaf_when_delete(tb, flag); 854 if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) {
855 leaf_paste_entries(&bi, tb->item_pos - n + tb->rnum[0],
856 tb->pos_in_item, 1,
857 (struct reiserfs_de_head *)body,
858 body + DEH_SIZE, tb->insert_size[0]);
306 859
307 zeros_num = 0; 860 if (!tb->pos_in_item) {
308 if (flag == M_INSERT && !body)
309 zeros_num = ih_item_len(ih);
310 861
311 pos_in_item = tb->tb_path->pos_in_item; 862 RFALSE(tb->item_pos - n + tb->rnum[0],
312 /* for indirect item pos_in_item is measured in unformatted node 863 "PAP-12165: directory item must be first "
313 pointers. Recalculate to bytes */ 864 "item of node when pasting is in 0th position");
314 if (flag != M_INSERT 865
315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) 866 /* update delimiting keys */
316 pos_in_item *= UNFM_P_SIZE; 867 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
317
318 if (tb->lnum[0] > 0) {
319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320 if (item_pos < tb->lnum[0]) {
321 /* new item or it part falls to L[0], shift it too */
322 n = B_NR_ITEMS(tb->L[0]);
323
324 switch (flag) {
325 case M_INSERT: /* insert item into L[0] */
326
327 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
328 /* part of new item falls into L[0] */
329 int new_item_len;
330 int version;
331
332 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
333
334 /* Calculate item length to insert to S[0] */
335 new_item_len = ih_item_len(ih) - tb->lbytes;
336 /* Calculate and check item length to insert to L[0] */
337 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
338
339 RFALSE(ih_item_len(ih) <= 0,
340 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341 ih_item_len(ih));
342
343 /* Insert new item into L[0] */
344 buffer_info_init_left(tb, &bi);
345 leaf_insert_into_buf(&bi,
346 n + item_pos - ret_val, ih, body,
347 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
348
349 version = ih_version(ih);
350
351 /* Calculate key component, item length and body to insert into S[0] */
352 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
354
355 put_ih_item_len(ih, new_item_len);
356 if (tb->lbytes > zeros_num) {
357 body += (tb->lbytes - zeros_num);
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
361
362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
368 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
369 /* Insert new item into L[0] */
370 buffer_info_init_left(tb, &bi);
371 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
372 tb->insert_size[0] = 0;
373 zeros_num = 0;
374 }
375 break;
376
377 case M_PASTE: /* append item in L[0] */
378
379 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
380 /* we must shift the part of the appended item */
381 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
382
383 RFALSE(zeros_num,
384 "PAP-12090: invalid parameter in case of a directory");
385 /* directory item */
386 if (tb->lbytes > pos_in_item) {
387 /* new directory entry falls into L[0] */
388 struct item_head *pasted;
389 int l_pos_in_item = pos_in_item;
390
391 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
392 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393 if (ret_val && !item_pos) {
394 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
396 }
397
398 /* Append given directory entry to directory item */
399 buffer_info_init_left(tb, &bi);
400 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
401
402 /* previous string prepared space for pasting new entry, following string pastes this entry */
403
404 /* when we have merge directory item, pos_in_item has been changed too */
405
406 /* paste new directory entry. 1 is entry number */
407 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408 1, (struct reiserfs_de_head *) body,
409 body + DEH_SIZE, tb->insert_size[0]);
410 tb->insert_size[0] = 0;
411 } else {
412 /* new directory item doesn't fall into L[0] */
413 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
414 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
415 }
416 /* Calculate new position to append in item body */
417 pos_in_item -= tb->lbytes;
418 } else {
419 /* regular object */
420 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
422 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
423 ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
424
425 if (tb->lbytes >= pos_in_item) {
426 /* appended item will be in L[0] in whole */
427 int l_n;
428
429 /* this bytes number must be appended to the last item of L[h] */
430 l_n = tb->lbytes - pos_in_item;
431
432 /* Calculate new insert_size[0] */
433 tb->insert_size[0] -= l_n;
434
435 RFALSE(tb->insert_size[0] <= 0,
436 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
437 tb->insert_size[0]);
438 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439 (B_N_PITEM_HEAD(tbS0, item_pos)));
440 /* Append to body of item in L[0] */
441 buffer_info_init_left(tb, &bi);
442 leaf_paste_in_buffer
443 (&bi, n + item_pos - ret_val, ih_item_len
444 (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445 l_n, body,
446 zeros_num > l_n ? l_n : zeros_num);
447 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
448 {
449 int version;
450 int temp_l = l_n;
451
452 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
453 "PAP-12106: item length must be 0");
454 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455 (tb->L[0], n + item_pos - ret_val)),
456 "PAP-12107: items must be of the same file");
457 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
458 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
459 }
460 /* update key of first item in S0 */
461 version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463 le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
464 /* update left delimiting key */
465 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
467 }
468
469 /* Calculate new body, position in item and insert_size[0] */
470 if (l_n > zeros_num) {
471 body += (l_n - zeros_num);
472 zeros_num = 0;
473 } else
474 zeros_num -= l_n;
475 pos_in_item = 0;
476
477 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478 || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479 || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
480 "PAP-12120: item must be merge-able with left neighboring item");
481 } else { /* only part of the appended item will be in L[0] */
482
483 /* Calculate position in item for append in S[0] */
484 pos_in_item -= tb->lbytes;
485
486 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
487
488 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
489 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
490 }
491 }
492 } else { /* appended item will be in L[0] in whole */
493
494 struct item_head *pasted;
495
496 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
497 /* then increment pos_in_item by the size of the last item in L[0] */
498 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
499 if (is_direntry_le_ih(pasted))
500 pos_in_item += ih_entry_count(pasted);
501 else
502 pos_in_item += ih_item_len(pasted);
503 }
504
505 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
506 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
507 /* Append to body of item in L[0] */
508 buffer_info_init_left(tb, &bi);
509 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
510 pos_in_item,
511 tb->insert_size[0],
512 body, zeros_num);
513
514 /* if appended item is directory, paste entry */
515 pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
516 if (is_direntry_le_ih(pasted))
517 leaf_paste_entries(&bi, n + item_pos - ret_val,
518 pos_in_item, 1,
519 (struct reiserfs_de_head *) body,
520 body + DEH_SIZE,
521 tb->insert_size[0]);
522 /* if appended item is indirect item, put unformatted node into un list */
523 if (is_indirect_le_ih(pasted))
524 set_ih_free_space(pasted, 0);
525 tb->insert_size[0] = 0;
526 zeros_num = 0;
527 }
528 break;
529 default: /* cases d and t */
530 reiserfs_panic(tb->tb_sb, "PAP-12130",
531 "lnum > 0: unexpected mode: "
532 " %s(%d)",
533 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
534 }
535 } else {
536 /* new item doesn't fall into L[0] */
537 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
538 } 868 }
539 } 869 }
540 870
541 /* tb->lnum[0] > 0 */ 871 if (is_indirect_le_ih(pasted))
542 /* Calculate new item position */ 872 set_ih_free_space(pasted, 0);
543 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); 873 tb->zeroes_num = tb->insert_size[0] = 0;
544 874}
545 if (tb->rnum[0] > 0) {
546 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
547 n = B_NR_ITEMS(tbS0);
548 switch (flag) {
549
550 case M_INSERT: /* insert item */
551 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
552 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
553 loff_t old_key_comp, old_len, r_zeros_number;
554 const char *r_body;
555 int version;
556 loff_t offset;
557
558 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
559
560 version = ih_version(ih);
561 /* Remember key component and item length */
562 old_key_comp = le_ih_k_offset(ih);
563 old_len = ih_item_len(ih);
564
565 /* Calculate key component and item length to insert into R[0] */
566 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
567 set_le_ih_k_offset(ih, offset);
568 put_ih_item_len(ih, tb->rbytes);
569 /* Insert part of the item into R[0] */
570 buffer_info_init_right(tb, &bi);
571 if ((old_len - tb->rbytes) > zeros_num) {
572 r_zeros_number = 0;
573 r_body = body + (old_len - tb->rbytes) - zeros_num;
574 } else {
575 r_body = body;
576 r_zeros_number = zeros_num - (old_len - tb->rbytes);
577 zeros_num -= r_zeros_number;
578 }
579
580 leaf_insert_into_buf(&bi, 0, ih, r_body,
581 r_zeros_number);
582
583 /* Replace right delimiting key by first key in R[0] */
584 replace_key(tb, tb->CFR[0], tb->rkey[0],
585 tb->R[0], 0);
586
587 /* Calculate key component and item length to insert into S[0] */
588 set_le_ih_k_offset(ih, old_key_comp);
589 put_ih_item_len(ih, old_len - tb->rbytes);
590
591 tb->insert_size[0] -= tb->rbytes;
592
593 } else { /* whole new item falls into R[0] */
594
595 /* Shift rnum[0]-1 items to R[0] */
596 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
597 /* Insert new item into R[0] */
598 buffer_info_init_right(tb, &bi);
599 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600 ih, body, zeros_num);
601
602 if (item_pos - n + tb->rnum[0] - 1 == 0) {
603 replace_key(tb, tb->CFR[0],
604 tb->rkey[0],
605 tb->R[0], 0);
606
607 }
608 zeros_num = tb->insert_size[0] = 0;
609 }
610 } else { /* new item or part of it doesn't fall into R[0] */
611
612 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
613 }
614 break;
615 875
616 case M_PASTE: /* append item */ 876static void balance_leaf_paste_right(struct tree_balance *tb,
617 877 struct item_head *ih, const char *body)
618 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 878{
619 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 879 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
620 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ 880 int n = B_NR_ITEMS(tbS0);
621 int entry_count;
622
623 RFALSE(zeros_num,
624 "PAP-12145: invalid parameter in case of a directory");
625 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626 (tbS0, item_pos));
627 if (entry_count - tb->rbytes <
628 pos_in_item)
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
633 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
634 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
635 tb->rbytes, entry_count);
636 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
637 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
638 /* Paste given directory entry to directory item */
639 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
640 buffer_info_init_right(tb, &bi);
641 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
642 /* paste entry */
643 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644 (struct reiserfs_de_head *) body,
645 body + DEH_SIZE, tb->insert_size[0]);
646
647 if (paste_entry_position == 0) {
648 /* change delimiting keys */
649 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
650 }
651
652 tb->insert_size[0] = 0;
653 pos_in_item++;
654 } else { /* new directory entry doesn't fall into R[0] */
655
656 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
657 }
658 } else { /* regular object */
659
660 int n_shift, n_rem, r_zeros_number;
661 const char *r_body;
662
663 /* Calculate number of bytes which must be shifted from appended item */
664 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
665 n_shift = 0;
666
667 RFALSE(pos_in_item != ih_item_len
668 (B_N_PITEM_HEAD(tbS0, item_pos)),
669 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
670 pos_in_item, ih_item_len
671 (B_N_PITEM_HEAD(tbS0, item_pos)));
672
673 leaf_shift_right(tb, tb->rnum[0], n_shift);
674 /* Calculate number of bytes which must remain in body after appending to R[0] */
675 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
676 n_rem = 0;
677
678 {
679 int version;
680 unsigned long temp_rem = n_rem;
681
682 version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683 if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
685 }
686 set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687 le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
690 }
691/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
693 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
694
695 /* Append part of body into R[0] */
696 buffer_info_init_right(tb, &bi);
697 if (n_rem > zeros_num) {
698 r_zeros_number = 0;
699 r_body = body + n_rem - zeros_num;
700 } else {
701 r_body = body;
702 r_zeros_number = zeros_num - n_rem;
703 zeros_num -= r_zeros_number;
704 }
705
706 leaf_paste_in_buffer(&bi, 0, n_shift,
707 tb->insert_size[0] - n_rem,
708 r_body, r_zeros_number);
709
710 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
711#if 0
712 RFALSE(n_rem,
713 "PAP-12160: paste more than one unformatted node pointer");
714#endif
715 set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
716 }
717 tb->insert_size[0] = n_rem;
718 if (!n_rem)
719 pos_in_item++;
720 }
721 } else { /* pasted item in whole falls into R[0] */
722
723 struct item_head *pasted;
724
725 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
726 /* append item in R[0] */
727 if (pos_in_item >= 0) {
728 buffer_info_init_right(tb, &bi);
729 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730 tb->insert_size[0], body, zeros_num);
731 }
732
733 /* paste new entry, if item is directory item */
734 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737 pos_in_item, 1,
738 (struct reiserfs_de_head *) body,
739 body + DEH_SIZE, tb->insert_size[0]);
740 if (!pos_in_item) {
741
742 RFALSE(item_pos - n + tb->rnum[0],
743 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
744
745 /* update delimiting keys */
746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
747 }
748 }
749
750 if (is_indirect_le_ih(pasted))
751 set_ih_free_space(pasted, 0);
752 zeros_num = tb->insert_size[0] = 0;
753 }
754 } else { /* new item doesn't fall into R[0] */
755
756 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
757 }
758 break;
759 default: /* cases d and t */
760 reiserfs_panic(tb->tb_sb, "PAP-12175",
761 "rnum > 0: unexpected mode: %s(%d)",
762 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
763 }
764 881
882 /* new item doesn't fall into R[0] */
883 if (n - tb->rnum[0] > tb->item_pos) {
884 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
885 return;
765 } 886 }
766 887
767 /* tb->rnum[0] > 0 */ 888 /* pasted item or part of it falls to R[0] */
768 RFALSE(tb->blknum[0] > 3,
769 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
770 RFALSE(tb->blknum[0] < 0,
771 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
772 889
773 /* if while adding to a node we discover that it is possible to split 890 if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1)
774 it in two, and merge the left part into the left neighbor and the 891 /* we must shift the part of the appended item */
775 right part into the right neighbor, eliminating the node */ 892 balance_leaf_paste_right_shift(tb, ih, body);
776 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 893 else
894 /* pasted item in whole falls into R[0] */
895 balance_leaf_paste_right_whole(tb, ih, body);
896}
777 897
778 RFALSE(!tb->lnum[0] || !tb->rnum[0], 898/* shift rnum[0] items from S[0] to the right neighbor R[0] */
779 "PAP-12190: lnum and rnum must not be zero"); 899static void balance_leaf_right(struct tree_balance *tb, struct item_head *ih,
780 /* if insertion was done before 0-th position in R[0], right 900 const char *body, int flag)
781 delimiting key of the tb->L[0]'s and left delimiting key are 901{
782 not set correctly */ 902 if (tb->rnum[0] <= 0)
783 if (tb->CFL[0]) { 903 return;
784 if (!tb->CFR[0]) 904
785 reiserfs_panic(tb->tb_sb, "vs-12195", 905 BUG_ON(flag != M_INSERT && flag != M_PASTE);
786 "CFR not initialized"); 906
787 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 907 if (flag == M_INSERT)
788 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); 908 balance_leaf_insert_right(tb, ih, body);
789 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 909 else /* M_PASTE */
910 balance_leaf_paste_right(tb, ih, body);
911}
912
913static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
914 struct item_head *ih,
915 const char *body,
916 struct item_head *insert_key,
917 struct buffer_head **insert_ptr,
918 int i)
919{
920 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
921 int n = B_NR_ITEMS(tbS0);
922 struct buffer_info bi;
923 int shift;
924
925 /* new item or it part don't falls into S_new[i] */
926 if (n - tb->snum[i] >= tb->item_pos) {
927 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
928 tb->snum[i], tb->sbytes[i], tb->S_new[i]);
929 return;
930 }
931
932 /* new item or it's part falls to first new node S_new[i] */
933
934 /* part of new item falls into S_new[i] */
935 if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) {
936 int old_key_comp, old_len, r_zeroes_number;
937 const char *r_body;
938 int version;
939
940 /* Move snum[i]-1 items from S[0] to S_new[i] */
941 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i] - 1, -1,
942 tb->S_new[i]);
943
944 /* Remember key component and item length */
945 version = ih_version(ih);
946 old_key_comp = le_ih_k_offset(ih);
947 old_len = ih_item_len(ih);
948
949 /*
950 * Calculate key component and item length to insert
951 * into S_new[i]
952 */
953 shift = 0;
954 if (is_indirect_le_ih(ih))
955 shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
956 set_le_ih_k_offset(ih,
957 le_ih_k_offset(ih) +
958 ((old_len - tb->sbytes[i]) << shift));
959
960 put_ih_item_len(ih, tb->sbytes[i]);
961
962 /* Insert part of the item into S_new[i] before 0-th item */
963 buffer_info_init_bh(tb, &bi, tb->S_new[i]);
964
965 if ((old_len - tb->sbytes[i]) > tb->zeroes_num) {
966 r_zeroes_number = 0;
967 r_body = body + (old_len - tb->sbytes[i]) -
968 tb->zeroes_num;
969 } else {
970 r_body = body;
971 r_zeroes_number = tb->zeroes_num - (old_len -
972 tb->sbytes[i]);
973 tb->zeroes_num -= r_zeroes_number;
790 } 974 }
791 975
792 reiserfs_invalidate_buffer(tb, tbS0); 976 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number);
793 return 0; 977
978 /*
979 * Calculate key component and item length to
980 * insert into S[i]
981 */
982 set_le_ih_k_offset(ih, old_key_comp);
983 put_ih_item_len(ih, old_len - tb->sbytes[i]);
984 tb->insert_size[0] -= tb->sbytes[i];
985 } else {
986 /* whole new item falls into S_new[i] */
987
988 /*
989 * Shift snum[0] - 1 items to S_new[i]
990 * (sbytes[i] of split item)
991 */
992 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
993 tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]);
994
995 /* Insert new item into S_new[i] */
996 buffer_info_init_bh(tb, &bi, tb->S_new[i]);
997 leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1,
998 ih, body, tb->zeroes_num);
999
1000 tb->zeroes_num = tb->insert_size[0] = 0;
794 } 1001 }
1002}
795 1003
796 /* Fill new nodes that appear in place of S[0] */ 1004/* we append to directory item */
1005static void balance_leaf_new_nodes_paste_dirent(struct tree_balance *tb,
1006 struct item_head *ih,
1007 const char *body,
1008 struct item_head *insert_key,
1009 struct buffer_head **insert_ptr,
1010 int i)
1011{
1012 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1013 struct item_head *aux_ih = item_head(tbS0, tb->item_pos);
1014 int entry_count = ih_entry_count(aux_ih);
1015 struct buffer_info bi;
1016
1017 if (entry_count - tb->sbytes[i] < tb->pos_in_item &&
1018 tb->pos_in_item <= entry_count) {
1019 /* new directory entry falls into S_new[i] */
1020
1021 RFALSE(!tb->insert_size[0],
1022 "PAP-12215: insert_size is already 0");
1023 RFALSE(tb->sbytes[i] - 1 >= entry_count,
1024 "PAP-12220: there are no so much entries (%d), only %d",
1025 tb->sbytes[i] - 1, entry_count);
1026
1027 /*
1028 * Shift snum[i]-1 items in whole.
1029 * Shift sbytes[i] directory entries
1030 * from directory item number snum[i]
1031 */
1032 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
1033 tb->sbytes[i] - 1, tb->S_new[i]);
1034
1035 /*
1036 * Paste given directory entry to
1037 * directory item
1038 */
1039 buffer_info_init_bh(tb, &bi, tb->S_new[i]);
1040 leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count +
1041 tb->sbytes[i] - 1, tb->insert_size[0],
1042 body, tb->zeroes_num);
1043
1044 /* paste new directory entry */
1045 leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count +
1046 tb->sbytes[i] - 1, 1,
1047 (struct reiserfs_de_head *) body,
1048 body + DEH_SIZE, tb->insert_size[0]);
1049
1050 tb->insert_size[0] = 0;
1051 tb->pos_in_item++;
1052 } else {
1053 /* new directory entry doesn't fall into S_new[i] */
1054 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
1055 tb->sbytes[i], tb->S_new[i]);
1056 }
1057
1058}
797 1059
798 /* I am told that this copying is because we need an array to enable 1060static void balance_leaf_new_nodes_paste_shift(struct tree_balance *tb,
799 the looping code. -Hans */ 1061 struct item_head *ih,
800 snum[0] = tb->s1num, snum[1] = tb->s2num; 1062 const char *body,
801 sbytes[0] = tb->s1bytes; 1063 struct item_head *insert_key,
802 sbytes[1] = tb->s2bytes; 1064 struct buffer_head **insert_ptr,
1065 int i)
1066{
1067 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1068 struct item_head *aux_ih = item_head(tbS0, tb->item_pos);
1069 int n_shift, n_rem, r_zeroes_number, shift;
1070 const char *r_body;
1071 struct item_head *tmp;
1072 struct buffer_info bi;
1073
1074 RFALSE(ih, "PAP-12210: ih must be 0");
1075
1076 if (is_direntry_le_ih(aux_ih)) {
1077 balance_leaf_new_nodes_paste_dirent(tb, ih, body, insert_key,
1078 insert_ptr, i);
1079 return;
1080 }
1081
1082 /* regular object */
1083
1084
1085 RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) ||
1086 tb->insert_size[0] <= 0,
1087 "PAP-12225: item too short or insert_size <= 0");
1088
1089 /*
1090 * Calculate number of bytes which must be shifted from appended item
1091 */
1092 n_shift = tb->sbytes[i] - tb->insert_size[0];
1093 if (n_shift < 0)
1094 n_shift = 0;
1095 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift,
1096 tb->S_new[i]);
1097
1098 /*
1099 * Calculate number of bytes which must remain in body after
1100 * append to S_new[i]
1101 */
1102 n_rem = tb->insert_size[0] - tb->sbytes[i];
1103 if (n_rem < 0)
1104 n_rem = 0;
1105
1106 /* Append part of body into S_new[0] */
1107 buffer_info_init_bh(tb, &bi, tb->S_new[i]);
1108 if (n_rem > tb->zeroes_num) {
1109 r_zeroes_number = 0;
1110 r_body = body + n_rem - tb->zeroes_num;
1111 } else {
1112 r_body = body;
1113 r_zeroes_number = tb->zeroes_num - n_rem;
1114 tb->zeroes_num -= r_zeroes_number;
1115 }
1116
1117 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem,
1118 r_body, r_zeroes_number);
1119
1120 tmp = item_head(tb->S_new[i], 0);
1121 shift = 0;
1122 if (is_indirect_le_ih(tmp)) {
1123 set_ih_free_space(tmp, 0);
1124 shift = tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT;
1125 }
1126 add_le_ih_k_offset(tmp, n_rem << shift);
1127
1128 tb->insert_size[0] = n_rem;
1129 if (!n_rem)
1130 tb->pos_in_item++;
1131}
1132
1133static void balance_leaf_new_nodes_paste_whole(struct tree_balance *tb,
1134 struct item_head *ih,
1135 const char *body,
1136 struct item_head *insert_key,
1137 struct buffer_head **insert_ptr,
1138 int i)
1139
1140{
1141 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1142 int n = B_NR_ITEMS(tbS0);
1143 int leaf_mi;
1144 struct item_head *pasted;
1145 struct buffer_info bi;
1146
1147#ifdef CONFIG_REISERFS_CHECK
1148 struct item_head *ih_check = item_head(tbS0, tb->item_pos);
1149
1150 if (!is_direntry_le_ih(ih_check) &&
1151 (tb->pos_in_item != ih_item_len(ih_check) ||
1152 tb->insert_size[0] <= 0))
1153 reiserfs_panic(tb->tb_sb,
1154 "PAP-12235",
1155 "pos_in_item must be equal to ih_item_len");
1156#endif
1157
1158 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i],
1159 tb->sbytes[i], tb->S_new[i]);
1160
1161 RFALSE(leaf_mi,
1162 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1163 leaf_mi);
1164
1165 /* paste into item */
1166 buffer_info_init_bh(tb, &bi, tb->S_new[i]);
1167 leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->snum[i],
1168 tb->pos_in_item, tb->insert_size[0],
1169 body, tb->zeroes_num);
1170
1171 pasted = item_head(tb->S_new[i], tb->item_pos - n +
1172 tb->snum[i]);
1173 if (is_direntry_le_ih(pasted))
1174 leaf_paste_entries(&bi, tb->item_pos - n + tb->snum[i],
1175 tb->pos_in_item, 1,
1176 (struct reiserfs_de_head *)body,
1177 body + DEH_SIZE, tb->insert_size[0]);
1178
1179 /* if we paste to indirect item update ih_free_space */
1180 if (is_indirect_le_ih(pasted))
1181 set_ih_free_space(pasted, 0);
1182
1183 tb->zeroes_num = tb->insert_size[0] = 0;
1184
1185}
1186static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
1187 struct item_head *ih,
1188 const char *body,
1189 struct item_head *insert_key,
1190 struct buffer_head **insert_ptr,
1191 int i)
1192{
1193 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1194 int n = B_NR_ITEMS(tbS0);
1195
1196 /* pasted item doesn't fall into S_new[i] */
1197 if (n - tb->snum[i] > tb->item_pos) {
1198 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1199 tb->snum[i], tb->sbytes[i], tb->S_new[i]);
1200 return;
1201 }
1202
1203 /* pasted item or part if it falls to S_new[i] */
1204
1205 if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1)
1206 /* we must shift part of the appended item */
1207 balance_leaf_new_nodes_paste_shift(tb, ih, body, insert_key,
1208 insert_ptr, i);
1209 else
1210 /* item falls wholly into S_new[i] */
1211 balance_leaf_new_nodes_paste_whole(tb, ih, body, insert_key,
1212 insert_ptr, i);
1213}
1214
1215/* Fill new nodes that appear in place of S[0] */
1216static void balance_leaf_new_nodes(struct tree_balance *tb,
1217 struct item_head *ih,
1218 const char *body,
1219 struct item_head *insert_key,
1220 struct buffer_head **insert_ptr,
1221 int flag)
1222{
1223 int i;
803 for (i = tb->blknum[0] - 2; i >= 0; i--) { 1224 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1225 BUG_ON(flag != M_INSERT && flag != M_PASTE);
804 1226
805 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, 1227 RFALSE(!tb->snum[i],
806 snum[i]); 1228 "PAP-12200: snum[%d] == %d. Must be > 0", i,
1229 tb->snum[i]);
807 1230
808 /* here we shift from S to S_new nodes */ 1231 /* here we shift from S to S_new nodes */
809 1232
810 S_new[i] = get_FEB(tb); 1233 tb->S_new[i] = get_FEB(tb);
811 1234
812 /* initialized block type and tree level */ 1235 /* initialized block type and tree level */
813 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 1236 set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL);
814 1237
815 n = B_NR_ITEMS(tbS0); 1238 if (flag == M_INSERT)
816 1239 balance_leaf_new_nodes_insert(tb, ih, body, insert_key,
817 switch (flag) { 1240 insert_ptr, i);
818 case M_INSERT: /* insert item */ 1241 else /* M_PASTE */
819 1242 balance_leaf_new_nodes_paste(tb, ih, body, insert_key,
820 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 1243 insert_ptr, i);
821 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 1244
822 int old_key_comp, old_len, r_zeros_number; 1245 memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE);
823 const char *r_body; 1246 insert_ptr[i] = tb->S_new[i];
824 int version; 1247
825 1248 RFALSE(!buffer_journaled(tb->S_new[i])
826 /* Move snum[i]-1 items from S[0] to S_new[i] */ 1249 || buffer_journal_dirty(tb->S_new[i])
827 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1250 || buffer_dirty(tb->S_new[i]),
828 snum[i] - 1, -1, 1251 "PAP-12247: S_new[%d] : (%b)",
829 S_new[i]); 1252 i, tb->S_new[i]);
830 /* Remember key component and item length */ 1253 }
831 version = ih_version(ih); 1254}
832 old_key_comp = le_ih_k_offset(ih);
833 old_len = ih_item_len(ih);
834
835 /* Calculate key component and item length to insert into S_new[i] */
836 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
838
839 put_ih_item_len(ih, sbytes[i]);
840
841 /* Insert part of the item into S_new[i] before 0-th item */
842 buffer_info_init_bh(tb, &bi, S_new[i]);
843
844 if ((old_len - sbytes[i]) > zeros_num) {
845 r_zeros_number = 0;
846 r_body = body + (old_len - sbytes[i]) - zeros_num;
847 } else {
848 r_body = body;
849 r_zeros_number = zeros_num - (old_len - sbytes[i]);
850 zeros_num -= r_zeros_number;
851 }
852
853 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
854
855 /* Calculate key component and item length to insert into S[i] */
856 set_le_ih_k_offset(ih, old_key_comp);
857 put_ih_item_len(ih, old_len - sbytes[i]);
858 tb->insert_size[0] -= sbytes[i];
859 } else { /* whole new item falls into S_new[i] */
860
861 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
863 snum[i] - 1, sbytes[i], S_new[i]);
864
865 /* Insert new item into S_new[i] */
866 buffer_info_init_bh(tb, &bi, S_new[i]);
867 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868 ih, body, zeros_num);
869
870 zeros_num = tb->insert_size[0] = 0;
871 }
872 }
873
874 else { /* new item or it part don't falls into S_new[i] */
875 1255
876 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1256static void balance_leaf_finish_node_insert(struct tree_balance *tb,
877 snum[i], sbytes[i], S_new[i]); 1257 struct item_head *ih,
878 } 1258 const char *body)
879 break; 1259{
1260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1261 struct buffer_info bi;
1262 buffer_info_init_tbS0(tb, &bi);
1263 leaf_insert_into_buf(&bi, tb->item_pos, ih, body, tb->zeroes_num);
880 1264
881 case M_PASTE: /* append item */ 1265 /* If we insert the first key change the delimiting key */
882 1266 if (tb->item_pos == 0) {
883 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ 1267 if (tb->CFL[0]) /* can be 0 in reiserfsck */
884 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ 1268 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
885 struct item_head *aux_ih;
886
887 RFALSE(ih, "PAP-12210: ih must be 0");
888
889 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890 if (is_direntry_le_ih(aux_ih)) {
891 /* we append to directory item */
892
893 int entry_count;
894
895 entry_count = ih_entry_count(aux_ih);
896
897 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
898 /* new directory entry falls into S_new[i] */
899
900 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901 RFALSE(sbytes[i] - 1 >= entry_count,
902 "PAP-12220: there are no so much entries (%d), only %d",
903 sbytes[i] - 1, entry_count);
904
905 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
906 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
907 /* Paste given directory entry to directory item */
908 buffer_info_init_bh(tb, &bi, S_new[i]);
909 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910 tb->insert_size[0], body, zeros_num);
911 /* paste new directory entry */
912 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913 (struct reiserfs_de_head *) body,
914 body + DEH_SIZE, tb->insert_size[0]);
915 tb->insert_size[0] = 0;
916 pos_in_item++;
917 } else { /* new directory entry doesn't fall into S_new[i] */
918 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
919 }
920 } else { /* regular object */
921
922 int n_shift, n_rem, r_zeros_number;
923 const char *r_body;
924
925 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
926 "PAP-12225: item too short or insert_size <= 0");
927
928 /* Calculate number of bytes which must be shifted from appended item */
929 n_shift = sbytes[i] - tb->insert_size[0];
930 if (n_shift < 0)
931 n_shift = 0;
932 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
933
934 /* Calculate number of bytes which must remain in body after append to S_new[i] */
935 n_rem = tb->insert_size[0] - sbytes[i];
936 if (n_rem < 0)
937 n_rem = 0;
938 /* Append part of body into S_new[0] */
939 buffer_info_init_bh(tb, &bi, S_new[i]);
940 if (n_rem > zeros_num) {
941 r_zeros_number = 0;
942 r_body = body + n_rem - zeros_num;
943 } else {
944 r_body = body;
945 r_zeros_number = zeros_num - n_rem;
946 zeros_num -= r_zeros_number;
947 }
948
949 leaf_paste_in_buffer(&bi, 0, n_shift,
950 tb->insert_size[0] - n_rem,
951 r_body, r_zeros_number);
952 {
953 struct item_head *tmp;
954
955 tmp = B_N_PITEM_HEAD(S_new[i], 0);
956 if (is_indirect_le_ih
957 (tmp)) {
958 set_ih_free_space(tmp, 0);
959 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
960 } else {
961 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
962 }
963 }
964
965 tb->insert_size[0] = n_rem;
966 if (!n_rem)
967 pos_in_item++;
968 }
969 } else
970 /* item falls wholly into S_new[i] */
971 {
972 int leaf_mi;
973 struct item_head *pasted;
974 1269
975#ifdef CONFIG_REISERFS_CHECK 1270 }
976 struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos); 1271}
977
978 if (!is_direntry_le_ih(ih_check)
979 && (pos_in_item != ih_item_len(ih_check)
980 || tb->insert_size[0] <= 0))
981 reiserfs_panic(tb->tb_sb,
982 "PAP-12235",
983 "pos_in_item "
984 "must be equal "
985 "to ih_item_len");
986#endif /* CONFIG_REISERFS_CHECK */
987
988 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
989 tb, snum[i],
990 sbytes[i],
991 S_new[i]);
992
993 RFALSE(leaf_mi,
994 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
995 leaf_mi);
996
997 /* paste into item */
998 buffer_info_init_bh(tb, &bi, S_new[i]);
999 leaf_paste_in_buffer(&bi,
1000 item_pos - n + snum[i],
1001 pos_in_item,
1002 tb->insert_size[0],
1003 body, zeros_num);
1004
1005 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006 if (is_direntry_le_ih(pasted)) {
1007 leaf_paste_entries(&bi,
1008 item_pos - n + snum[i],
1009 pos_in_item, 1,
1010 (struct reiserfs_de_head *)body,
1011 body + DEH_SIZE,
1012 tb->insert_size[0]
1013 );
1014 }
1015
1016 /* if we paste to indirect item update ih_free_space */
1017 if (is_indirect_le_ih(pasted))
1018 set_ih_free_space(pasted, 0);
1019 zeros_num = tb->insert_size[0] = 0;
1020 }
1021 }
1022 1272
1023 else { /* pasted item doesn't fall into S_new[i] */ 1273static void balance_leaf_finish_node_paste_dirent(struct tree_balance *tb,
1274 struct item_head *ih,
1275 const char *body)
1276{
1277 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1278 struct item_head *pasted = item_head(tbS0, tb->item_pos);
1279 struct buffer_info bi;
1024 1280
1025 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1281 if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) {
1026 snum[i], sbytes[i], S_new[i]); 1282 RFALSE(!tb->insert_size[0],
1027 } 1283 "PAP-12260: insert_size is 0 already");
1028 break; 1284
1029 default: /* cases d and t */ 1285 /* prepare space */
1030 reiserfs_panic(tb->tb_sb, "PAP-12245", 1286 buffer_info_init_tbS0(tb, &bi);
1031 "blknum > 2: unexpected mode: %s(%d)", 1287 leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item,
1032 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 1288 tb->insert_size[0], body, tb->zeroes_num);
1289
1290 /* paste entry */
1291 leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1,
1292 (struct reiserfs_de_head *)body,
1293 body + DEH_SIZE, tb->insert_size[0]);
1294
1295 if (!tb->item_pos && !tb->pos_in_item) {
1296 RFALSE(!tb->CFL[0] || !tb->L[0],
1297 "PAP-12270: CFL[0]/L[0] must be specified");
1298 if (tb->CFL[0])
1299 replace_key(tb, tb->CFL[0], tb->lkey[0],
1300 tbS0, 0);
1033 } 1301 }
1034 1302
1035 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); 1303 tb->insert_size[0] = 0;
1036 insert_ptr[i] = S_new[i]; 1304 }
1305}
1306
1307static void balance_leaf_finish_node_paste(struct tree_balance *tb,
1308 struct item_head *ih,
1309 const char *body)
1310{
1311 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1312 struct buffer_info bi;
1313 struct item_head *pasted = item_head(tbS0, tb->item_pos);
1037 1314
1038 RFALSE(!buffer_journaled(S_new[i]) 1315 /* when directory, may be new entry already pasted */
1039 || buffer_journal_dirty(S_new[i]) 1316 if (is_direntry_le_ih(pasted)) {
1040 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", 1317 balance_leaf_finish_node_paste_dirent(tb, ih, body);
1041 i, S_new[i]); 1318 return;
1042 } 1319 }
1043 1320
1044 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1321 /* regular object */
1045 affected item which remains in S */
1046 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1047 1322
1048 switch (flag) { 1323 if (tb->pos_in_item == ih_item_len(pasted)) {
1049 case M_INSERT: /* insert item into S[0] */ 1324 RFALSE(tb->insert_size[0] <= 0,
1050 buffer_info_init_tbS0(tb, &bi); 1325 "PAP-12275: insert size must not be %d",
1051 leaf_insert_into_buf(&bi, item_pos, ih, body, 1326 tb->insert_size[0]);
1052 zeros_num); 1327 buffer_info_init_tbS0(tb, &bi);
1328 leaf_paste_in_buffer(&bi, tb->item_pos,
1329 tb->pos_in_item, tb->insert_size[0], body,
1330 tb->zeroes_num);
1053 1331
1054 /* If we insert the first key change the delimiting key */ 1332 if (is_indirect_le_ih(pasted))
1055 if (item_pos == 0) { 1333 set_ih_free_space(pasted, 0);
1056 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1057 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1058 }
1059 break;
1060 1334
1061 case M_PASTE:{ /* append item in S[0] */ 1335 tb->insert_size[0] = 0;
1062 struct item_head *pasted; 1336 }
1063
1064 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065 /* when directory, may be new entry already pasted */
1066 if (is_direntry_le_ih(pasted)) {
1067 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1068
1069 RFALSE(!tb->insert_size[0],
1070 "PAP-12260: insert_size is 0 already");
1071
1072 /* prepare space */
1073 buffer_info_init_tbS0(tb, &bi);
1074 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075 tb->insert_size[0], body,
1076 zeros_num);
1077
1078 /* paste entry */
1079 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080 (struct reiserfs_de_head *)body,
1081 body + DEH_SIZE,
1082 tb->insert_size[0]);
1083 if (!item_pos && !pos_in_item) {
1084 RFALSE(!tb->CFL[0] || !tb->L[0],
1085 "PAP-12270: CFL[0]/L[0] must be specified");
1086 if (tb->CFL[0])
1087 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1088 }
1089 tb->insert_size[0] = 0;
1090 }
1091 } else { /* regular object */
1092 if (pos_in_item == ih_item_len(pasted)) {
1093
1094 RFALSE(tb->insert_size[0] <= 0,
1095 "PAP-12275: insert size must not be %d",
1096 tb->insert_size[0]);
1097 buffer_info_init_tbS0(tb, &bi);
1098 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099 tb->insert_size[0], body, zeros_num);
1100
1101 if (is_indirect_le_ih(pasted)) {
1102#if 0
1103 RFALSE(tb->
1104 insert_size[0] !=
1105 UNFM_P_SIZE,
1106 "PAP-12280: insert_size for indirect item must be %d, not %d",
1107 UNFM_P_SIZE,
1108 tb->
1109 insert_size[0]);
1110#endif
1111 set_ih_free_space(pasted, 0);
1112 }
1113 tb->insert_size[0] = 0;
1114 }
1115#ifdef CONFIG_REISERFS_CHECK 1337#ifdef CONFIG_REISERFS_CHECK
1116 else { 1338 else if (tb->insert_size[0]) {
1117 if (tb->insert_size[0]) { 1339 print_cur_tb("12285");
1118 print_cur_tb("12285"); 1340 reiserfs_panic(tb->tb_sb, "PAP-12285",
1119 reiserfs_panic(tb->tb_sb, 1341 "insert_size must be 0 (%d)", tb->insert_size[0]);
1120 "PAP-12285", 1342 }
1121 "insert_size " 1343#endif
1122 "must be 0 " 1344}
1123 "(%d)", 1345
1124 tb->insert_size[0]); 1346/*
1125 } 1347 * if the affected item was not wholly shifted then we
1126 } 1348 * perform all necessary operations on that part or whole
1127#endif /* CONFIG_REISERFS_CHECK */ 1349 * of the affected item which remains in S
1128 1350 */
1129 } 1351static void balance_leaf_finish_node(struct tree_balance *tb,
1130 } /* case M_PASTE: */ 1352 struct item_head *ih,
1353 const char *body, int flag)
1354{
1355 /* if we must insert or append into buffer S[0] */
1356 if (0 <= tb->item_pos && tb->item_pos < tb->s0num) {
1357 if (flag == M_INSERT)
1358 balance_leaf_finish_node_insert(tb, ih, body);
1359 else /* M_PASTE */
1360 balance_leaf_finish_node_paste(tb, ih, body);
1361 }
1362}
1363
1364/**
1365 * balance_leaf - reiserfs tree balancing algorithm
1366 * @tb: tree balance state
1367 * @ih: item header of inserted item (little endian)
1368 * @body: body of inserted item or bytes to paste
1369 * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance)
1370 * passed back:
1371 * @insert_key: key to insert new nodes
1372 * @insert_ptr: array of nodes to insert at the next level
1373 *
1374 * In our processing of one level we sometimes determine what must be
1375 * inserted into the next higher level. This insertion consists of a
1376 * key or two keys and their corresponding pointers.
1377 */
1378static int balance_leaf(struct tree_balance *tb, struct item_head *ih,
1379 const char *body, int flag,
1380 struct item_head *insert_key,
1381 struct buffer_head **insert_ptr)
1382{
1383 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1384
1385 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
1386
1387 /* Make balance in case insert_size[0] < 0 */
1388 if (tb->insert_size[0] < 0)
1389 return balance_leaf_when_delete(tb, flag);
1390
1391 tb->item_pos = PATH_LAST_POSITION(tb->tb_path),
1392 tb->pos_in_item = tb->tb_path->pos_in_item,
1393 tb->zeroes_num = 0;
1394 if (flag == M_INSERT && !body)
1395 tb->zeroes_num = ih_item_len(ih);
1396
1397 /*
1398 * for indirect item pos_in_item is measured in unformatted node
1399 * pointers. Recalculate to bytes
1400 */
1401 if (flag != M_INSERT
1402 && is_indirect_le_ih(item_head(tbS0, tb->item_pos)))
1403 tb->pos_in_item *= UNFM_P_SIZE;
1404
1405 balance_leaf_left(tb, ih, body, flag);
1406
1407 /* tb->lnum[0] > 0 */
1408 /* Calculate new item position */
1409 tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
1410
1411 balance_leaf_right(tb, ih, body, flag);
1412
1413 /* tb->rnum[0] > 0 */
1414 RFALSE(tb->blknum[0] > 3,
1415 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
1416 RFALSE(tb->blknum[0] < 0,
1417 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
1418
1419 /*
1420 * if while adding to a node we discover that it is possible to split
1421 * it in two, and merge the left part into the left neighbor and the
1422 * right part into the right neighbor, eliminating the node
1423 */
1424 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1425
1426 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1427 "PAP-12190: lnum and rnum must not be zero");
1428 /*
1429 * if insertion was done before 0-th position in R[0], right
1430 * delimiting key of the tb->L[0]'s and left delimiting key are
1431 * not set correctly
1432 */
1433 if (tb->CFL[0]) {
1434 if (!tb->CFR[0])
1435 reiserfs_panic(tb->tb_sb, "vs-12195",
1436 "CFR not initialized");
1437 copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
1438 internal_key(tb->CFR[0], tb->rkey[0]));
1439 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1131 } 1440 }
1441
1442 reiserfs_invalidate_buffer(tb, tbS0);
1443 return 0;
1132 } 1444 }
1445
1446 balance_leaf_new_nodes(tb, ih, body, insert_key, insert_ptr, flag);
1447
1448 balance_leaf_finish_node(tb, ih, body, flag);
1449
1133#ifdef CONFIG_REISERFS_CHECK 1450#ifdef CONFIG_REISERFS_CHECK
1134 if (flag == M_PASTE && tb->insert_size[0]) { 1451 if (flag == M_PASTE && tb->insert_size[0]) {
1135 print_cur_tb("12290"); 1452 print_cur_tb("12290");
@@ -1137,9 +1454,11 @@ static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item h
1137 "PAP-12290", "insert_size is still not 0 (%d)", 1454 "PAP-12290", "insert_size is still not 0 (%d)",
1138 tb->insert_size[0]); 1455 tb->insert_size[0]);
1139 } 1456 }
1140#endif /* CONFIG_REISERFS_CHECK */ 1457#endif
1458
1459 /* Leaf level of the tree is balanced (end of balance_leaf) */
1141 return 0; 1460 return 0;
1142} /* Leaf level of the tree is balanced (end of balance_leaf) */ 1461}
1143 1462
1144/* Make empty node */ 1463/* Make empty node */
1145void make_empty_node(struct buffer_info *bi) 1464void make_empty_node(struct buffer_info *bi)
@@ -1178,9 +1497,7 @@ struct buffer_head *get_FEB(struct tree_balance *tb)
1178 return tb->used[i]; 1497 return tb->used[i];
1179} 1498}
1180 1499
1181/* This is now used because reiserfs_free_block has to be able to 1500/* This is now used because reiserfs_free_block has to be able to schedule. */
1182** schedule.
1183*/
1184static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) 1501static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1185{ 1502{
1186 int i; 1503 int i;
@@ -1246,10 +1563,10 @@ void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1246 1563
1247 if (B_IS_ITEMS_LEVEL(src)) 1564 if (B_IS_ITEMS_LEVEL(src))
1248 /* source buffer contains leaf node */ 1565 /* source buffer contains leaf node */
1249 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), 1566 memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1250 KEY_SIZE); 1567 KEY_SIZE);
1251 else 1568 else
1252 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), 1569 memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1253 KEY_SIZE); 1570 KEY_SIZE);
1254 1571
1255 do_balance_mark_internal_dirty(tb, dest, 0); 1572 do_balance_mark_internal_dirty(tb, dest, 0);
@@ -1335,8 +1652,10 @@ static int check_before_balancing(struct tree_balance *tb)
1335 "mount point."); 1652 "mount point.");
1336 } 1653 }
1337 1654
1338 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have 1655 /*
1339 prepped all of these for us). */ 1656 * double check that buffers that we will modify are unlocked.
1657 * (fix_nodes should already have prepped all of these for us).
1658 */
1340 if (tb->lnum[0]) { 1659 if (tb->lnum[0]) {
1341 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); 1660 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); 1661 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
@@ -1429,49 +1748,51 @@ static void check_internal_levels(struct tree_balance *tb)
1429 1748
1430#endif 1749#endif
1431 1750
1432/* Now we have all of the buffers that must be used in balancing of 1751/*
1433 the tree. We rely on the assumption that schedule() will not occur 1752 * Now we have all of the buffers that must be used in balancing of
1434 while do_balance works. ( Only interrupt handlers are acceptable.) 1753 * the tree. We rely on the assumption that schedule() will not occur
1435 We balance the tree according to the analysis made before this, 1754 * while do_balance works. ( Only interrupt handlers are acceptable.)
1436 using buffers already obtained. For SMP support it will someday be 1755 * We balance the tree according to the analysis made before this,
1437 necessary to add ordered locking of tb. */ 1756 * using buffers already obtained. For SMP support it will someday be
1438 1757 * necessary to add ordered locking of tb.
1439/* Some interesting rules of balancing: 1758 */
1440
1441 we delete a maximum of two nodes per level per balancing: we never
1442 delete R, when we delete two of three nodes L, S, R then we move
1443 them into R.
1444
1445 we only delete L if we are deleting two nodes, if we delete only
1446 one node we delete S
1447
1448 if we shift leaves then we shift as much as we can: this is a
1449 deliberate policy of extremism in node packing which results in
1450 higher average utilization after repeated random balance operations
1451 at the cost of more memory copies and more balancing as a result of
1452 small insertions to full nodes.
1453
1454 if we shift internal nodes we try to evenly balance the node
1455 utilization, with consequent less balancing at the cost of lower
1456 utilization.
1457
1458 one could argue that the policy for directories in leaves should be
1459 that of internal nodes, but we will wait until another day to
1460 evaluate this.... It would be nice to someday measure and prove
1461 these assumptions as to what is optimal....
1462 1759
1463*/ 1760/*
1761 * Some interesting rules of balancing:
1762 * we delete a maximum of two nodes per level per balancing: we never
1763 * delete R, when we delete two of three nodes L, S, R then we move
1764 * them into R.
1765 *
1766 * we only delete L if we are deleting two nodes, if we delete only
1767 * one node we delete S
1768 *
1769 * if we shift leaves then we shift as much as we can: this is a
1770 * deliberate policy of extremism in node packing which results in
1771 * higher average utilization after repeated random balance operations
1772 * at the cost of more memory copies and more balancing as a result of
1773 * small insertions to full nodes.
1774 *
1775 * if we shift internal nodes we try to evenly balance the node
1776 * utilization, with consequent less balancing at the cost of lower
1777 * utilization.
1778 *
1779 * one could argue that the policy for directories in leaves should be
1780 * that of internal nodes, but we will wait until another day to
1781 * evaluate this.... It would be nice to someday measure and prove
1782 * these assumptions as to what is optimal....
1783 */
1464 1784
1465static inline void do_balance_starts(struct tree_balance *tb) 1785static inline void do_balance_starts(struct tree_balance *tb)
1466{ 1786{
1467 /* use print_cur_tb() to see initial state of struct 1787 /* use print_cur_tb() to see initial state of struct tree_balance */
1468 tree_balance */
1469 1788
1470 /* store_print_tb (tb); */ 1789 /* store_print_tb (tb); */
1471 1790
1472 /* do not delete, just comment it out */ 1791 /* do not delete, just comment it out */
1473/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, 1792 /*
1474 "check");*/ 1793 print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
1794 tb->tb_path->pos_in_item, tb, "check");
1795 */
1475 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); 1796 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1476#ifdef CONFIG_REISERFS_CHECK 1797#ifdef CONFIG_REISERFS_CHECK
1477 REISERFS_SB(tb->tb_sb)->cur_tb = tb; 1798 REISERFS_SB(tb->tb_sb)->cur_tb = tb;
@@ -1487,9 +1808,10 @@ static inline void do_balance_completed(struct tree_balance *tb)
1487 REISERFS_SB(tb->tb_sb)->cur_tb = NULL; 1808 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1488#endif 1809#endif
1489 1810
1490 /* reiserfs_free_block is no longer schedule safe. So, we need to 1811 /*
1491 ** put the buffers we want freed on the thrown list during do_balance, 1812 * reiserfs_free_block is no longer schedule safe. So, we need to
1492 ** and then free them now 1813 * put the buffers we want freed on the thrown list during do_balance,
1814 * and then free them now
1493 */ 1815 */
1494 1816
1495 REISERFS_SB(tb->tb_sb)->s_do_balance++; 1817 REISERFS_SB(tb->tb_sb)->s_do_balance++;
@@ -1500,36 +1822,40 @@ static inline void do_balance_completed(struct tree_balance *tb)
1500 free_thrown(tb); 1822 free_thrown(tb);
1501} 1823}
1502 1824
1503void do_balance(struct tree_balance *tb, /* tree_balance structure */ 1825/*
1504 struct item_head *ih, /* item header of inserted item */ 1826 * do_balance - balance the tree
1505 const char *body, /* body of inserted item or bytes to paste */ 1827 *
1506 int flag) 1828 * @tb: tree_balance structure
1507{ /* i - insert, d - delete 1829 * @ih: item header of inserted item
1508 c - cut, p - paste 1830 * @body: body of inserted item or bytes to paste
1509 1831 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1510 Cut means delete part of an item 1832 *
1511 (includes removing an entry from a 1833 * Cut means delete part of an item (includes removing an entry from a
1512 directory). 1834 * directory).
1513 1835 *
1514 Delete means delete whole item. 1836 * Delete means delete whole item.
1515 1837 *
1516 Insert means add a new item into the 1838 * Insert means add a new item into the tree.
1517 tree. 1839 *
1518 1840 * Paste means to append to the end of an existing file or to
1519 Paste means to append to the end of an 1841 * insert a directory entry.
1520 existing file or to insert a directory 1842 */
1521 entry. */ 1843void do_balance(struct tree_balance *tb, struct item_head *ih,
1522 int child_pos, /* position of a child node in its parent */ 1844 const char *body, int flag)
1523 h; /* level of the tree being processed */ 1845{
1524 struct item_head insert_key[2]; /* in our processing of one level 1846 int child_pos; /* position of a child node in its parent */
1525 we sometimes determine what 1847 int h; /* level of the tree being processed */
1526 must be inserted into the next 1848
1527 higher level. This insertion 1849 /*
1528 consists of a key or two keys 1850 * in our processing of one level we sometimes determine what
1529 and their corresponding 1851 * must be inserted into the next higher level. This insertion
1530 pointers */ 1852 * consists of a key or two keys and their corresponding
1531 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next 1853 * pointers
1532 level */ 1854 */
1855 struct item_head insert_key[2];
1856
1857 /* inserted node-ptrs for the next level */
1858 struct buffer_head *insert_ptr[2];
1533 1859
1534 tb->tb_mode = flag; 1860 tb->tb_mode = flag;
1535 tb->need_balance_dirty = 0; 1861 tb->need_balance_dirty = 0;
@@ -1546,12 +1872,14 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */
1546 return; 1872 return;
1547 } 1873 }
1548 1874
1549 atomic_inc(&(fs_generation(tb->tb_sb))); 1875 atomic_inc(&fs_generation(tb->tb_sb));
1550 do_balance_starts(tb); 1876 do_balance_starts(tb);
1551 1877
1552 /* balance leaf returns 0 except if combining L R and S into 1878 /*
1553 one node. see balance_internal() for explanation of this 1879 * balance_leaf returns 0 except if combining L R and S into
1554 line of code. */ 1880 * one node. see balance_internal() for explanation of this
1881 * line of code.
1882 */
1555 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + 1883 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); 1884 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1557 1885
@@ -1561,9 +1889,8 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */
1561 1889
1562 /* Balance internal level of the tree. */ 1890 /* Balance internal level of the tree. */
1563 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++) 1891 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564 child_pos = 1892 child_pos = balance_internal(tb, h, child_pos, insert_key,
1565 balance_internal(tb, h, child_pos, insert_key, insert_ptr); 1893 insert_ptr);
1566 1894
1567 do_balance_completed(tb); 1895 do_balance_completed(tb);
1568
1569} 1896}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ed58d843d578..5f6c32c668b6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -15,20 +15,20 @@
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16 16
17/* 17/*
18** We pack the tails of files on file close, not at the time they are written. 18 * We pack the tails of files on file close, not at the time they are written.
19** This implies an unnecessary copy of the tail and an unnecessary indirect item 19 * This implies an unnecessary copy of the tail and an unnecessary indirect item
20** insertion/balancing, for files that are written in one write. 20 * insertion/balancing, for files that are written in one write.
21** It avoids unnecessary tail packings (balances) for files that are written in 21 * It avoids unnecessary tail packings (balances) for files that are written in
22** multiple writes and are small enough to have tails. 22 * multiple writes and are small enough to have tails.
23** 23 *
24** file_release is called by the VFS layer when the file is closed. If 24 * file_release is called by the VFS layer when the file is closed. If
25** this is the last open file descriptor, and the file 25 * this is the last open file descriptor, and the file
26** small enough to have a tail, and the tail is currently in an 26 * small enough to have a tail, and the tail is currently in an
27** unformatted node, the tail is converted back into a direct item. 27 * unformatted node, the tail is converted back into a direct item.
28** 28 *
29** We use reiserfs_truncate_file to pack the tail, since it already has 29 * We use reiserfs_truncate_file to pack the tail, since it already has
30** all the conditions coded. 30 * all the conditions coded.
31*/ 31 */
32static int reiserfs_file_release(struct inode *inode, struct file *filp) 32static int reiserfs_file_release(struct inode *inode, struct file *filp)
33{ 33{
34 34
@@ -41,10 +41,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
41 if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1)) 41 if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
42 return 0; 42 return 0;
43 43
44 mutex_lock(&(REISERFS_I(inode)->tailpack)); 44 mutex_lock(&REISERFS_I(inode)->tailpack);
45 45
46 if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) { 46 if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
47 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 47 mutex_unlock(&REISERFS_I(inode)->tailpack);
48 return 0; 48 return 0;
49 } 49 }
50 50
@@ -52,31 +52,35 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
52 if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || 52 if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
53 !tail_has_to_be_packed(inode)) && 53 !tail_has_to_be_packed(inode)) &&
54 REISERFS_I(inode)->i_prealloc_count <= 0) { 54 REISERFS_I(inode)->i_prealloc_count <= 0) {
55 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 55 mutex_unlock(&REISERFS_I(inode)->tailpack);
56 return 0; 56 return 0;
57 } 57 }
58 58
59 reiserfs_write_lock(inode->i_sb); 59 reiserfs_write_lock(inode->i_sb);
60 /* freeing preallocation only involves relogging blocks that 60 /*
61 * freeing preallocation only involves relogging blocks that
61 * are already in the current transaction. preallocation gets 62 * are already in the current transaction. preallocation gets
62 * freed at the end of each transaction, so it is impossible for 63 * freed at the end of each transaction, so it is impossible for
63 * us to log any additional blocks (including quota blocks) 64 * us to log any additional blocks (including quota blocks)
64 */ 65 */
65 err = journal_begin(&th, inode->i_sb, 1); 66 err = journal_begin(&th, inode->i_sb, 1);
66 if (err) { 67 if (err) {
67 /* uh oh, we can't allow the inode to go away while there 68 /*
69 * uh oh, we can't allow the inode to go away while there
68 * is still preallocation blocks pending. Try to join the 70 * is still preallocation blocks pending. Try to join the
69 * aborted transaction 71 * aborted transaction
70 */ 72 */
71 jbegin_failure = err; 73 jbegin_failure = err;
72 err = journal_join_abort(&th, inode->i_sb, 1); 74 err = journal_join_abort(&th, inode->i_sb);
73 75
74 if (err) { 76 if (err) {
75 /* hmpf, our choices here aren't good. We can pin the inode 77 /*
76 * which will disallow unmount from every happening, we can 78 * hmpf, our choices here aren't good. We can pin
77 * do nothing, which will corrupt random memory on unmount, 79 * the inode which will disallow unmount from ever
78 * or we can forcibly remove the file from the preallocation 80 * happening, we can do nothing, which will corrupt
79 * list, which will leak blocks on disk. Lets pin the inode 81 * random memory on unmount, or we can forcibly
82 * remove the file from the preallocation list, which
83 * will leak blocks on disk. Lets pin the inode
80 * and let the admin know what is going on. 84 * and let the admin know what is going on.
81 */ 85 */
82 igrab(inode); 86 igrab(inode);
@@ -92,7 +96,7 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
92#ifdef REISERFS_PREALLOCATE 96#ifdef REISERFS_PREALLOCATE
93 reiserfs_discard_prealloc(&th, inode); 97 reiserfs_discard_prealloc(&th, inode);
94#endif 98#endif
95 err = journal_end(&th, inode->i_sb, 1); 99 err = journal_end(&th);
96 100
97 /* copy back the error code from journal_begin */ 101 /* copy back the error code from journal_begin */
98 if (!err) 102 if (!err)
@@ -102,35 +106,38 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
102 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 106 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
103 tail_has_to_be_packed(inode)) { 107 tail_has_to_be_packed(inode)) {
104 108
105 /* if regular file is released by last holder and it has been 109 /*
106 appended (we append by unformatted node only) or its direct 110 * if regular file is released by last holder and it has been
107 item(s) had to be converted, then it may have to be 111 * appended (we append by unformatted node only) or its direct
108 indirect2direct converted */ 112 * item(s) had to be converted, then it may have to be
113 * indirect2direct converted
114 */
109 err = reiserfs_truncate_file(inode, 0); 115 err = reiserfs_truncate_file(inode, 0);
110 } 116 }
111 out: 117out:
112 reiserfs_write_unlock(inode->i_sb); 118 reiserfs_write_unlock(inode->i_sb);
113 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 119 mutex_unlock(&REISERFS_I(inode)->tailpack);
114 return err; 120 return err;
115} 121}
116 122
117static int reiserfs_file_open(struct inode *inode, struct file *file) 123static int reiserfs_file_open(struct inode *inode, struct file *file)
118{ 124{
119 int err = dquot_file_open(inode, file); 125 int err = dquot_file_open(inode, file);
126
127 /* somebody might be tailpacking on final close; wait for it */
120 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { 128 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
121 /* somebody might be tailpacking on final close; wait for it */ 129 mutex_lock(&REISERFS_I(inode)->tailpack);
122 mutex_lock(&(REISERFS_I(inode)->tailpack));
123 atomic_inc(&REISERFS_I(inode)->openers); 130 atomic_inc(&REISERFS_I(inode)->openers);
124 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 131 mutex_unlock(&REISERFS_I(inode)->tailpack);
125 } 132 }
126 return err; 133 return err;
127} 134}
128 135
129void reiserfs_vfs_truncate_file(struct inode *inode) 136void reiserfs_vfs_truncate_file(struct inode *inode)
130{ 137{
131 mutex_lock(&(REISERFS_I(inode)->tailpack)); 138 mutex_lock(&REISERFS_I(inode)->tailpack);
132 reiserfs_truncate_file(inode, 1); 139 reiserfs_truncate_file(inode, 1);
133 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 140 mutex_unlock(&REISERFS_I(inode)->tailpack);
134} 141}
135 142
136/* Sync a reiserfs file. */ 143/* Sync a reiserfs file. */
@@ -205,10 +212,11 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
205 set_buffer_uptodate(bh); 212 set_buffer_uptodate(bh);
206 if (logit) { 213 if (logit) {
207 reiserfs_prepare_for_journal(s, bh, 1); 214 reiserfs_prepare_for_journal(s, bh, 1);
208 journal_mark_dirty(&th, s, bh); 215 journal_mark_dirty(&th, bh);
209 } else if (!buffer_dirty(bh)) { 216 } else if (!buffer_dirty(bh)) {
210 mark_buffer_dirty(bh); 217 mark_buffer_dirty(bh);
211 /* do data=ordered on any page past the end 218 /*
219 * do data=ordered on any page past the end
212 * of file and any buffer marked BH_New. 220 * of file and any buffer marked BH_New.
213 */ 221 */
214 if (reiserfs_data_ordered(inode->i_sb) && 222 if (reiserfs_data_ordered(inode->i_sb) &&
@@ -219,8 +227,8 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
219 } 227 }
220 } 228 }
221 if (logit) { 229 if (logit) {
222 ret = journal_end(&th, s, bh_per_page + 1); 230 ret = journal_end(&th);
223 drop_write_lock: 231drop_write_lock:
224 reiserfs_write_unlock(s); 232 reiserfs_write_unlock(s);
225 } 233 }
226 /* 234 /*
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index dc4d41530316..6b0ddb2a9091 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2,59 +2,32 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/**
6 ** old_item_num
7 ** old_entry_num
8 ** set_entry_sizes
9 ** create_virtual_node
10 ** check_left
11 ** check_right
12 ** directory_part_size
13 ** get_num_ver
14 ** set_parameters
15 ** is_leaf_removable
16 ** are_leaves_removable
17 ** get_empty_nodes
18 ** get_lfree
19 ** get_rfree
20 ** is_left_neighbor_in_cache
21 ** decrement_key
22 ** get_far_parent
23 ** get_parents
24 ** can_node_be_removed
25 ** ip_check_balance
26 ** dc_check_balance_internal
27 ** dc_check_balance_leaf
28 ** dc_check_balance
29 ** check_balance
30 ** get_direct_parent
31 ** get_neighbors
32 ** fix_nodes
33 **
34 **
35 **/
36
37#include <linux/time.h> 5#include <linux/time.h>
38#include <linux/slab.h> 6#include <linux/slab.h>
39#include <linux/string.h> 7#include <linux/string.h>
40#include "reiserfs.h" 8#include "reiserfs.h"
41#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
42 10
43/* To make any changes in the tree we find a node, that contains item 11/*
44 to be changed/deleted or position in the node we insert a new item 12 * To make any changes in the tree we find a node that contains item
45 to. We call this node S. To do balancing we need to decide what we 13 * to be changed/deleted or position in the node we insert a new item
46 will shift to left/right neighbor, or to a new node, where new item 14 * to. We call this node S. To do balancing we need to decide what we
47 will be etc. To make this analysis simpler we build virtual 15 * will shift to left/right neighbor, or to a new node, where new item
48 node. Virtual node is an array of items, that will replace items of 16 * will be etc. To make this analysis simpler we build virtual
49 node S. (For instance if we are going to delete an item, virtual 17 * node. Virtual node is an array of items, that will replace items of
50 node does not contain it). Virtual node keeps information about 18 * node S. (For instance if we are going to delete an item, virtual
51 item sizes and types, mergeability of first and last items, sizes 19 * node does not contain it). Virtual node keeps information about
52 of all entries in directory item. We use this array of items when 20 * item sizes and types, mergeability of first and last items, sizes
53 calculating what we can shift to neighbors and how many nodes we 21 * of all entries in directory item. We use this array of items when
54 have to have if we do not any shiftings, if we shift to left/right 22 * calculating what we can shift to neighbors and how many nodes we
55 neighbor or to both. */ 23 * have to have if we do not any shiftings, if we shift to left/right
56 24 * neighbor or to both.
57/* taking item number in virtual node, returns number of item, that it has in source buffer */ 25 */
26
27/*
28 * Takes item number in virtual node, returns number of item
29 * that it has in source buffer
30 */
58static inline int old_item_num(int new_num, int affected_item_num, int mode) 31static inline int old_item_num(int new_num, int affected_item_num, int mode)
59{ 32{
60 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 33 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
@@ -105,14 +78,17 @@ static void create_virtual_node(struct tree_balance *tb, int h)
105 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); 78 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
106 79
107 /* first item in the node */ 80 /* first item in the node */
108 ih = B_N_PITEM_HEAD(Sh, 0); 81 ih = item_head(Sh, 0);
109 82
110 /* define the mergeability for 0-th item (if it is not being deleted) */ 83 /* define the mergeability for 0-th item (if it is not being deleted) */
111 if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size) 84 if (op_is_left_mergeable(&ih->ih_key, Sh->b_size)
112 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) 85 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
113 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; 86 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
114 87
115 /* go through all items those remain in the virtual node (except for the new (inserted) one) */ 88 /*
89 * go through all items that remain in the virtual
90 * node (except for the new (inserted) one)
91 */
116 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { 92 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
117 int j; 93 int j;
118 struct virtual_item *vi = vn->vn_vi + new_num; 94 struct virtual_item *vi = vn->vn_vi + new_num;
@@ -128,11 +104,13 @@ static void create_virtual_node(struct tree_balance *tb, int h)
128 104
129 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; 105 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
130 vi->vi_ih = ih + j; 106 vi->vi_ih = ih + j;
131 vi->vi_item = B_I_PITEM(Sh, ih + j); 107 vi->vi_item = ih_item_body(Sh, ih + j);
132 vi->vi_uarea = vn->vn_free_ptr; 108 vi->vi_uarea = vn->vn_free_ptr;
133 109
134 // FIXME: there is no check, that item operation did not 110 /*
135 // consume too much memory 111 * FIXME: there is no check that item operation did not
112 * consume too much memory
113 */
136 vn->vn_free_ptr += 114 vn->vn_free_ptr +=
137 op_create_vi(vn, vi, is_affected, tb->insert_size[0]); 115 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
138 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) 116 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
@@ -145,7 +123,8 @@ static void create_virtual_node(struct tree_balance *tb, int h)
145 123
146 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { 124 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
147 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; 125 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
148 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted 126 /* pointer to data which is going to be pasted */
127 vi->vi_new_data = vn->vn_data;
149 } 128 }
150 } 129 }
151 130
@@ -164,11 +143,14 @@ static void create_virtual_node(struct tree_balance *tb, int h)
164 tb->insert_size[0]); 143 tb->insert_size[0]);
165 } 144 }
166 145
167 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ 146 /*
147 * set right merge flag we take right delimiting key and
148 * check whether it is a mergeable item
149 */
168 if (tb->CFR[0]) { 150 if (tb->CFR[0]) {
169 struct reiserfs_key *key; 151 struct reiserfs_key *key;
170 152
171 key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]); 153 key = internal_key(tb->CFR[0], tb->rkey[0]);
172 if (op_is_left_mergeable(key, Sh->b_size) 154 if (op_is_left_mergeable(key, Sh->b_size)
173 && (vn->vn_mode != M_DELETE 155 && (vn->vn_mode != M_DELETE
174 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) 156 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
@@ -179,12 +161,19 @@ static void create_virtual_node(struct tree_balance *tb, int h)
179 if (op_is_left_mergeable(key, Sh->b_size) && 161 if (op_is_left_mergeable(key, Sh->b_size) &&
180 !(vn->vn_mode != M_DELETE 162 !(vn->vn_mode != M_DELETE
181 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { 163 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
182 /* we delete last item and it could be merged with right neighbor's first item */ 164 /*
165 * we delete last item and it could be merged
166 * with right neighbor's first item
167 */
183 if (! 168 if (!
184 (B_NR_ITEMS(Sh) == 1 169 (B_NR_ITEMS(Sh) == 1
185 && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0)) 170 && is_direntry_le_ih(item_head(Sh, 0))
186 && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) { 171 && ih_entry_count(item_head(Sh, 0)) == 1)) {
187 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ 172 /*
173 * node contains more than 1 item, or item
174 * is not directory item, or this item
175 * contains more than 1 entry
176 */
188 print_block(Sh, 0, -1, -1); 177 print_block(Sh, 0, -1, -1);
189 reiserfs_panic(tb->tb_sb, "vs-8045", 178 reiserfs_panic(tb->tb_sb, "vs-8045",
190 "rdkey %k, affected item==%d " 179 "rdkey %k, affected item==%d "
@@ -198,8 +187,10 @@ static void create_virtual_node(struct tree_balance *tb, int h)
198 } 187 }
199} 188}
200 189
201/* using virtual node check, how many items can be shifted to left 190/*
202 neighbor */ 191 * Using virtual node check, how many items can be
192 * shifted to left neighbor
193 */
203static void check_left(struct tree_balance *tb, int h, int cur_free) 194static void check_left(struct tree_balance *tb, int h, int cur_free)
204{ 195{
205 int i; 196 int i;
@@ -259,9 +250,13 @@ static void check_left(struct tree_balance *tb, int h, int cur_free)
259 } 250 }
260 251
261 /* the item cannot be shifted entirely, try to split it */ 252 /* the item cannot be shifted entirely, try to split it */
262 /* check whether L[0] can hold ih and at least one byte of the item body */ 253 /*
254 * check whether L[0] can hold ih and at least one byte
255 * of the item body
256 */
257
258 /* cannot shift even a part of the current item */
263 if (cur_free <= ih_size) { 259 if (cur_free <= ih_size) {
264 /* cannot shift even a part of the current item */
265 tb->lbytes = -1; 260 tb->lbytes = -1;
266 return; 261 return;
267 } 262 }
@@ -278,8 +273,10 @@ static void check_left(struct tree_balance *tb, int h, int cur_free)
278 return; 273 return;
279} 274}
280 275
281/* using virtual node check, how many items can be shifted to right 276/*
282 neighbor */ 277 * Using virtual node check, how many items can be
278 * shifted to right neighbor
279 */
283static void check_right(struct tree_balance *tb, int h, int cur_free) 280static void check_right(struct tree_balance *tb, int h, int cur_free)
284{ 281{
285 int i; 282 int i;
@@ -338,13 +335,21 @@ static void check_right(struct tree_balance *tb, int h, int cur_free)
338 continue; 335 continue;
339 } 336 }
340 337
341 /* check whether R[0] can hold ih and at least one byte of the item body */ 338 /*
342 if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ 339 * check whether R[0] can hold ih and at least one
340 * byte of the item body
341 */
342
343 /* cannot shift even a part of the current item */
344 if (cur_free <= ih_size) {
343 tb->rbytes = -1; 345 tb->rbytes = -1;
344 return; 346 return;
345 } 347 }
346 348
347 /* R[0] can hold the header of the item and at least one byte of its body */ 349 /*
350 * R[0] can hold the header of the item and at least
351 * one byte of its body
352 */
348 cur_free -= ih_size; /* cur_free is still > 0 */ 353 cur_free -= ih_size; /* cur_free is still > 0 */
349 354
350 tb->rbytes = op_check_right(vi, cur_free); 355 tb->rbytes = op_check_right(vi, cur_free);
@@ -361,45 +366,64 @@ static void check_right(struct tree_balance *tb, int h, int cur_free)
361/* 366/*
362 * from - number of items, which are shifted to left neighbor entirely 367 * from - number of items, which are shifted to left neighbor entirely
363 * to - number of items, which are shifted to right neighbor entirely 368 * to - number of items, which are shifted to right neighbor entirely
364 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor 369 * from_bytes - number of bytes of boundary item (or directory entries)
365 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ 370 * which are shifted to left neighbor
371 * to_bytes - number of bytes of boundary item (or directory entries)
372 * which are shifted to right neighbor
373 */
366static int get_num_ver(int mode, struct tree_balance *tb, int h, 374static int get_num_ver(int mode, struct tree_balance *tb, int h,
367 int from, int from_bytes, 375 int from, int from_bytes,
368 int to, int to_bytes, short *snum012, int flow) 376 int to, int to_bytes, short *snum012, int flow)
369{ 377{
370 int i; 378 int i;
371 int cur_free; 379 int cur_free;
372 // int bytes;
373 int units; 380 int units;
374 struct virtual_node *vn = tb->tb_vn; 381 struct virtual_node *vn = tb->tb_vn;
375 // struct virtual_item * vi;
376
377 int total_node_size, max_node_size, current_item_size; 382 int total_node_size, max_node_size, current_item_size;
378 int needed_nodes; 383 int needed_nodes;
379 int start_item, /* position of item we start filling node from */ 384
380 end_item, /* position of item we finish filling node by */ 385 /* position of item we start filling node from */
381 start_bytes, /* number of first bytes (entries for directory) of start_item-th item 386 int start_item;
382 we do not include into node that is being filled */ 387
383 end_bytes; /* number of last bytes (entries for directory) of end_item-th item 388 /* position of item we finish filling node by */
384 we do not include into node that is being filled */ 389 int end_item;
385 int split_item_positions[2]; /* these are positions in virtual item of 390
386 items, that are split between S[0] and 391 /*
387 S1new and S1new and S2new */ 392 * number of first bytes (entries for directory) of start_item-th item
393 * we do not include into node that is being filled
394 */
395 int start_bytes;
396
397 /*
398 * number of last bytes (entries for directory) of end_item-th item
399 * we do not include into node that is being filled
400 */
401 int end_bytes;
402
403 /*
404 * these are positions in virtual item of items, that are split
405 * between S[0] and S1new and S1new and S2new
406 */
407 int split_item_positions[2];
388 408
389 split_item_positions[0] = -1; 409 split_item_positions[0] = -1;
390 split_item_positions[1] = -1; 410 split_item_positions[1] = -1;
391 411
392 /* We only create additional nodes if we are in insert or paste mode 412 /*
393 or we are in replace mode at the internal level. If h is 0 and 413 * We only create additional nodes if we are in insert or paste mode
394 the mode is M_REPLACE then in fix_nodes we change the mode to 414 * or we are in replace mode at the internal level. If h is 0 and
395 paste or insert before we get here in the code. */ 415 * the mode is M_REPLACE then in fix_nodes we change the mode to
416 * paste or insert before we get here in the code.
417 */
396 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 418 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
397 "vs-8100: insert_size < 0 in overflow"); 419 "vs-8100: insert_size < 0 in overflow");
398 420
399 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); 421 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
400 422
401 /* snum012 [0-2] - number of items, that lay 423 /*
402 to S[0], first new node and second new node */ 424 * snum012 [0-2] - number of items, that lay
425 * to S[0], first new node and second new node
426 */
403 snum012[3] = -1; /* s1bytes */ 427 snum012[3] = -1; /* s1bytes */
404 snum012[4] = -1; /* s2bytes */ 428 snum012[4] = -1; /* s2bytes */
405 429
@@ -416,20 +440,22 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
416 total_node_size = 0; 440 total_node_size = 0;
417 cur_free = max_node_size; 441 cur_free = max_node_size;
418 442
419 // start from 'from'-th item 443 /* start from 'from'-th item */
420 start_item = from; 444 start_item = from;
421 // skip its first 'start_bytes' units 445 /* skip its first 'start_bytes' units */
422 start_bytes = ((from_bytes != -1) ? from_bytes : 0); 446 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
423 447
424 // last included item is the 'end_item'-th one 448 /* last included item is the 'end_item'-th one */
425 end_item = vn->vn_nr_item - to - 1; 449 end_item = vn->vn_nr_item - to - 1;
426 // do not count last 'end_bytes' units of 'end_item'-th item 450 /* do not count last 'end_bytes' units of 'end_item'-th item */
427 end_bytes = (to_bytes != -1) ? to_bytes : 0; 451 end_bytes = (to_bytes != -1) ? to_bytes : 0;
428 452
429 /* go through all items beginning from the start_item-th item and ending by 453 /*
430 the end_item-th item. Do not count first 'start_bytes' units of 454 * go through all items beginning from the start_item-th item
431 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ 455 * and ending by the end_item-th item. Do not count first
432 456 * 'start_bytes' units of 'start_item'-th item and last
457 * 'end_bytes' of 'end_item'-th item
458 */
433 for (i = start_item; i <= end_item; i++) { 459 for (i = start_item; i <= end_item; i++) {
434 struct virtual_item *vi = vn->vn_vi + i; 460 struct virtual_item *vi = vn->vn_vi + i;
435 int skip_from_end = ((i == end_item) ? end_bytes : 0); 461 int skip_from_end = ((i == end_item) ? end_bytes : 0);
@@ -439,7 +465,10 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
439 /* get size of current item */ 465 /* get size of current item */
440 current_item_size = vi->vi_item_len; 466 current_item_size = vi->vi_item_len;
441 467
442 /* do not take in calculation head part (from_bytes) of from-th item */ 468 /*
469 * do not take in calculation head part (from_bytes)
470 * of from-th item
471 */
443 current_item_size -= 472 current_item_size -=
444 op_part_size(vi, 0 /*from start */ , start_bytes); 473 op_part_size(vi, 0 /*from start */ , start_bytes);
445 474
@@ -455,9 +484,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
455 continue; 484 continue;
456 } 485 }
457 486
487 /*
488 * virtual item length is longer, than max size of item in
489 * a node. It is impossible for direct item
490 */
458 if (current_item_size > max_node_size) { 491 if (current_item_size > max_node_size) {
459 /* virtual item length is longer, than max size of item in
460 a node. It is impossible for direct item */
461 RFALSE(is_direct_le_ih(vi->vi_ih), 492 RFALSE(is_direct_le_ih(vi->vi_ih),
462 "vs-8110: " 493 "vs-8110: "
463 "direct item length is %d. It can not be longer than %d", 494 "direct item length is %d. It can not be longer than %d",
@@ -466,15 +497,18 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
466 flow = 1; 497 flow = 1;
467 } 498 }
468 499
500 /* as we do not split items, take new node and continue */
469 if (!flow) { 501 if (!flow) {
470 /* as we do not split items, take new node and continue */
471 needed_nodes++; 502 needed_nodes++;
472 i--; 503 i--;
473 total_node_size = 0; 504 total_node_size = 0;
474 continue; 505 continue;
475 } 506 }
476 // calculate number of item units which fit into node being 507
477 // filled 508 /*
509 * calculate number of item units which fit into node being
510 * filled
511 */
478 { 512 {
479 int free_space; 513 int free_space;
480 514
@@ -482,17 +516,17 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
482 units = 516 units =
483 op_check_left(vi, free_space, start_bytes, 517 op_check_left(vi, free_space, start_bytes,
484 skip_from_end); 518 skip_from_end);
519 /*
520 * nothing fits into current node, take new
521 * node and continue
522 */
485 if (units == -1) { 523 if (units == -1) {
486 /* nothing fits into current node, take new node and continue */
487 needed_nodes++, i--, total_node_size = 0; 524 needed_nodes++, i--, total_node_size = 0;
488 continue; 525 continue;
489 } 526 }
490 } 527 }
491 528
492 /* something fits into the current node */ 529 /* something fits into the current node */
493 //if (snum012[3] != -1 || needed_nodes != 1)
494 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
495 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
496 start_bytes += units; 530 start_bytes += units;
497 snum012[needed_nodes - 1 + 3] = units; 531 snum012[needed_nodes - 1 + 3] = units;
498 532
@@ -508,9 +542,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
508 total_node_size = 0; 542 total_node_size = 0;
509 } 543 }
510 544
511 // sum012[4] (if it is not -1) contains number of units of which 545 /*
512 // are to be in S1new, snum012[3] - to be in S0. They are supposed 546 * sum012[4] (if it is not -1) contains number of units of which
513 // to be S1bytes and S2bytes correspondingly, so recalculate 547 * are to be in S1new, snum012[3] - to be in S0. They are supposed
548 * to be S1bytes and S2bytes correspondingly, so recalculate
549 */
514 if (snum012[4] > 0) { 550 if (snum012[4] > 0) {
515 int split_item_num; 551 int split_item_num;
516 int bytes_to_r, bytes_to_l; 552 int bytes_to_r, bytes_to_l;
@@ -527,7 +563,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
527 ((split_item_positions[0] == 563 ((split_item_positions[0] ==
528 split_item_positions[1]) ? snum012[3] : 0); 564 split_item_positions[1]) ? snum012[3] : 0);
529 565
530 // s2bytes 566 /* s2bytes */
531 snum012[4] = 567 snum012[4] =
532 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - 568 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
533 bytes_to_r - bytes_to_l - bytes_to_S1new; 569 bytes_to_r - bytes_to_l - bytes_to_S1new;
@@ -555,7 +591,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
555 ((split_item_positions[0] == split_item_positions[1] 591 ((split_item_positions[0] == split_item_positions[1]
556 && snum012[4] != -1) ? snum012[4] : 0); 592 && snum012[4] != -1) ? snum012[4] : 0);
557 593
558 // s1bytes 594 /* s1bytes */
559 snum012[3] = 595 snum012[3] =
560 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - 596 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
561 bytes_to_r - bytes_to_l - bytes_to_S2new; 597 bytes_to_r - bytes_to_l - bytes_to_S2new;
@@ -565,7 +601,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
565} 601}
566 602
567 603
568/* Set parameters for balancing. 604/*
605 * Set parameters for balancing.
569 * Performs write of results of analysis of balancing into structure tb, 606 * Performs write of results of analysis of balancing into structure tb,
570 * where it will later be used by the functions that actually do the balancing. 607 * where it will later be used by the functions that actually do the balancing.
571 * Parameters: 608 * Parameters:
@@ -575,11 +612,12 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
575 * rnum number of items from S[h] that must be shifted to R[h]; 612 * rnum number of items from S[h] that must be shifted to R[h];
576 * blk_num number of blocks that S[h] will be splitted into; 613 * blk_num number of blocks that S[h] will be splitted into;
577 * s012 number of items that fall into splitted nodes. 614 * s012 number of items that fall into splitted nodes.
578 * lbytes number of bytes which flow to the left neighbor from the item that is not 615 * lbytes number of bytes which flow to the left neighbor from the
579 * shifted entirely 616 * item that is not shifted entirely
580 * rbytes number of bytes which flow to the right neighbor from the item that is not 617 * rbytes number of bytes which flow to the right neighbor from the
581 * shifted entirely 618 * item that is not shifted entirely
582 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) 619 * s1bytes number of bytes which flow to the first new node when
620 * S[0] splits (this number is contained in s012 array)
583 */ 621 */
584 622
585static void set_parameters(struct tree_balance *tb, int h, int lnum, 623static void set_parameters(struct tree_balance *tb, int h, int lnum,
@@ -590,12 +628,14 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum,
590 tb->rnum[h] = rnum; 628 tb->rnum[h] = rnum;
591 tb->blknum[h] = blk_num; 629 tb->blknum[h] = blk_num;
592 630
593 if (h == 0) { /* only for leaf level */ 631 /* only for leaf level */
632 if (h == 0) {
594 if (s012 != NULL) { 633 if (s012 != NULL) {
595 tb->s0num = *s012++, 634 tb->s0num = *s012++;
596 tb->s1num = *s012++, tb->s2num = *s012++; 635 tb->snum[0] = *s012++;
597 tb->s1bytes = *s012++; 636 tb->snum[1] = *s012++;
598 tb->s2bytes = *s012; 637 tb->sbytes[0] = *s012++;
638 tb->sbytes[1] = *s012;
599 } 639 }
600 tb->lbytes = lb; 640 tb->lbytes = lb;
601 tb->rbytes = rb; 641 tb->rbytes = rb;
@@ -607,8 +647,10 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum,
607 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); 647 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
608} 648}
609 649
610/* check, does node disappear if we shift tb->lnum[0] items to left 650/*
611 neighbor and tb->rnum[0] to the right one. */ 651 * check if node disappears if we shift tb->lnum[0] items to left
652 * neighbor and tb->rnum[0] to the right one.
653 */
612static int is_leaf_removable(struct tree_balance *tb) 654static int is_leaf_removable(struct tree_balance *tb)
613{ 655{
614 struct virtual_node *vn = tb->tb_vn; 656 struct virtual_node *vn = tb->tb_vn;
@@ -616,8 +658,10 @@ static int is_leaf_removable(struct tree_balance *tb)
616 int size; 658 int size;
617 int remain_items; 659 int remain_items;
618 660
619 /* number of items, that will be shifted to left (right) neighbor 661 /*
620 entirely */ 662 * number of items that will be shifted to left (right) neighbor
663 * entirely
664 */
621 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 665 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
622 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 666 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
623 remain_items = vn->vn_nr_item; 667 remain_items = vn->vn_nr_item;
@@ -625,21 +669,21 @@ static int is_leaf_removable(struct tree_balance *tb)
625 /* how many items remain in S[0] after shiftings to neighbors */ 669 /* how many items remain in S[0] after shiftings to neighbors */
626 remain_items -= (to_left + to_right); 670 remain_items -= (to_left + to_right);
627 671
672 /* all content of node can be shifted to neighbors */
628 if (remain_items < 1) { 673 if (remain_items < 1) {
629 /* all content of node can be shifted to neighbors */
630 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, 674 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
631 NULL, -1, -1); 675 NULL, -1, -1);
632 return 1; 676 return 1;
633 } 677 }
634 678
679 /* S[0] is not removable */
635 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) 680 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
636 /* S[0] is not removable */
637 return 0; 681 return 0;
638 682
639 /* check, whether we can divide 1 remaining item between neighbors */ 683 /* check whether we can divide 1 remaining item between neighbors */
640 684
641 /* get size of remaining item (in item units) */ 685 /* get size of remaining item (in item units) */
642 size = op_unit_num(&(vn->vn_vi[to_left])); 686 size = op_unit_num(&vn->vn_vi[to_left]);
643 687
644 if (tb->lbytes + tb->rbytes >= size) { 688 if (tb->lbytes + tb->rbytes >= size) {
645 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, 689 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
@@ -675,23 +719,28 @@ static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
675 "vs-8125: item number must be 1: it is %d", 719 "vs-8125: item number must be 1: it is %d",
676 B_NR_ITEMS(S0)); 720 B_NR_ITEMS(S0));
677 721
678 ih = B_N_PITEM_HEAD(S0, 0); 722 ih = item_head(S0, 0);
679 if (tb->CFR[0] 723 if (tb->CFR[0]
680 && !comp_short_le_keys(&(ih->ih_key), 724 && !comp_short_le_keys(&ih->ih_key,
681 B_N_PDELIM_KEY(tb->CFR[0], 725 internal_key(tb->CFR[0],
682 tb->rkey[0]))) 726 tb->rkey[0])))
727 /*
728 * Directory must be in correct state here: that is
729 * somewhere at the left side should exist first
730 * directory item. But the item being deleted can
731 * not be that first one because its right neighbor
732 * is item of the same directory. (But first item
733 * always gets deleted in last turn). So, neighbors
734 * of deleted item can be merged, so we can save
735 * ih_size
736 */
683 if (is_direntry_le_ih(ih)) { 737 if (is_direntry_le_ih(ih)) {
684 /* Directory must be in correct state here: that is
685 somewhere at the left side should exist first directory
686 item. But the item being deleted can not be that first
687 one because its right neighbor is item of the same
688 directory. (But first item always gets deleted in last
689 turn). So, neighbors of deleted item can be merged, so
690 we can save ih_size */
691 ih_size = IH_SIZE; 738 ih_size = IH_SIZE;
692 739
693 /* we might check that left neighbor exists and is of the 740 /*
694 same directory */ 741 * we might check that left neighbor exists
742 * and is of the same directory
743 */
695 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, 744 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
696 "vs-8130: first directory item can not be removed until directory is not empty"); 745 "vs-8130: first directory item can not be removed until directory is not empty");
697 } 746 }
@@ -770,7 +819,8 @@ static void free_buffers_in_tb(struct tree_balance *tb)
770 } 819 }
771} 820}
772 821
773/* Get new buffers for storing new nodes that are created while balancing. 822/*
823 * Get new buffers for storing new nodes that are created while balancing.
774 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 824 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
775 * CARRY_ON - schedule didn't occur while the function worked; 825 * CARRY_ON - schedule didn't occur while the function worked;
776 * NO_DISK_SPACE - no disk space. 826 * NO_DISK_SPACE - no disk space.
@@ -778,28 +828,33 @@ static void free_buffers_in_tb(struct tree_balance *tb)
778/* The function is NOT SCHEDULE-SAFE! */ 828/* The function is NOT SCHEDULE-SAFE! */
779static int get_empty_nodes(struct tree_balance *tb, int h) 829static int get_empty_nodes(struct tree_balance *tb, int h)
780{ 830{
781 struct buffer_head *new_bh, 831 struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h);
782 *Sh = PATH_H_PBUFFER(tb->tb_path, h);
783 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; 832 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
784 int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ 833 int counter, number_of_freeblk;
785 retval = CARRY_ON; 834 int amount_needed; /* number of needed empty blocks */
835 int retval = CARRY_ON;
786 struct super_block *sb = tb->tb_sb; 836 struct super_block *sb = tb->tb_sb;
787 837
788 /* number_of_freeblk is the number of empty blocks which have been 838 /*
789 acquired for use by the balancing algorithm minus the number of 839 * number_of_freeblk is the number of empty blocks which have been
790 empty blocks used in the previous levels of the analysis, 840 * acquired for use by the balancing algorithm minus the number of
791 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs 841 * empty blocks used in the previous levels of the analysis,
792 after empty blocks are acquired, and the balancing analysis is 842 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
793 then restarted, amount_needed is the number needed by this level 843 * occurs after empty blocks are acquired, and the balancing analysis
794 (h) of the balancing analysis. 844 * is then restarted, amount_needed is the number needed by this
795 845 * level (h) of the balancing analysis.
796 Note that for systems with many processes writing, it would be 846 *
797 more layout optimal to calculate the total number needed by all 847 * Note that for systems with many processes writing, it would be
798 levels and then to run reiserfs_new_blocks to get all of them at once. */ 848 * more layout optimal to calculate the total number needed by all
799 849 * levels and then to run reiserfs_new_blocks to get all of them at
800 /* Initiate number_of_freeblk to the amount acquired prior to the restart of 850 * once.
801 the analysis or 0 if not restarted, then subtract the amount needed 851 */
802 by all of the levels of the tree below h. */ 852
853 /*
854 * Initiate number_of_freeblk to the amount acquired prior to the
855 * restart of the analysis or 0 if not restarted, then subtract the
856 * amount needed by all of the levels of the tree below h.
857 */
803 /* blknum includes S[h], so we subtract 1 in this calculation */ 858 /* blknum includes S[h], so we subtract 1 in this calculation */
804 for (counter = 0, number_of_freeblk = tb->cur_blknum; 859 for (counter = 0, number_of_freeblk = tb->cur_blknum;
805 counter < h; counter++) 860 counter < h; counter++)
@@ -810,13 +865,19 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
810 /* Allocate missing empty blocks. */ 865 /* Allocate missing empty blocks. */
811 /* if Sh == 0 then we are getting a new root */ 866 /* if Sh == 0 then we are getting a new root */
812 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; 867 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
813 /* Amount_needed = the amount that we need more than the amount that we have. */ 868 /*
869 * Amount_needed = the amount that we need more than the
870 * amount that we have.
871 */
814 if (amount_needed > number_of_freeblk) 872 if (amount_needed > number_of_freeblk)
815 amount_needed -= number_of_freeblk; 873 amount_needed -= number_of_freeblk;
816 else /* If we have enough already then there is nothing to do. */ 874 else /* If we have enough already then there is nothing to do. */
817 return CARRY_ON; 875 return CARRY_ON;
818 876
819 /* No need to check quota - is not allocated for blocks used for formatted nodes */ 877 /*
878 * No need to check quota - is not allocated for blocks used
879 * for formatted nodes
880 */
820 if (reiserfs_new_form_blocknrs(tb, blocknrs, 881 if (reiserfs_new_form_blocknrs(tb, blocknrs,
821 amount_needed) == NO_DISK_SPACE) 882 amount_needed) == NO_DISK_SPACE)
822 return NO_DISK_SPACE; 883 return NO_DISK_SPACE;
@@ -849,8 +910,10 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
849 return retval; 910 return retval;
850} 911}
851 912
852/* Get free space of the left neighbor, which is stored in the parent 913/*
853 * node of the left neighbor. */ 914 * Get free space of the left neighbor, which is stored in the parent
915 * node of the left neighbor.
916 */
854static int get_lfree(struct tree_balance *tb, int h) 917static int get_lfree(struct tree_balance *tb, int h)
855{ 918{
856 struct buffer_head *l, *f; 919 struct buffer_head *l, *f;
@@ -870,7 +933,8 @@ static int get_lfree(struct tree_balance *tb, int h)
870 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 933 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
871} 934}
872 935
873/* Get free space of the right neighbor, 936/*
937 * Get free space of the right neighbor,
874 * which is stored in the parent node of the right neighbor. 938 * which is stored in the parent node of the right neighbor.
875 */ 939 */
876static int get_rfree(struct tree_balance *tb, int h) 940static int get_rfree(struct tree_balance *tb, int h)
@@ -916,7 +980,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
916 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 980 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
917 father, tb->FL[h]); 981 father, tb->FL[h]);
918 982
919 /* Get position of the pointer to the left neighbor into the left father. */ 983 /*
984 * Get position of the pointer to the left neighbor
985 * into the left father.
986 */
920 left_neighbor_position = (father == tb->FL[h]) ? 987 left_neighbor_position = (father == tb->FL[h]) ?
921 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); 988 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
922 /* Get left neighbor block number. */ 989 /* Get left neighbor block number. */
@@ -940,17 +1007,20 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
940 1007
941static void decrement_key(struct cpu_key *key) 1008static void decrement_key(struct cpu_key *key)
942{ 1009{
943 // call item specific function for this key 1010 /* call item specific function for this key */
944 item_ops[cpu_key_k_type(key)]->decrement_key(key); 1011 item_ops[cpu_key_k_type(key)]->decrement_key(key);
945} 1012}
946 1013
947/* Calculate far left/right parent of the left/right neighbor of the current node, that 1014/*
948 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 1015 * Calculate far left/right parent of the left/right neighbor of the
1016 * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor
1017 * of the parent F[h].
949 * Calculate left/right common parent of the current node and L[h]/R[h]. 1018 * Calculate left/right common parent of the current node and L[h]/R[h].
950 * Calculate left/right delimiting key position. 1019 * Calculate left/right delimiting key position.
951 * Returns: PATH_INCORRECT - path in the tree is not correct; 1020 * Returns: PATH_INCORRECT - path in the tree is not correct
952 SCHEDULE_OCCURRED - schedule occurred while the function worked; 1021 * SCHEDULE_OCCURRED - schedule occurred while the function worked
953 * CARRY_ON - schedule didn't occur while the function worked; 1022 * CARRY_ON - schedule didn't occur while the function
1023 * worked
954 */ 1024 */
955static int get_far_parent(struct tree_balance *tb, 1025static int get_far_parent(struct tree_balance *tb,
956 int h, 1026 int h,
@@ -966,8 +1036,10 @@ static int get_far_parent(struct tree_balance *tb,
966 first_last_position = 0, 1036 first_last_position = 0,
967 path_offset = PATH_H_PATH_OFFSET(path, h); 1037 path_offset = PATH_H_PATH_OFFSET(path, h);
968 1038
969 /* Starting from F[h] go upwards in the tree, and look for the common 1039 /*
970 ancestor of F[h], and its neighbor l/r, that should be obtained. */ 1040 * Starting from F[h] go upwards in the tree, and look for the common
1041 * ancestor of F[h], and its neighbor l/r, that should be obtained.
1042 */
971 1043
972 counter = path_offset; 1044 counter = path_offset;
973 1045
@@ -975,21 +1047,33 @@ static int get_far_parent(struct tree_balance *tb,
975 "PAP-8180: invalid path length"); 1047 "PAP-8180: invalid path length");
976 1048
977 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { 1049 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
978 /* Check whether parent of the current buffer in the path is really parent in the tree. */ 1050 /*
1051 * Check whether parent of the current buffer in the path
1052 * is really parent in the tree.
1053 */
979 if (!B_IS_IN_TREE 1054 if (!B_IS_IN_TREE
980 (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) 1055 (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
981 return REPEAT_SEARCH; 1056 return REPEAT_SEARCH;
1057
982 /* Check whether position in the parent is correct. */ 1058 /* Check whether position in the parent is correct. */
983 if ((position = 1059 if ((position =
984 PATH_OFFSET_POSITION(path, 1060 PATH_OFFSET_POSITION(path,
985 counter - 1)) > 1061 counter - 1)) >
986 B_NR_ITEMS(parent)) 1062 B_NR_ITEMS(parent))
987 return REPEAT_SEARCH; 1063 return REPEAT_SEARCH;
988 /* Check whether parent at the path really points to the child. */ 1064
1065 /*
1066 * Check whether parent at the path really points
1067 * to the child.
1068 */
989 if (B_N_CHILD_NUM(parent, position) != 1069 if (B_N_CHILD_NUM(parent, position) !=
990 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) 1070 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
991 return REPEAT_SEARCH; 1071 return REPEAT_SEARCH;
992 /* Return delimiting key if position in the parent is not equal to first/last one. */ 1072
1073 /*
1074 * Return delimiting key if position in the parent is not
1075 * equal to first/last one.
1076 */
993 if (c_lr_par == RIGHT_PARENTS) 1077 if (c_lr_par == RIGHT_PARENTS)
994 first_last_position = B_NR_ITEMS(parent); 1078 first_last_position = B_NR_ITEMS(parent);
995 if (position != first_last_position) { 1079 if (position != first_last_position) {
@@ -1002,7 +1086,10 @@ static int get_far_parent(struct tree_balance *tb,
1002 1086
1003 /* if we are in the root of the tree, then there is no common father */ 1087 /* if we are in the root of the tree, then there is no common father */
1004 if (counter == FIRST_PATH_ELEMENT_OFFSET) { 1088 if (counter == FIRST_PATH_ELEMENT_OFFSET) {
1005 /* Check whether first buffer in the path is the root of the tree. */ 1089 /*
1090 * Check whether first buffer in the path is the
1091 * root of the tree.
1092 */
1006 if (PATH_OFFSET_PBUFFER 1093 if (PATH_OFFSET_PBUFFER
1007 (tb->tb_path, 1094 (tb->tb_path,
1008 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1095 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
@@ -1031,12 +1118,15 @@ static int get_far_parent(struct tree_balance *tb,
1031 } 1118 }
1032 } 1119 }
1033 1120
1034 /* So, we got common parent of the current node and its left/right neighbor. 1121 /*
1035 Now we are geting the parent of the left/right neighbor. */ 1122 * So, we got common parent of the current node and its
1123 * left/right neighbor. Now we are getting the parent of the
1124 * left/right neighbor.
1125 */
1036 1126
1037 /* Form key to get parent of the left/right neighbor. */ 1127 /* Form key to get parent of the left/right neighbor. */
1038 le_key2cpu_key(&s_lr_father_key, 1128 le_key2cpu_key(&s_lr_father_key,
1039 B_N_PDELIM_KEY(*pcom_father, 1129 internal_key(*pcom_father,
1040 (c_lr_par == 1130 (c_lr_par ==
1041 LEFT_PARENTS) ? (tb->lkey[h - 1] = 1131 LEFT_PARENTS) ? (tb->lkey[h - 1] =
1042 position - 1132 position -
@@ -1050,7 +1140,7 @@ static int get_far_parent(struct tree_balance *tb,
1050 if (search_by_key 1140 if (search_by_key
1051 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, 1141 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1052 h + 1) == IO_ERROR) 1142 h + 1) == IO_ERROR)
1053 // path is released 1143 /* path is released */
1054 return IO_ERROR; 1144 return IO_ERROR;
1055 1145
1056 if (FILESYSTEM_CHANGED_TB(tb)) { 1146 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -1071,12 +1161,15 @@ static int get_far_parent(struct tree_balance *tb,
1071 return CARRY_ON; 1161 return CARRY_ON;
1072} 1162}
1073 1163
1074/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of 1164/*
1075 * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], 1165 * Get parents of neighbors of node in the path(S[path_offset]) and
1076 * FR[path_offset], CFL[path_offset], CFR[path_offset]. 1166 * common parents of S[path_offset] and L[path_offset]/R[path_offset]:
1077 * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. 1167 * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset],
1078 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1168 * CFR[path_offset].
1079 * CARRY_ON - schedule didn't occur while the function worked; 1169 * Calculate numbers of left and right delimiting keys position:
1170 * lkey[path_offset], rkey[path_offset].
1171 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked
1172 * CARRY_ON - schedule didn't occur while the function worked
1080 */ 1173 */
1081static int get_parents(struct tree_balance *tb, int h) 1174static int get_parents(struct tree_balance *tb, int h)
1082{ 1175{
@@ -1088,8 +1181,11 @@ static int get_parents(struct tree_balance *tb, int h)
1088 1181
1089 /* Current node is the root of the tree or will be root of the tree */ 1182 /* Current node is the root of the tree or will be root of the tree */
1090 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1183 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1091 /* The root can not have parents. 1184 /*
1092 Release nodes which previously were obtained as parents of the current node neighbors. */ 1185 * The root can not have parents.
1186 * Release nodes which previously were obtained as
1187 * parents of the current node neighbors.
1188 */
1093 brelse(tb->FL[h]); 1189 brelse(tb->FL[h]);
1094 brelse(tb->CFL[h]); 1190 brelse(tb->CFL[h]);
1095 brelse(tb->FR[h]); 1191 brelse(tb->FR[h]);
@@ -1111,10 +1207,14 @@ static int get_parents(struct tree_balance *tb, int h)
1111 get_bh(curf); 1207 get_bh(curf);
1112 tb->lkey[h] = position - 1; 1208 tb->lkey[h] = position - 1;
1113 } else { 1209 } else {
1114 /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. 1210 /*
1115 Calculate current common parent of L[path_offset] and the current node. Note that 1211 * Calculate current parent of L[path_offset], which is the
1116 CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. 1212 * left neighbor of the current node. Calculate current
1117 Calculate lkey[path_offset]. */ 1213 * common parent of L[path_offset] and the current node.
1214 * Note that CFL[path_offset] not equal FL[path_offset] and
1215 * CFL[path_offset] not equal F[path_offset].
1216 * Calculate lkey[path_offset].
1217 */
1118 if ((ret = get_far_parent(tb, h + 1, &curf, 1218 if ((ret = get_far_parent(tb, h + 1, &curf,
1119 &curcf, 1219 &curcf,
1120 LEFT_PARENTS)) != CARRY_ON) 1220 LEFT_PARENTS)) != CARRY_ON)
@@ -1130,19 +1230,22 @@ static int get_parents(struct tree_balance *tb, int h)
1130 (curcf && !B_IS_IN_TREE(curcf)), 1230 (curcf && !B_IS_IN_TREE(curcf)),
1131 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); 1231 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1132 1232
1133/* Get parent FR[h] of R[h]. */ 1233 /* Get parent FR[h] of R[h]. */
1134 1234
1135/* Current node is the last child of F[h]. FR[h] != F[h]. */ 1235 /* Current node is the last child of F[h]. FR[h] != F[h]. */
1136 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { 1236 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1137/* Calculate current parent of R[h], which is the right neighbor of F[h]. 1237 /*
1138 Calculate current common parent of R[h] and current node. Note that CFR[h] 1238 * Calculate current parent of R[h], which is the right
1139 not equal FR[path_offset] and CFR[h] not equal F[h]. */ 1239 * neighbor of F[h]. Calculate current common parent of
1240 * R[h] and current node. Note that CFR[h] not equal
1241 * FR[path_offset] and CFR[h] not equal F[h].
1242 */
1140 if ((ret = 1243 if ((ret =
1141 get_far_parent(tb, h + 1, &curf, &curcf, 1244 get_far_parent(tb, h + 1, &curf, &curcf,
1142 RIGHT_PARENTS)) != CARRY_ON) 1245 RIGHT_PARENTS)) != CARRY_ON)
1143 return ret; 1246 return ret;
1144 } else { 1247 } else {
1145/* Current node is not the last child of its parent F[h]. */ 1248 /* Current node is not the last child of its parent F[h]. */
1146 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1249 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1147 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1250 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1148 get_bh(curf); 1251 get_bh(curf);
@@ -1165,8 +1268,10 @@ static int get_parents(struct tree_balance *tb, int h)
1165 return CARRY_ON; 1268 return CARRY_ON;
1166} 1269}
1167 1270
1168/* it is possible to remove node as result of shiftings to 1271/*
1169 neighbors even when we insert or paste item. */ 1272 * it is possible to remove node as result of shiftings to
1273 * neighbors even when we insert or paste item.
1274 */
1170static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, 1275static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1171 struct tree_balance *tb, int h) 1276 struct tree_balance *tb, int h)
1172{ 1277{
@@ -1175,21 +1280,22 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1175 struct item_head *ih; 1280 struct item_head *ih;
1176 struct reiserfs_key *r_key = NULL; 1281 struct reiserfs_key *r_key = NULL;
1177 1282
1178 ih = B_N_PITEM_HEAD(Sh, 0); 1283 ih = item_head(Sh, 0);
1179 if (tb->CFR[h]) 1284 if (tb->CFR[h])
1180 r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]); 1285 r_key = internal_key(tb->CFR[h], tb->rkey[h]);
1181 1286
1182 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes 1287 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1183 /* shifting may merge items which might save space */ 1288 /* shifting may merge items which might save space */
1184 - 1289 -
1185 ((!h 1290 ((!h
1186 && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0) 1291 && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0)
1187 - 1292 -
1188 ((!h && r_key 1293 ((!h && r_key
1189 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) 1294 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1190 + ((h) ? KEY_SIZE : 0)) { 1295 + ((h) ? KEY_SIZE : 0)) {
1191 /* node can not be removed */ 1296 /* node can not be removed */
1192 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1297 if (sfree >= levbytes) {
1298 /* new item fits into node S[h] without any shifting */
1193 if (!h) 1299 if (!h)
1194 tb->s0num = 1300 tb->s0num =
1195 B_NR_ITEMS(Sh) + 1301 B_NR_ITEMS(Sh) +
@@ -1202,7 +1308,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1202 return !NO_BALANCING_NEEDED; 1308 return !NO_BALANCING_NEEDED;
1203} 1309}
1204 1310
1205/* Check whether current node S[h] is balanced when increasing its size by 1311/*
1312 * Check whether current node S[h] is balanced when increasing its size by
1206 * Inserting or Pasting. 1313 * Inserting or Pasting.
1207 * Calculate parameters for balancing for current level h. 1314 * Calculate parameters for balancing for current level h.
1208 * Parameters: 1315 * Parameters:
@@ -1219,39 +1326,48 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1219static int ip_check_balance(struct tree_balance *tb, int h) 1326static int ip_check_balance(struct tree_balance *tb, int h)
1220{ 1327{
1221 struct virtual_node *vn = tb->tb_vn; 1328 struct virtual_node *vn = tb->tb_vn;
1222 int levbytes, /* Number of bytes that must be inserted into (value 1329 /*
1223 is negative if bytes are deleted) buffer which 1330 * Number of bytes that must be inserted into (value is negative
1224 contains node being balanced. The mnemonic is 1331 * if bytes are deleted) buffer which contains node being balanced.
1225 that the attempted change in node space used level 1332 * The mnemonic is that the attempted change in node space used
1226 is levbytes bytes. */ 1333 * level is levbytes bytes.
1227 ret; 1334 */
1335 int levbytes;
1336 int ret;
1228 1337
1229 int lfree, sfree, rfree /* free space in L, S and R */ ; 1338 int lfree, sfree, rfree /* free space in L, S and R */ ;
1230 1339
1231 /* nver is short for number of vertixes, and lnver is the number if 1340 /*
1232 we shift to the left, rnver is the number if we shift to the 1341 * nver is short for number of vertixes, and lnver is the number if
1233 right, and lrnver is the number if we shift in both directions. 1342 * we shift to the left, rnver is the number if we shift to the
1234 The goal is to minimize first the number of vertixes, and second, 1343 * right, and lrnver is the number if we shift in both directions.
1235 the number of vertixes whose contents are changed by shifting, 1344 * The goal is to minimize first the number of vertixes, and second,
1236 and third the number of uncached vertixes whose contents are 1345 * the number of vertixes whose contents are changed by shifting,
1237 changed by shifting and must be read from disk. */ 1346 * and third the number of uncached vertixes whose contents are
1347 * changed by shifting and must be read from disk.
1348 */
1238 int nver, lnver, rnver, lrnver; 1349 int nver, lnver, rnver, lrnver;
1239 1350
1240 /* used at leaf level only, S0 = S[0] is the node being balanced, 1351 /*
1241 sInum [ I = 0,1,2 ] is the number of items that will 1352 * used at leaf level only, S0 = S[0] is the node being balanced,
1242 remain in node SI after balancing. S1 and S2 are new 1353 * sInum [ I = 0,1,2 ] is the number of items that will
1243 nodes that might be created. */ 1354 * remain in node SI after balancing. S1 and S2 are new
1355 * nodes that might be created.
1356 */
1244 1357
1245 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 1358 /*
1246 where 4th parameter is s1bytes and 5th - s2bytes 1359 * we perform 8 calls to get_num_ver(). For each call we
1360 * calculate five parameters. where 4th parameter is s1bytes
1361 * and 5th - s2bytes
1362 *
1363 * s0num, s1num, s2num for 8 cases
1364 * 0,1 - do not shift and do not shift but bottle
1365 * 2 - shift only whole item to left
1366 * 3 - shift to left and bottle as much as possible
1367 * 4,5 - shift to right (whole items and as much as possible
1368 * 6,7 - shift to both directions (whole items and as much as possible)
1247 */ 1369 */
1248 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases 1370 short snum012[40] = { 0, };
1249 0,1 - do not shift and do not shift but bottle
1250 2 - shift only whole item to left
1251 3 - shift to left and bottle as much as possible
1252 4,5 - shift to right (whole items and as much as possible
1253 6,7 - shift to both directions (whole items and as much as possible)
1254 */
1255 1371
1256 /* Sh is the node whose balance is currently being checked */ 1372 /* Sh is the node whose balance is currently being checked */
1257 struct buffer_head *Sh; 1373 struct buffer_head *Sh;
@@ -1265,9 +1381,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1265 reiserfs_panic(tb->tb_sb, "vs-8210", 1381 reiserfs_panic(tb->tb_sb, "vs-8210",
1266 "S[0] can not be 0"); 1382 "S[0] can not be 0");
1267 switch (ret = get_empty_nodes(tb, h)) { 1383 switch (ret = get_empty_nodes(tb, h)) {
1384 /* no balancing for higher levels needed */
1268 case CARRY_ON: 1385 case CARRY_ON:
1269 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1386 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1270 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1387 return NO_BALANCING_NEEDED;
1271 1388
1272 case NO_DISK_SPACE: 1389 case NO_DISK_SPACE:
1273 case REPEAT_SEARCH: 1390 case REPEAT_SEARCH:
@@ -1278,7 +1395,9 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1278 } 1395 }
1279 } 1396 }
1280 1397
1281 if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ 1398 /* get parents of S[h] neighbors. */
1399 ret = get_parents(tb, h);
1400 if (ret != CARRY_ON)
1282 return ret; 1401 return ret;
1283 1402
1284 sfree = B_FREE_SPACE(Sh); 1403 sfree = B_FREE_SPACE(Sh);
@@ -1287,38 +1406,44 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1287 rfree = get_rfree(tb, h); 1406 rfree = get_rfree(tb, h);
1288 lfree = get_lfree(tb, h); 1407 lfree = get_lfree(tb, h);
1289 1408
1409 /* and new item fits into node S[h] without any shifting */
1290 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == 1410 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1291 NO_BALANCING_NEEDED) 1411 NO_BALANCING_NEEDED)
1292 /* and new item fits into node S[h] without any shifting */
1293 return NO_BALANCING_NEEDED; 1412 return NO_BALANCING_NEEDED;
1294 1413
1295 create_virtual_node(tb, h); 1414 create_virtual_node(tb, h);
1296 1415
1297 /* 1416 /*
1298 determine maximal number of items we can shift to the left neighbor (in tb structure) 1417 * determine maximal number of items we can shift to the left
1299 and the maximal number of bytes that can flow to the left neighbor 1418 * neighbor (in tb structure) and the maximal number of bytes
1300 from the left most liquid item that cannot be shifted from S[0] entirely (returned value) 1419 * that can flow to the left neighbor from the left most liquid
1420 * item that cannot be shifted from S[0] entirely (returned value)
1301 */ 1421 */
1302 check_left(tb, h, lfree); 1422 check_left(tb, h, lfree);
1303 1423
1304 /* 1424 /*
1305 determine maximal number of items we can shift to the right neighbor (in tb structure) 1425 * determine maximal number of items we can shift to the right
1306 and the maximal number of bytes that can flow to the right neighbor 1426 * neighbor (in tb structure) and the maximal number of bytes
1307 from the right most liquid item that cannot be shifted from S[0] entirely (returned value) 1427 * that can flow to the right neighbor from the right most liquid
1428 * item that cannot be shifted from S[0] entirely (returned value)
1308 */ 1429 */
1309 check_right(tb, h, rfree); 1430 check_right(tb, h, rfree);
1310 1431
1311 /* all contents of internal node S[h] can be moved into its 1432 /*
1312 neighbors, S[h] will be removed after balancing */ 1433 * all contents of internal node S[h] can be moved into its
1434 * neighbors, S[h] will be removed after balancing
1435 */
1313 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { 1436 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1314 int to_r; 1437 int to_r;
1315 1438
1316 /* Since we are working on internal nodes, and our internal 1439 /*
1317 nodes have fixed size entries, then we can balance by the 1440 * Since we are working on internal nodes, and our internal
1318 number of items rather than the space they consume. In this 1441 * nodes have fixed size entries, then we can balance by the
1319 routine we set the left node equal to the right node, 1442 * number of items rather than the space they consume. In this
1320 allowing a difference of less than or equal to 1 child 1443 * routine we set the left node equal to the right node,
1321 pointer. */ 1444 * allowing a difference of less than or equal to 1 child
1445 * pointer.
1446 */
1322 to_r = 1447 to_r =
1323 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1448 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1324 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1449 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
@@ -1328,7 +1453,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1328 return CARRY_ON; 1453 return CARRY_ON;
1329 } 1454 }
1330 1455
1331 /* this checks balance condition, that any two neighboring nodes can not fit in one node */ 1456 /*
1457 * this checks balance condition, that any two neighboring nodes
1458 * can not fit in one node
1459 */
1332 RFALSE(h && 1460 RFALSE(h &&
1333 (tb->lnum[h] >= vn->vn_nr_item + 1 || 1461 (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1334 tb->rnum[h] >= vn->vn_nr_item + 1), 1462 tb->rnum[h] >= vn->vn_nr_item + 1),
@@ -1337,16 +1465,22 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1337 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), 1465 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1338 "vs-8225: tree is not balanced on leaf level"); 1466 "vs-8225: tree is not balanced on leaf level");
1339 1467
1340 /* all contents of S[0] can be moved into its neighbors 1468 /*
1341 S[0] will be removed after balancing. */ 1469 * all contents of S[0] can be moved into its neighbors
1470 * S[0] will be removed after balancing.
1471 */
1342 if (!h && is_leaf_removable(tb)) 1472 if (!h && is_leaf_removable(tb))
1343 return CARRY_ON; 1473 return CARRY_ON;
1344 1474
1345 /* why do we perform this check here rather than earlier?? 1475 /*
1346 Answer: we can win 1 node in some cases above. Moreover we 1476 * why do we perform this check here rather than earlier??
1347 checked it above, when we checked, that S[0] is not removable 1477 * Answer: we can win 1 node in some cases above. Moreover we
1348 in principle */ 1478 * checked it above, when we checked, that S[0] is not removable
1349 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1479 * in principle
1480 */
1481
1482 /* new item fits into node S[h] without any shifting */
1483 if (sfree >= levbytes) {
1350 if (!h) 1484 if (!h)
1351 tb->s0num = vn->vn_nr_item; 1485 tb->s0num = vn->vn_nr_item;
1352 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1486 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
@@ -1355,18 +1489,19 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1355 1489
1356 { 1490 {
1357 int lpar, rpar, nset, lset, rset, lrset; 1491 int lpar, rpar, nset, lset, rset, lrset;
1358 /* 1492 /* regular overflowing of the node */
1359 * regular overflowing of the node
1360 */
1361 1493
1362 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1494 /*
1363 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1495 * get_num_ver works in 2 modes (FLOW & NO_FLOW)
1364 nset, lset, rset, lrset - shows, whether flowing items give better packing 1496 * lpar, rpar - number of items we can shift to left/right
1497 * neighbor (including splitting item)
1498 * nset, lset, rset, lrset - shows, whether flowing items
1499 * give better packing
1365 */ 1500 */
1366#define FLOW 1 1501#define FLOW 1
1367#define NO_FLOW 0 /* do not any splitting */ 1502#define NO_FLOW 0 /* do not any splitting */
1368 1503
1369 /* we choose one the following */ 1504 /* we choose one of the following */
1370#define NOTHING_SHIFT_NO_FLOW 0 1505#define NOTHING_SHIFT_NO_FLOW 0
1371#define NOTHING_SHIFT_FLOW 5 1506#define NOTHING_SHIFT_FLOW 5
1372#define LEFT_SHIFT_NO_FLOW 10 1507#define LEFT_SHIFT_NO_FLOW 10
@@ -1379,10 +1514,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1379 lpar = tb->lnum[h]; 1514 lpar = tb->lnum[h];
1380 rpar = tb->rnum[h]; 1515 rpar = tb->rnum[h];
1381 1516
1382 /* calculate number of blocks S[h] must be split into when 1517 /*
1383 nothing is shifted to the neighbors, 1518 * calculate number of blocks S[h] must be split into when
1384 as well as number of items in each part of the split node (s012 numbers), 1519 * nothing is shifted to the neighbors, as well as number of
1385 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ 1520 * items in each part of the split node (s012 numbers),
1521 * and number of bytes (s1bytes) of the shared drop which
1522 * flow to S1 if any
1523 */
1386 nset = NOTHING_SHIFT_NO_FLOW; 1524 nset = NOTHING_SHIFT_NO_FLOW;
1387 nver = get_num_ver(vn->vn_mode, tb, h, 1525 nver = get_num_ver(vn->vn_mode, tb, h,
1388 0, -1, h ? vn->vn_nr_item : 0, -1, 1526 0, -1, h ? vn->vn_nr_item : 0, -1,
@@ -1391,7 +1529,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1391 if (!h) { 1529 if (!h) {
1392 int nver1; 1530 int nver1;
1393 1531
1394 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ 1532 /*
1533 * note, that in this case we try to bottle
1534 * between S[0] and S1 (S1 - the first new node)
1535 */
1395 nver1 = get_num_ver(vn->vn_mode, tb, h, 1536 nver1 = get_num_ver(vn->vn_mode, tb, h,
1396 0, -1, 0, -1, 1537 0, -1, 0, -1,
1397 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1538 snum012 + NOTHING_SHIFT_FLOW, FLOW);
@@ -1399,11 +1540,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1399 nset = NOTHING_SHIFT_FLOW, nver = nver1; 1540 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1400 } 1541 }
1401 1542
1402 /* calculate number of blocks S[h] must be split into when 1543 /*
1403 l_shift_num first items and l_shift_bytes of the right most 1544 * calculate number of blocks S[h] must be split into when
1404 liquid item to be shifted are shifted to the left neighbor, 1545 * l_shift_num first items and l_shift_bytes of the right
1405 as well as number of items in each part of the splitted node (s012 numbers), 1546 * most liquid item to be shifted are shifted to the left
1406 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1547 * neighbor, as well as number of items in each part of the
1548 * splitted node (s012 numbers), and number of bytes
1549 * (s1bytes) of the shared drop which flow to S1 if any
1407 */ 1550 */
1408 lset = LEFT_SHIFT_NO_FLOW; 1551 lset = LEFT_SHIFT_NO_FLOW;
1409 lnver = get_num_ver(vn->vn_mode, tb, h, 1552 lnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1422,11 +1565,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1422 lset = LEFT_SHIFT_FLOW, lnver = lnver1; 1565 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1423 } 1566 }
1424 1567
1425 /* calculate number of blocks S[h] must be split into when 1568 /*
1426 r_shift_num first items and r_shift_bytes of the left most 1569 * calculate number of blocks S[h] must be split into when
1427 liquid item to be shifted are shifted to the right neighbor, 1570 * r_shift_num first items and r_shift_bytes of the left most
1428 as well as number of items in each part of the splitted node (s012 numbers), 1571 * liquid item to be shifted are shifted to the right neighbor,
1429 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1572 * as well as number of items in each part of the splitted
1573 * node (s012 numbers), and number of bytes (s1bytes) of the
1574 * shared drop which flow to S1 if any
1430 */ 1575 */
1431 rset = RIGHT_SHIFT_NO_FLOW; 1576 rset = RIGHT_SHIFT_NO_FLOW;
1432 rnver = get_num_ver(vn->vn_mode, tb, h, 1577 rnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1451,10 +1596,12 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1451 rset = RIGHT_SHIFT_FLOW, rnver = rnver1; 1596 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1452 } 1597 }
1453 1598
1454 /* calculate number of blocks S[h] must be split into when 1599 /*
1455 items are shifted in both directions, 1600 * calculate number of blocks S[h] must be split into when
1456 as well as number of items in each part of the splitted node (s012 numbers), 1601 * items are shifted in both directions, as well as number
1457 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1602 * of items in each part of the splitted node (s012 numbers),
1603 * and number of bytes (s1bytes) of the shared drop which
1604 * flow to S1 if any
1458 */ 1605 */
1459 lrset = LR_SHIFT_NO_FLOW; 1606 lrset = LR_SHIFT_NO_FLOW;
1460 lrnver = get_num_ver(vn->vn_mode, tb, h, 1607 lrnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1481,10 +1628,12 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1481 lrset = LR_SHIFT_FLOW, lrnver = lrnver1; 1628 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1482 } 1629 }
1483 1630
1484 /* Our general shifting strategy is: 1631 /*
1485 1) to minimized number of new nodes; 1632 * Our general shifting strategy is:
1486 2) to minimized number of neighbors involved in shifting; 1633 * 1) to minimized number of new nodes;
1487 3) to minimized number of disk reads; */ 1634 * 2) to minimized number of neighbors involved in shifting;
1635 * 3) to minimized number of disk reads;
1636 */
1488 1637
1489 /* we can win TWO or ONE nodes by shifting in both directions */ 1638 /* we can win TWO or ONE nodes by shifting in both directions */
1490 if (lrnver < lnver && lrnver < rnver) { 1639 if (lrnver < lnver && lrnver < rnver) {
@@ -1508,42 +1657,59 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1508 return CARRY_ON; 1657 return CARRY_ON;
1509 } 1658 }
1510 1659
1511 /* if shifting doesn't lead to better packing then don't shift */ 1660 /*
1661 * if shifting doesn't lead to better packing
1662 * then don't shift
1663 */
1512 if (nver == lrnver) { 1664 if (nver == lrnver) {
1513 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, 1665 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1514 -1); 1666 -1);
1515 return CARRY_ON; 1667 return CARRY_ON;
1516 } 1668 }
1517 1669
1518 /* now we know that for better packing shifting in only one 1670 /*
1519 direction either to the left or to the right is required */ 1671 * now we know that for better packing shifting in only one
1672 * direction either to the left or to the right is required
1673 */
1520 1674
1521 /* if shifting to the left is better than shifting to the right */ 1675 /*
1676 * if shifting to the left is better than
1677 * shifting to the right
1678 */
1522 if (lnver < rnver) { 1679 if (lnver < rnver) {
1523 SET_PAR_SHIFT_LEFT; 1680 SET_PAR_SHIFT_LEFT;
1524 return CARRY_ON; 1681 return CARRY_ON;
1525 } 1682 }
1526 1683
1527 /* if shifting to the right is better than shifting to the left */ 1684 /*
1685 * if shifting to the right is better than
1686 * shifting to the left
1687 */
1528 if (lnver > rnver) { 1688 if (lnver > rnver) {
1529 SET_PAR_SHIFT_RIGHT; 1689 SET_PAR_SHIFT_RIGHT;
1530 return CARRY_ON; 1690 return CARRY_ON;
1531 } 1691 }
1532 1692
1533 /* now shifting in either direction gives the same number 1693 /*
1534 of nodes and we can make use of the cached neighbors */ 1694 * now shifting in either direction gives the same number
1695 * of nodes and we can make use of the cached neighbors
1696 */
1535 if (is_left_neighbor_in_cache(tb, h)) { 1697 if (is_left_neighbor_in_cache(tb, h)) {
1536 SET_PAR_SHIFT_LEFT; 1698 SET_PAR_SHIFT_LEFT;
1537 return CARRY_ON; 1699 return CARRY_ON;
1538 } 1700 }
1539 1701
1540 /* shift to the right independently on whether the right neighbor in cache or not */ 1702 /*
1703 * shift to the right independently on whether the
1704 * right neighbor in cache or not
1705 */
1541 SET_PAR_SHIFT_RIGHT; 1706 SET_PAR_SHIFT_RIGHT;
1542 return CARRY_ON; 1707 return CARRY_ON;
1543 } 1708 }
1544} 1709}
1545 1710
1546/* Check whether current node S[h] is balanced when Decreasing its size by 1711/*
1712 * Check whether current node S[h] is balanced when Decreasing its size by
1547 * Deleting or Cutting for INTERNAL node of S+tree. 1713 * Deleting or Cutting for INTERNAL node of S+tree.
1548 * Calculate parameters for balancing for current level h. 1714 * Calculate parameters for balancing for current level h.
1549 * Parameters: 1715 * Parameters:
@@ -1563,8 +1729,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1563{ 1729{
1564 struct virtual_node *vn = tb->tb_vn; 1730 struct virtual_node *vn = tb->tb_vn;
1565 1731
1566 /* Sh is the node whose balance is currently being checked, 1732 /*
1567 and Fh is its father. */ 1733 * Sh is the node whose balance is currently being checked,
1734 * and Fh is its father.
1735 */
1568 struct buffer_head *Sh, *Fh; 1736 struct buffer_head *Sh, *Fh;
1569 int maxsize, ret; 1737 int maxsize, ret;
1570 int lfree, rfree /* free space in L and R */ ; 1738 int lfree, rfree /* free space in L and R */ ;
@@ -1574,19 +1742,25 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1574 1742
1575 maxsize = MAX_CHILD_SIZE(Sh); 1743 maxsize = MAX_CHILD_SIZE(Sh);
1576 1744
1577/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ 1745 /*
1578/* new_nr_item = number of items node would have if operation is */ 1746 * using tb->insert_size[h], which is negative in this case,
1579/* performed without balancing (new_nr_item); */ 1747 * create_virtual_node calculates:
1748 * new_nr_item = number of items node would have if operation is
1749 * performed without balancing (new_nr_item);
1750 */
1580 create_virtual_node(tb, h); 1751 create_virtual_node(tb, h);
1581 1752
1582 if (!Fh) { /* S[h] is the root. */ 1753 if (!Fh) { /* S[h] is the root. */
1754 /* no balancing for higher levels needed */
1583 if (vn->vn_nr_item > 0) { 1755 if (vn->vn_nr_item > 0) {
1584 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1756 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1585 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1757 return NO_BALANCING_NEEDED;
1586 } 1758 }
1587 /* new_nr_item == 0. 1759 /*
1760 * new_nr_item == 0.
1588 * Current root will be deleted resulting in 1761 * Current root will be deleted resulting in
1589 * decrementing the tree height. */ 1762 * decrementing the tree height.
1763 */
1590 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); 1764 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1591 return CARRY_ON; 1765 return CARRY_ON;
1592 } 1766 }
@@ -1602,12 +1776,18 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1602 check_left(tb, h, lfree); 1776 check_left(tb, h, lfree);
1603 check_right(tb, h, rfree); 1777 check_right(tb, h, rfree);
1604 1778
1605 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. 1779 /*
1606 * In this case we balance only if it leads to better packing. */ 1780 * Balance condition for the internal node is valid.
1607 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, 1781 * In this case we balance only if it leads to better packing.
1608 * which is impossible with greater values of new_nr_item. */ 1782 */
1783 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
1784 /*
1785 * Here we join S[h] with one of its neighbors,
1786 * which is impossible with greater values of new_nr_item.
1787 */
1788 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
1789 /* All contents of S[h] can be moved to L[h]. */
1609 if (tb->lnum[h] >= vn->vn_nr_item + 1) { 1790 if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1610 /* All contents of S[h] can be moved to L[h]. */
1611 int n; 1791 int n;
1612 int order_L; 1792 int order_L;
1613 1793
@@ -1623,8 +1803,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1623 return CARRY_ON; 1803 return CARRY_ON;
1624 } 1804 }
1625 1805
1806 /* All contents of S[h] can be moved to R[h]. */
1626 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1807 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1627 /* All contents of S[h] can be moved to R[h]. */
1628 int n; 1808 int n;
1629 int order_R; 1809 int order_R;
1630 1810
@@ -1641,8 +1821,11 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1641 } 1821 }
1642 } 1822 }
1643 1823
1824 /*
1825 * All contents of S[h] can be moved to the neighbors
1826 * (L[h] & R[h]).
1827 */
1644 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1828 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1645 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1646 int to_r; 1829 int to_r;
1647 1830
1648 to_r = 1831 to_r =
@@ -1659,7 +1842,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1659 return NO_BALANCING_NEEDED; 1842 return NO_BALANCING_NEEDED;
1660 } 1843 }
1661 1844
1662 /* Current node contain insufficient number of items. Balancing is required. */ 1845 /*
1846 * Current node contain insufficient number of items.
1847 * Balancing is required.
1848 */
1663 /* Check whether we can merge S[h] with left neighbor. */ 1849 /* Check whether we can merge S[h] with left neighbor. */
1664 if (tb->lnum[h] >= vn->vn_nr_item + 1) 1850 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1665 if (is_left_neighbor_in_cache(tb, h) 1851 if (is_left_neighbor_in_cache(tb, h)
@@ -1726,7 +1912,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1726 return CARRY_ON; 1912 return CARRY_ON;
1727} 1913}
1728 1914
1729/* Check whether current node S[h] is balanced when Decreasing its size by 1915/*
1916 * Check whether current node S[h] is balanced when Decreasing its size by
1730 * Deleting or Truncating for LEAF node of S+tree. 1917 * Deleting or Truncating for LEAF node of S+tree.
1731 * Calculate parameters for balancing for current level h. 1918 * Calculate parameters for balancing for current level h.
1732 * Parameters: 1919 * Parameters:
@@ -1743,15 +1930,21 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1743{ 1930{
1744 struct virtual_node *vn = tb->tb_vn; 1931 struct virtual_node *vn = tb->tb_vn;
1745 1932
1746 /* Number of bytes that must be deleted from 1933 /*
1747 (value is negative if bytes are deleted) buffer which 1934 * Number of bytes that must be deleted from
1748 contains node being balanced. The mnemonic is that the 1935 * (value is negative if bytes are deleted) buffer which
1749 attempted change in node space used level is levbytes bytes. */ 1936 * contains node being balanced. The mnemonic is that the
1937 * attempted change in node space used level is levbytes bytes.
1938 */
1750 int levbytes; 1939 int levbytes;
1940
1751 /* the maximal item size */ 1941 /* the maximal item size */
1752 int maxsize, ret; 1942 int maxsize, ret;
1753 /* S0 is the node whose balance is currently being checked, 1943
1754 and F0 is its father. */ 1944 /*
1945 * S0 is the node whose balance is currently being checked,
1946 * and F0 is its father.
1947 */
1755 struct buffer_head *S0, *F0; 1948 struct buffer_head *S0, *F0;
1756 int lfree, rfree /* free space in L and R */ ; 1949 int lfree, rfree /* free space in L and R */ ;
1757 1950
@@ -1784,9 +1977,11 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1784 if (are_leaves_removable(tb, lfree, rfree)) 1977 if (are_leaves_removable(tb, lfree, rfree))
1785 return CARRY_ON; 1978 return CARRY_ON;
1786 1979
1787 /* determine maximal number of items we can shift to the left/right neighbor 1980 /*
1788 and the maximal number of bytes that can flow to the left/right neighbor 1981 * determine maximal number of items we can shift to the left/right
1789 from the left/right most liquid item that cannot be shifted from S[0] entirely 1982 * neighbor and the maximal number of bytes that can flow to the
1983 * left/right neighbor from the left/right most liquid item that
1984 * cannot be shifted from S[0] entirely
1790 */ 1985 */
1791 check_left(tb, h, lfree); 1986 check_left(tb, h, lfree);
1792 check_right(tb, h, rfree); 1987 check_right(tb, h, rfree);
@@ -1810,7 +2005,10 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1810 return CARRY_ON; 2005 return CARRY_ON;
1811 } 2006 }
1812 2007
1813 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ 2008 /*
2009 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]).
2010 * Set parameters and return
2011 */
1814 if (is_leaf_removable(tb)) 2012 if (is_leaf_removable(tb))
1815 return CARRY_ON; 2013 return CARRY_ON;
1816 2014
@@ -1820,7 +2018,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1820 return NO_BALANCING_NEEDED; 2018 return NO_BALANCING_NEEDED;
1821} 2019}
1822 2020
1823/* Check whether current node S[h] is balanced when Decreasing its size by 2021/*
2022 * Check whether current node S[h] is balanced when Decreasing its size by
1824 * Deleting or Cutting. 2023 * Deleting or Cutting.
1825 * Calculate parameters for balancing for current level h. 2024 * Calculate parameters for balancing for current level h.
1826 * Parameters: 2025 * Parameters:
@@ -1844,15 +2043,16 @@ static int dc_check_balance(struct tree_balance *tb, int h)
1844 return dc_check_balance_leaf(tb, h); 2043 return dc_check_balance_leaf(tb, h);
1845} 2044}
1846 2045
1847/* Check whether current node S[h] is balanced. 2046/*
2047 * Check whether current node S[h] is balanced.
1848 * Calculate parameters for balancing for current level h. 2048 * Calculate parameters for balancing for current level h.
1849 * Parameters: 2049 * Parameters:
1850 * 2050 *
1851 * tb tree_balance structure: 2051 * tb tree_balance structure:
1852 * 2052 *
1853 * tb is a large structure that must be read about in the header file 2053 * tb is a large structure that must be read about in the header
1854 * at the same time as this procedure if the reader is to successfully 2054 * file at the same time as this procedure if the reader is
1855 * understand this procedure 2055 * to successfully understand this procedure
1856 * 2056 *
1857 * h current level of the node; 2057 * h current level of the node;
1858 * inum item number in S[h]; 2058 * inum item number in S[h];
@@ -1882,8 +2082,8 @@ static int check_balance(int mode,
1882 RFALSE(mode == M_INSERT && !vn->vn_ins_ih, 2082 RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1883 "vs-8255: ins_ih can not be 0 in insert mode"); 2083 "vs-8255: ins_ih can not be 0 in insert mode");
1884 2084
2085 /* Calculate balance parameters when size of node is increasing. */
1885 if (tb->insert_size[h] > 0) 2086 if (tb->insert_size[h] > 0)
1886 /* Calculate balance parameters when size of node is increasing. */
1887 return ip_check_balance(tb, h); 2087 return ip_check_balance(tb, h);
1888 2088
1889 /* Calculate balance parameters when size of node is decreasing. */ 2089 /* Calculate balance parameters when size of node is decreasing. */
@@ -1911,21 +2111,23 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1911 PATH_OFFSET_POSITION(path, path_offset - 1) = 0; 2111 PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1912 return CARRY_ON; 2112 return CARRY_ON;
1913 } 2113 }
1914 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ 2114 /* Root is changed and we must recalculate the path. */
2115 return REPEAT_SEARCH;
1915 } 2116 }
1916 2117
2118 /* Parent in the path is not in the tree. */
1917 if (!B_IS_IN_TREE 2119 if (!B_IS_IN_TREE
1918 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) 2120 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1919 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ 2121 return REPEAT_SEARCH;
1920 2122
1921 if ((position = 2123 if ((position =
1922 PATH_OFFSET_POSITION(path, 2124 PATH_OFFSET_POSITION(path,
1923 path_offset - 1)) > B_NR_ITEMS(bh)) 2125 path_offset - 1)) > B_NR_ITEMS(bh))
1924 return REPEAT_SEARCH; 2126 return REPEAT_SEARCH;
1925 2127
2128 /* Parent in the path is not parent of the current node in the tree. */
1926 if (B_N_CHILD_NUM(bh, position) != 2129 if (B_N_CHILD_NUM(bh, position) !=
1927 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) 2130 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1928 /* Parent in the path is not parent of the current node in the tree. */
1929 return REPEAT_SEARCH; 2131 return REPEAT_SEARCH;
1930 2132
1931 if (buffer_locked(bh)) { 2133 if (buffer_locked(bh)) {
@@ -1936,10 +2138,15 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1936 return REPEAT_SEARCH; 2138 return REPEAT_SEARCH;
1937 } 2139 }
1938 2140
1939 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ 2141 /*
2142 * Parent in the path is unlocked and really parent
2143 * of the current node.
2144 */
2145 return CARRY_ON;
1940} 2146}
1941 2147
1942/* Using lnum[h] and rnum[h] we should determine what neighbors 2148/*
2149 * Using lnum[h] and rnum[h] we should determine what neighbors
1943 * of S[h] we 2150 * of S[h] we
1944 * need in order to balance S[h], and get them if necessary. 2151 * need in order to balance S[h], and get them if necessary.
1945 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 2152 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
@@ -1997,7 +2204,7 @@ static int get_neighbors(struct tree_balance *tb, int h)
1997 } 2204 }
1998 2205
1999 /* We need right neighbor to balance S[path_offset]. */ 2206 /* We need right neighbor to balance S[path_offset]. */
2000 if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */ 2207 if (tb->rnum[h]) {
2001 PROC_INFO_INC(sb, need_r_neighbor[h]); 2208 PROC_INFO_INC(sb, need_r_neighbor[h]);
2002 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2209 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
2003 2210
@@ -2053,9 +2260,11 @@ static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2053 (max_num_of_entries - 1) * sizeof(__u16)); 2260 (max_num_of_entries - 1) * sizeof(__u16));
2054} 2261}
2055 2262
2056/* maybe we should fail balancing we are going to perform when kmalloc 2263/*
2057 fails several times. But now it will loop until kmalloc gets 2264 * maybe we should fail balancing we are going to perform when kmalloc
2058 required memory */ 2265 * fails several times. But now it will loop until kmalloc gets
2266 * required memory
2267 */
2059static int get_mem_for_virtual_node(struct tree_balance *tb) 2268static int get_mem_for_virtual_node(struct tree_balance *tb)
2060{ 2269{
2061 int check_fs = 0; 2270 int check_fs = 0;
@@ -2064,8 +2273,8 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2064 2273
2065 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); 2274 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2066 2275
2276 /* we have to allocate more memory for virtual node */
2067 if (size > tb->vn_buf_size) { 2277 if (size > tb->vn_buf_size) {
2068 /* we have to allocate more memory for virtual node */
2069 if (tb->vn_buf) { 2278 if (tb->vn_buf) {
2070 /* free memory allocated before */ 2279 /* free memory allocated before */
2071 kfree(tb->vn_buf); 2280 kfree(tb->vn_buf);
@@ -2079,10 +2288,12 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2079 /* get memory for virtual item */ 2288 /* get memory for virtual item */
2080 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); 2289 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2081 if (!buf) { 2290 if (!buf) {
2082 /* getting memory with GFP_KERNEL priority may involve 2291 /*
2083 balancing now (due to indirect_to_direct conversion on 2292 * getting memory with GFP_KERNEL priority may involve
2084 dcache shrinking). So, release path and collected 2293 * balancing now (due to indirect_to_direct conversion
2085 resources here */ 2294 * on dcache shrinking). So, release path and collected
2295 * resources here
2296 */
2086 free_buffers_in_tb(tb); 2297 free_buffers_in_tb(tb);
2087 buf = kmalloc(size, GFP_NOFS); 2298 buf = kmalloc(size, GFP_NOFS);
2088 if (!buf) { 2299 if (!buf) {
@@ -2168,8 +2379,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2168 for (i = tb->tb_path->path_length; 2379 for (i = tb->tb_path->path_length;
2169 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2380 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2170 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { 2381 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2171 /* if I understand correctly, we can only be sure the last buffer 2382 /*
2172 ** in the path is in the tree --clm 2383 * if I understand correctly, we can only
2384 * be sure the last buffer in the path is
2385 * in the tree --clm
2173 */ 2386 */
2174#ifdef CONFIG_REISERFS_CHECK 2387#ifdef CONFIG_REISERFS_CHECK
2175 if (PATH_PLAST_BUFFER(tb->tb_path) == 2388 if (PATH_PLAST_BUFFER(tb->tb_path) ==
@@ -2256,13 +2469,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2256 } 2469 }
2257 } 2470 }
2258 } 2471 }
2259 /* as far as I can tell, this is not required. The FEB list seems 2472
2260 ** to be full of newly allocated nodes, which will never be locked, 2473 /*
2261 ** dirty, or anything else. 2474 * as far as I can tell, this is not required. The FEB list
2262 ** To be safe, I'm putting in the checks and waits in. For the moment, 2475 * seems to be full of newly allocated nodes, which will
2263 ** they are needed to keep the code in journal.c from complaining 2476 * never be locked, dirty, or anything else.
2264 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. 2477 * To be safe, I'm putting in the checks and waits in.
2265 ** --clm 2478 * For the moment, they are needed to keep the code in
2479 * journal.c from complaining about the buffer.
2480 * That code is inside CONFIG_REISERFS_CHECK as well. --clm
2266 */ 2481 */
2267 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2482 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2268 if (tb->FEB[i]) { 2483 if (tb->FEB[i]) {
@@ -2300,7 +2515,8 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2300 return CARRY_ON; 2515 return CARRY_ON;
2301} 2516}
2302 2517
2303/* Prepare for balancing, that is 2518/*
2519 * Prepare for balancing, that is
2304 * get all necessary parents, and neighbors; 2520 * get all necessary parents, and neighbors;
2305 * analyze what and where should be moved; 2521 * analyze what and where should be moved;
2306 * get sufficient number of new nodes; 2522 * get sufficient number of new nodes;
@@ -2309,13 +2525,14 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2309 * When ported to SMP kernels, only at the last moment after all needed nodes 2525 * When ported to SMP kernels, only at the last moment after all needed nodes
2310 * are collected in cache, will the resources be locked using the usual 2526 * are collected in cache, will the resources be locked using the usual
2311 * textbook ordered lock acquisition algorithms. Note that ensuring that 2527 * textbook ordered lock acquisition algorithms. Note that ensuring that
2312 * this code neither write locks what it does not need to write lock nor locks out of order 2528 * this code neither write locks what it does not need to write lock nor locks
2313 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans 2529 * out of order will be a pain in the butt that could have been avoided.
2530 * Grumble grumble. -Hans
2314 * 2531 *
2315 * fix is meant in the sense of render unchanging 2532 * fix is meant in the sense of render unchanging
2316 * 2533 *
2317 * Latency might be improved by first gathering a list of what buffers are needed 2534 * Latency might be improved by first gathering a list of what buffers
2318 * and then getting as many of them in parallel as possible? -Hans 2535 * are needed and then getting as many of them in parallel as possible? -Hans
2319 * 2536 *
2320 * Parameters: 2537 * Parameters:
2321 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) 2538 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
@@ -2335,8 +2552,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2335 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); 2552 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2336 int pos_in_item; 2553 int pos_in_item;
2337 2554
2338 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared 2555 /*
2339 ** during wait_tb_buffers_run 2556 * we set wait_tb_buffers_run when we have to restore any dirty
2557 * bits cleared during wait_tb_buffers_run
2340 */ 2558 */
2341 int wait_tb_buffers_run = 0; 2559 int wait_tb_buffers_run = 0;
2342 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 2560 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
@@ -2347,14 +2565,15 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2347 2565
2348 tb->fs_gen = get_generation(tb->tb_sb); 2566 tb->fs_gen = get_generation(tb->tb_sb);
2349 2567
2350 /* we prepare and log the super here so it will already be in the 2568 /*
2351 ** transaction when do_balance needs to change it. 2569 * we prepare and log the super here so it will already be in the
2352 ** This way do_balance won't have to schedule when trying to prepare 2570 * transaction when do_balance needs to change it.
2353 ** the super for logging 2571 * This way do_balance won't have to schedule when trying to prepare
2572 * the super for logging
2354 */ 2573 */
2355 reiserfs_prepare_for_journal(tb->tb_sb, 2574 reiserfs_prepare_for_journal(tb->tb_sb,
2356 SB_BUFFER_WITH_SB(tb->tb_sb), 1); 2575 SB_BUFFER_WITH_SB(tb->tb_sb), 1);
2357 journal_mark_dirty(tb->transaction_handle, tb->tb_sb, 2576 journal_mark_dirty(tb->transaction_handle,
2358 SB_BUFFER_WITH_SB(tb->tb_sb)); 2577 SB_BUFFER_WITH_SB(tb->tb_sb));
2359 if (FILESYSTEM_CHANGED_TB(tb)) 2578 if (FILESYSTEM_CHANGED_TB(tb))
2360 return REPEAT_SEARCH; 2579 return REPEAT_SEARCH;
@@ -2408,7 +2627,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2408#endif 2627#endif
2409 2628
2410 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) 2629 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2411 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2630 /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */
2412 return REPEAT_SEARCH; 2631 return REPEAT_SEARCH;
2413 2632
2414 /* Starting from the leaf level; for all levels h of the tree. */ 2633 /* Starting from the leaf level; for all levels h of the tree. */
@@ -2427,7 +2646,10 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2427 goto repeat; 2646 goto repeat;
2428 if (h != MAX_HEIGHT - 1) 2647 if (h != MAX_HEIGHT - 1)
2429 tb->insert_size[h + 1] = 0; 2648 tb->insert_size[h + 1] = 0;
2430 /* ok, analysis and resource gathering are complete */ 2649 /*
2650 * ok, analysis and resource gathering
2651 * are complete
2652 */
2431 break; 2653 break;
2432 } 2654 }
2433 goto repeat; 2655 goto repeat;
@@ -2437,15 +2659,19 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2437 if (ret != CARRY_ON) 2659 if (ret != CARRY_ON)
2438 goto repeat; 2660 goto repeat;
2439 2661
2440 /* No disk space, or schedule occurred and analysis may be 2662 /*
2441 * invalid and needs to be redone. */ 2663 * No disk space, or schedule occurred and analysis may be
2664 * invalid and needs to be redone.
2665 */
2442 ret = get_empty_nodes(tb, h); 2666 ret = get_empty_nodes(tb, h);
2443 if (ret != CARRY_ON) 2667 if (ret != CARRY_ON)
2444 goto repeat; 2668 goto repeat;
2445 2669
2670 /*
2671 * We have a positive insert size but no nodes exist on this
2672 * level, this means that we are creating a new root.
2673 */
2446 if (!PATH_H_PBUFFER(tb->tb_path, h)) { 2674 if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2447 /* We have a positive insert size but no nodes exist on this
2448 level, this means that we are creating a new root. */
2449 2675
2450 RFALSE(tb->blknum[h] != 1, 2676 RFALSE(tb->blknum[h] != 1,
2451 "PAP-8350: creating new empty root"); 2677 "PAP-8350: creating new empty root");
@@ -2453,11 +2679,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2453 if (h < MAX_HEIGHT - 1) 2679 if (h < MAX_HEIGHT - 1)
2454 tb->insert_size[h + 1] = 0; 2680 tb->insert_size[h + 1] = 0;
2455 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { 2681 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2682 /*
2683 * The tree needs to be grown, so this node S[h]
2684 * which is the root node is split into two nodes,
2685 * and a new node (S[h+1]) will be created to
2686 * become the root node.
2687 */
2456 if (tb->blknum[h] > 1) { 2688 if (tb->blknum[h] > 1) {
2457 /* The tree needs to be grown, so this node S[h]
2458 which is the root node is split into two nodes,
2459 and a new node (S[h+1]) will be created to
2460 become the root node. */
2461 2689
2462 RFALSE(h == MAX_HEIGHT - 1, 2690 RFALSE(h == MAX_HEIGHT - 1,
2463 "PAP-8355: attempt to create too high of a tree"); 2691 "PAP-8355: attempt to create too high of a tree");
@@ -2487,12 +2715,14 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2487 goto repeat; 2715 goto repeat;
2488 } 2716 }
2489 2717
2490 repeat: 2718repeat:
2491 // fix_nodes was unable to perform its calculation due to 2719 /*
2492 // filesystem got changed under us, lack of free disk space or i/o 2720 * fix_nodes was unable to perform its calculation due to
2493 // failure. If the first is the case - the search will be 2721 * filesystem got changed under us, lack of free disk space or i/o
2494 // repeated. For now - free all resources acquired so far except 2722 * failure. If the first is the case - the search will be
2495 // for the new allocated nodes 2723 * repeated. For now - free all resources acquired so far except
2724 * for the new allocated nodes
2725 */
2496 { 2726 {
2497 int i; 2727 int i;
2498 2728
@@ -2548,8 +2778,6 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2548 2778
2549} 2779}
2550 2780
2551/* Anatoly will probably forgive me renaming tb to tb. I just
2552 wanted to make lines shorter */
2553void unfix_nodes(struct tree_balance *tb) 2781void unfix_nodes(struct tree_balance *tb)
2554{ 2782{
2555 int i; 2783 int i;
@@ -2578,8 +2806,10 @@ void unfix_nodes(struct tree_balance *tb)
2578 for (i = 0; i < MAX_FEB_SIZE; i++) { 2806 for (i = 0; i < MAX_FEB_SIZE; i++) {
2579 if (tb->FEB[i]) { 2807 if (tb->FEB[i]) {
2580 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; 2808 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2581 /* de-allocated block which was not used by balancing and 2809 /*
2582 bforget about buffer for it */ 2810 * de-allocated block which was not used by
2811 * balancing and bforget about buffer for it
2812 */
2583 brelse(tb->FEB[i]); 2813 brelse(tb->FEB[i]);
2584 reiserfs_free_block(tb->transaction_handle, NULL, 2814 reiserfs_free_block(tb->transaction_handle, NULL,
2585 blocknr, 0); 2815 blocknr, 0);
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 91b0cc1242a2..7a26c4fe6c46 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -12,12 +12,6 @@
12 * Yura's function is added (04/07/2000) 12 * Yura's function is added (04/07/2000)
13 */ 13 */
14 14
15//
16// keyed_hash
17// yura_hash
18// r5_hash
19//
20
21#include <linux/kernel.h> 15#include <linux/kernel.h>
22#include "reiserfs.h" 16#include "reiserfs.h"
23#include <asm/types.h> 17#include <asm/types.h>
@@ -56,7 +50,7 @@ u32 keyed_hash(const signed char *msg, int len)
56 u32 pad; 50 u32 pad;
57 int i; 51 int i;
58 52
59 // assert(len >= 0 && len < 256); 53 /* assert(len >= 0 && len < 256); */
60 54
61 pad = (u32) len | ((u32) len << 8); 55 pad = (u32) len | ((u32) len << 8);
62 pad |= pad << 16; 56 pad |= pad << 16;
@@ -127,9 +121,10 @@ u32 keyed_hash(const signed char *msg, int len)
127 return h0 ^ h1; 121 return h0 ^ h1;
128} 122}
129 123
130/* What follows in this file is copyright 2000 by Hans Reiser, and the 124/*
131 * licensing of what follows is governed by reiserfs/README */ 125 * What follows in this file is copyright 2000 by Hans Reiser, and the
132 126 * licensing of what follows is governed by reiserfs/README
127 */
133u32 yura_hash(const signed char *msg, int len) 128u32 yura_hash(const signed char *msg, int len)
134{ 129{
135 int j, pow; 130 int j, pow;
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index e1978fd895f5..73231b1ebdbe 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -12,7 +12,10 @@
12int balance_internal(struct tree_balance *, 12int balance_internal(struct tree_balance *,
13 int, int, struct item_head *, struct buffer_head **); 13 int, int, struct item_head *, struct buffer_head **);
14 14
15/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ 15/*
16 * modes of internal_shift_left, internal_shift_right and
17 * internal_insert_childs
18 */
16#define INTERNAL_SHIFT_FROM_S_TO_L 0 19#define INTERNAL_SHIFT_FROM_S_TO_L 0
17#define INTERNAL_SHIFT_FROM_R_TO_S 1 20#define INTERNAL_SHIFT_FROM_R_TO_S 1
18#define INTERNAL_SHIFT_FROM_L_TO_S 2 21#define INTERNAL_SHIFT_FROM_L_TO_S 2
@@ -32,7 +35,9 @@ static void internal_define_dest_src_infos(int shift_mode,
32 memset(src_bi, 0, sizeof(struct buffer_info)); 35 memset(src_bi, 0, sizeof(struct buffer_info));
33 /* define dest, src, dest parent, dest position */ 36 /* define dest, src, dest parent, dest position */
34 switch (shift_mode) { 37 switch (shift_mode) {
35 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ 38
39 /* used in internal_shift_left */
40 case INTERNAL_SHIFT_FROM_S_TO_L:
36 src_bi->tb = tb; 41 src_bi->tb = tb;
37 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); 42 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
38 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); 43 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
@@ -52,12 +57,14 @@ static void internal_define_dest_src_infos(int shift_mode,
52 dest_bi->tb = tb; 57 dest_bi->tb = tb;
53 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); 58 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
54 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); 59 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
55 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ 60 /* dest position is analog of dest->b_item_order */
61 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
56 *d_key = tb->lkey[h]; 62 *d_key = tb->lkey[h];
57 *cf = tb->CFL[h]; 63 *cf = tb->CFL[h];
58 break; 64 break;
59 65
60 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ 66 /* used in internal_shift_left */
67 case INTERNAL_SHIFT_FROM_R_TO_S:
61 src_bi->tb = tb; 68 src_bi->tb = tb;
62 src_bi->bi_bh = tb->R[h]; 69 src_bi->bi_bh = tb->R[h];
63 src_bi->bi_parent = tb->FR[h]; 70 src_bi->bi_parent = tb->FR[h];
@@ -111,7 +118,8 @@ static void internal_define_dest_src_infos(int shift_mode,
111 } 118 }
112} 119}
113 120
114/* Insert count node pointers into buffer cur before position to + 1. 121/*
122 * Insert count node pointers into buffer cur before position to + 1.
115 * Insert count items into buffer cur before position to. 123 * Insert count items into buffer cur before position to.
116 * Items and node pointers are specified by inserted and bh respectively. 124 * Items and node pointers are specified by inserted and bh respectively.
117 */ 125 */
@@ -146,14 +154,14 @@ static void internal_insert_childs(struct buffer_info *cur_bi,
146 154
147 /* copy to_be_insert disk children */ 155 /* copy to_be_insert disk children */
148 for (i = 0; i < count; i++) { 156 for (i = 0; i < count; i++) {
149 put_dc_size(&(new_dc[i]), 157 put_dc_size(&new_dc[i],
150 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); 158 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
151 put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr); 159 put_dc_block_number(&new_dc[i], bh[i]->b_blocknr);
152 } 160 }
153 memcpy(dc, new_dc, DC_SIZE * count); 161 memcpy(dc, new_dc, DC_SIZE * count);
154 162
155 /* prepare space for count items */ 163 /* prepare space for count items */
156 ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to)); 164 ih = internal_key(cur, ((to == -1) ? 0 : to));
157 165
158 memmove(ih + count, ih, 166 memmove(ih + count, ih,
159 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); 167 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
@@ -190,8 +198,10 @@ static void internal_insert_childs(struct buffer_info *cur_bi,
190 198
191} 199}
192 200
193/* Delete del_num items and node pointers from buffer cur starting from * 201/*
194 * the first_i'th item and first_p'th pointers respectively. */ 202 * Delete del_num items and node pointers from buffer cur starting from
203 * the first_i'th item and first_p'th pointers respectively.
204 */
195static void internal_delete_pointers_items(struct buffer_info *cur_bi, 205static void internal_delete_pointers_items(struct buffer_info *cur_bi,
196 int first_p, 206 int first_p,
197 int first_i, int del_num) 207 int first_i, int del_num)
@@ -233,7 +243,7 @@ static void internal_delete_pointers_items(struct buffer_info *cur_bi,
233 dc = B_N_CHILD(cur, first_p); 243 dc = B_N_CHILD(cur, first_p);
234 244
235 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); 245 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
236 key = B_N_PDELIM_KEY(cur, first_i); 246 key = internal_key(cur, first_i);
237 memmove(key, key + del_num, 247 memmove(key, key + del_num,
238 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - 248 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
239 del_num) * DC_SIZE); 249 del_num) * DC_SIZE);
@@ -270,22 +280,30 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
270 280
271 i_from = (from == 0) ? from : from - 1; 281 i_from = (from == 0) ? from : from - 1;
272 282
273 /* delete n pointers starting from `from' position in CUR; 283 /*
274 delete n keys starting from 'i_from' position in CUR; 284 * delete n pointers starting from `from' position in CUR;
285 * delete n keys starting from 'i_from' position in CUR;
275 */ 286 */
276 internal_delete_pointers_items(cur_bi, from, i_from, n); 287 internal_delete_pointers_items(cur_bi, from, i_from, n);
277} 288}
278 289
279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 290/*
280* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest 291 * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer
281 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest 292 * dest
293 * last_first == FIRST_TO_LAST means that we copy first items
294 * from src to tail of dest
295 * last_first == LAST_TO_FIRST means that we copy last items
296 * from src to head of dest
282 */ 297 */
283static void internal_copy_pointers_items(struct buffer_info *dest_bi, 298static void internal_copy_pointers_items(struct buffer_info *dest_bi,
284 struct buffer_head *src, 299 struct buffer_head *src,
285 int last_first, int cpy_num) 300 int last_first, int cpy_num)
286{ 301{
287 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * 302 /*
288 * as delimiting key have already inserted to buffer dest.*/ 303 * ATTENTION! Number of node pointers in DEST is equal to number
304 * of items in DEST as delimiting key have already inserted to
305 * buffer dest.
306 */
289 struct buffer_head *dest = dest_bi->bi_bh; 307 struct buffer_head *dest = dest_bi->bi_bh;
290 int nr_dest, nr_src; 308 int nr_dest, nr_src;
291 int dest_order, src_order; 309 int dest_order, src_order;
@@ -330,13 +348,13 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi,
330 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num); 348 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
331 349
332 /* prepare space for cpy_num - 1 item headers */ 350 /* prepare space for cpy_num - 1 item headers */
333 key = B_N_PDELIM_KEY(dest, dest_order); 351 key = internal_key(dest, dest_order);
334 memmove(key + cpy_num - 1, key, 352 memmove(key + cpy_num - 1, key,
335 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + 353 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
336 cpy_num)); 354 cpy_num));
337 355
338 /* insert headers */ 356 /* insert headers */
339 memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1)); 357 memcpy(key, internal_key(src, src_order), KEY_SIZE * (cpy_num - 1));
340 358
341 /* sizes, item number */ 359 /* sizes, item number */
342 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1)); 360 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
@@ -366,7 +384,9 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi,
366 384
367} 385}
368 386
369/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. 387/*
388 * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to
389 * buffer dest.
370 * Delete cpy_num - del_par items and node pointers from buffer src. 390 * Delete cpy_num - del_par items and node pointers from buffer src.
371 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. 391 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
372 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 392 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
@@ -385,8 +405,10 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 405 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
386 first_pointer = 0; 406 first_pointer = 0;
387 first_item = 0; 407 first_item = 0;
388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 408 /*
389 for key - with first_item */ 409 * delete cpy_num - del_par pointers and keys starting for
410 * pointers with first_pointer, for key - with first_item
411 */
390 internal_delete_pointers_items(src_bi, first_pointer, 412 internal_delete_pointers_items(src_bi, first_pointer,
391 first_item, cpy_num - del_par); 413 first_item, cpy_num - del_par);
392 } else { /* shift_right occurs */ 414 } else { /* shift_right occurs */
@@ -404,7 +426,9 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
404} 426}
405 427
406/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ 428/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
407static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ 429static void internal_insert_key(struct buffer_info *dest_bi,
430 /* insert key before key with n_dest number */
431 int dest_position_before,
408 struct buffer_head *src, int src_position) 432 struct buffer_head *src, int src_position)
409{ 433{
410 struct buffer_head *dest = dest_bi->bi_bh; 434 struct buffer_head *dest = dest_bi->bi_bh;
@@ -429,12 +453,12 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b
429 nr = blkh_nr_item(blkh); 453 nr = blkh_nr_item(blkh);
430 454
431 /* prepare space for inserting key */ 455 /* prepare space for inserting key */
432 key = B_N_PDELIM_KEY(dest, dest_position_before); 456 key = internal_key(dest, dest_position_before);
433 memmove(key + 1, key, 457 memmove(key + 1, key,
434 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); 458 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
435 459
436 /* insert key */ 460 /* insert key */
437 memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); 461 memcpy(key, internal_key(src, src_position), KEY_SIZE);
438 462
439 /* Change dirt, free space, item number fields. */ 463 /* Change dirt, free space, item number fields. */
440 464
@@ -453,13 +477,19 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b
453 } 477 }
454} 478}
455 479
456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 480/*
457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 481 * Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
482 * Copy pointer_amount node pointers and pointer_amount - 1 items from
483 * buffer src to buffer dest.
458 * Replace d_key'th key in buffer cfl. 484 * Replace d_key'th key in buffer cfl.
459 * Delete pointer_amount items and node pointers from buffer src. 485 * Delete pointer_amount items and node pointers from buffer src.
460 */ 486 */
461/* this can be invoked both to shift from S to L and from R to S */ 487/* this can be invoked both to shift from S to L and from R to S */
462static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ 488static void internal_shift_left(
489 /*
490 * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S
491 */
492 int mode,
463 struct tree_balance *tb, 493 struct tree_balance *tb,
464 int h, int pointer_amount) 494 int h, int pointer_amount)
465{ 495{
@@ -473,7 +503,10 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO
473 /*printk("pointer_amount = %d\n",pointer_amount); */ 503 /*printk("pointer_amount = %d\n",pointer_amount); */
474 504
475 if (pointer_amount) { 505 if (pointer_amount) {
476 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ 506 /*
507 * insert delimiting key from common father of dest and
508 * src to node dest into position B_NR_ITEM(dest)
509 */
477 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, 510 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
478 d_key_position); 511 d_key_position);
479 512
@@ -492,7 +525,8 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO
492 525
493} 526}
494 527
495/* Insert delimiting key to L[h]. 528/*
529 * Insert delimiting key to L[h].
496 * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. 530 * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
497 * Delete n - 1 items and node pointers from buffer S[h]. 531 * Delete n - 1 items and node pointers from buffer S[h].
498 */ 532 */
@@ -507,23 +541,27 @@ static void internal_shift1_left(struct tree_balance *tb,
507 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 541 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
508 &dest_bi, &src_bi, &d_key_position, &cf); 542 &dest_bi, &src_bi, &d_key_position, &cf);
509 543
510 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ 544 /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
545 if (pointer_amount > 0)
511 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, 546 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
512 d_key_position); 547 d_key_position);
513 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
514 548
515 /* last parameter is del_parameter */ 549 /* last parameter is del_parameter */
516 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, 550 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
517 pointer_amount, 1); 551 pointer_amount, 1);
518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
519} 552}
520 553
521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 554/*
555 * Insert d_key'th (delimiting) key from buffer cfr to head of dest.
522 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 556 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
523 * Replace d_key'th key in buffer cfr. 557 * Replace d_key'th key in buffer cfr.
524 * Delete n items and node pointers from buffer src. 558 * Delete n items and node pointers from buffer src.
525 */ 559 */
526static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ 560static void internal_shift_right(
561 /*
562 * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S
563 */
564 int mode,
527 struct tree_balance *tb, 565 struct tree_balance *tb,
528 int h, int pointer_amount) 566 int h, int pointer_amount)
529{ 567{
@@ -538,7 +576,10 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR
538 nr = B_NR_ITEMS(src_bi.bi_bh); 576 nr = B_NR_ITEMS(src_bi.bi_bh);
539 577
540 if (pointer_amount > 0) { 578 if (pointer_amount > 0) {
541 /* insert delimiting key from common father of dest and src to dest node into position 0 */ 579 /*
580 * insert delimiting key from common father of dest
581 * and src to dest node into position 0
582 */
542 internal_insert_key(&dest_bi, 0, cf, d_key_position); 583 internal_insert_key(&dest_bi, 0, cf, d_key_position);
543 if (nr == pointer_amount - 1) { 584 if (nr == pointer_amount - 1) {
544 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || 585 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
@@ -559,7 +600,8 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR
559 pointer_amount, 0); 600 pointer_amount, 0);
560} 601}
561 602
562/* Insert delimiting key to R[h]. 603/*
604 * Insert delimiting key to R[h].
563 * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. 605 * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
564 * Delete n - 1 items and node pointers from buffer S[h]. 606 * Delete n - 1 items and node pointers from buffer S[h].
565 */ 607 */
@@ -574,18 +616,19 @@ static void internal_shift1_right(struct tree_balance *tb,
574 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 616 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
575 &dest_bi, &src_bi, &d_key_position, &cf); 617 &dest_bi, &src_bi, &d_key_position, &cf);
576 618
577 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ 619 /* insert rkey from CFR[h] to right neighbor R[h] */
620 if (pointer_amount > 0)
578 internal_insert_key(&dest_bi, 0, cf, d_key_position); 621 internal_insert_key(&dest_bi, 0, cf, d_key_position);
579 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
580 622
581 /* last parameter is del_parameter */ 623 /* last parameter is del_parameter */
582 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, 624 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
583 pointer_amount, 1); 625 pointer_amount, 1);
584 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
585} 626}
586 627
587/* Delete insert_num node pointers together with their left items 628/*
588 * and balance current node.*/ 629 * Delete insert_num node pointers together with their left items
630 * and balance current node.
631 */
589static void balance_internal_when_delete(struct tree_balance *tb, 632static void balance_internal_when_delete(struct tree_balance *tb,
590 int h, int child_pos) 633 int h, int child_pos)
591{ 634{
@@ -626,9 +669,11 @@ static void balance_internal_when_delete(struct tree_balance *tb,
626 new_root = tb->R[h - 1]; 669 new_root = tb->R[h - 1];
627 else 670 else
628 new_root = tb->L[h - 1]; 671 new_root = tb->L[h - 1];
629 /* switch super block's tree root block number to the new value */ 672 /*
673 * switch super block's tree root block
674 * number to the new value */
630 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); 675 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
631 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; 676 /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */
632 PUT_SB_TREE_HEIGHT(tb->tb_sb, 677 PUT_SB_TREE_HEIGHT(tb->tb_sb,
633 SB_TREE_HEIGHT(tb->tb_sb) - 1); 678 SB_TREE_HEIGHT(tb->tb_sb) - 1);
634 679
@@ -636,8 +681,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
636 REISERFS_SB(tb->tb_sb)->s_sbh, 681 REISERFS_SB(tb->tb_sb)->s_sbh,
637 1); 682 1);
638 /*&&&&&&&&&&&&&&&&&&&&&& */ 683 /*&&&&&&&&&&&&&&&&&&&&&& */
684 /* use check_internal if new root is an internal node */
639 if (h > 1) 685 if (h > 1)
640 /* use check_internal if new root is an internal node */
641 check_internal(new_root); 686 check_internal(new_root);
642 /*&&&&&&&&&&&&&&&&&&&&&& */ 687 /*&&&&&&&&&&&&&&&&&&&&&& */
643 688
@@ -648,7 +693,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
648 return; 693 return;
649 } 694 }
650 695
651 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ 696 /* join S[h] with L[h] */
697 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {
652 698
653 RFALSE(tb->rnum[h] != 0, 699 RFALSE(tb->rnum[h] != 0,
654 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", 700 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
@@ -660,7 +706,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
660 return; 706 return;
661 } 707 }
662 708
663 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ 709 /* join S[h] with R[h] */
710 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {
664 RFALSE(tb->lnum[h] != 0, 711 RFALSE(tb->lnum[h] != 0,
665 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", 712 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
666 h, tb->lnum[h]); 713 h, tb->lnum[h]);
@@ -671,17 +718,18 @@ static void balance_internal_when_delete(struct tree_balance *tb,
671 return; 718 return;
672 } 719 }
673 720
674 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ 721 /* borrow from left neighbor L[h] */
722 if (tb->lnum[h] < 0) {
675 RFALSE(tb->rnum[h] != 0, 723 RFALSE(tb->rnum[h] != 0,
676 "wrong tb->rnum[%d]==%d when borrow from L[h]", h, 724 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
677 tb->rnum[h]); 725 tb->rnum[h]);
678 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
679 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, 726 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
680 -tb->lnum[h]); 727 -tb->lnum[h]);
681 return; 728 return;
682 } 729 }
683 730
684 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ 731 /* borrow from right neighbor R[h] */
732 if (tb->rnum[h] < 0) {
685 RFALSE(tb->lnum[h] != 0, 733 RFALSE(tb->lnum[h] != 0,
686 "invalid tb->lnum[%d]==%d when borrow from R[h]", 734 "invalid tb->lnum[%d]==%d when borrow from R[h]",
687 h, tb->lnum[h]); 735 h, tb->lnum[h]);
@@ -689,7 +737,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
689 return; 737 return;
690 } 738 }
691 739
692 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ 740 /* split S[h] into two parts and put them into neighbors */
741 if (tb->lnum[h] > 0) {
693 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, 742 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
694 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", 743 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
695 h, tb->lnum[h], h, tb->rnum[h], n); 744 h, tb->lnum[h], h, tb->rnum[h], n);
@@ -717,7 +766,7 @@ static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
717 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) 766 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
718 return; 767 return;
719 768
720 memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE); 769 memcpy(internal_key(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
721 770
722 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0); 771 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
723} 772}
@@ -732,34 +781,41 @@ static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
732 "R[h] can not be empty if it exists (item number=%d)", 781 "R[h] can not be empty if it exists (item number=%d)",
733 B_NR_ITEMS(tb->R[h])); 782 B_NR_ITEMS(tb->R[h]));
734 783
735 memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE); 784 memcpy(internal_key(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
736 785
737 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); 786 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
738} 787}
739 788
740int balance_internal(struct tree_balance *tb, /* tree_balance structure */ 789
741 int h, /* level of the tree */ 790/*
742 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ 791 * if inserting/pasting {
743 struct buffer_head **insert_ptr /* node for insertion on higher level */ 792 * child_pos is the position of the node-pointer in S[h] that
744 ) 793 * pointed to S[h-1] before balancing of the h-1 level;
745 /* if inserting/pasting 794 * this means that new pointers and items must be inserted AFTER
746 { 795 * child_pos
747 child_pos is the position of the node-pointer in S[h] that * 796 * } else {
748 pointed to S[h-1] before balancing of the h-1 level; * 797 * it is the position of the leftmost pointer that must be deleted
749 this means that new pointers and items must be inserted AFTER * 798 * (together with its corresponding key to the left of the pointer)
750 child_pos 799 * as a result of the previous level's balancing.
751 } 800 * }
752 else 801 */
753 { 802
754 it is the position of the leftmost pointer that must be deleted (together with 803int balance_internal(struct tree_balance *tb,
755 its corresponding key to the left of the pointer) 804 int h, /* level of the tree */
756 as a result of the previous level's balancing. 805 int child_pos,
757 } 806 /* key for insertion on higher level */
758 */ 807 struct item_head *insert_key,
808 /* node for insertion on higher level */
809 struct buffer_head **insert_ptr)
759{ 810{
760 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); 811 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
761 struct buffer_info bi; 812 struct buffer_info bi;
762 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ 813
814 /*
815 * we return this: it is 0 if there is no S[h],
816 * else it is tb->S[h]->b_item_order
817 */
818 int order;
763 int insert_num, n, k; 819 int insert_num, n, k;
764 struct buffer_head *S_new; 820 struct buffer_head *S_new;
765 struct item_head new_insert_key; 821 struct item_head new_insert_key;
@@ -774,8 +830,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
774 (tbSh) ? PATH_H_POSITION(tb->tb_path, 830 (tbSh) ? PATH_H_POSITION(tb->tb_path,
775 h + 1) /*tb->S[h]->b_item_order */ : 0; 831 h + 1) /*tb->S[h]->b_item_order */ : 0;
776 832
777 /* Using insert_size[h] calculate the number insert_num of items 833 /*
778 that must be inserted to or deleted from S[h]. */ 834 * Using insert_size[h] calculate the number insert_num of items
835 * that must be inserted to or deleted from S[h].
836 */
779 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); 837 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
780 838
781 /* Check whether insert_num is proper * */ 839 /* Check whether insert_num is proper * */
@@ -794,23 +852,21 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
794 852
795 k = 0; 853 k = 0;
796 if (tb->lnum[h] > 0) { 854 if (tb->lnum[h] > 0) {
797 /* shift lnum[h] items from S[h] to the left neighbor L[h]. 855 /*
798 check how many of new items fall into L[h] or CFL[h] after 856 * shift lnum[h] items from S[h] to the left neighbor L[h].
799 shifting */ 857 * check how many of new items fall into L[h] or CFL[h] after
858 * shifting
859 */
800 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ 860 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
801 if (tb->lnum[h] <= child_pos) { 861 if (tb->lnum[h] <= child_pos) {
802 /* new items don't fall into L[h] or CFL[h] */ 862 /* new items don't fall into L[h] or CFL[h] */
803 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 863 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
804 tb->lnum[h]); 864 tb->lnum[h]);
805 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
806 child_pos -= tb->lnum[h]; 865 child_pos -= tb->lnum[h];
807 } else if (tb->lnum[h] > child_pos + insert_num) { 866 } else if (tb->lnum[h] > child_pos + insert_num) {
808 /* all new items fall into L[h] */ 867 /* all new items fall into L[h] */
809 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 868 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
810 tb->lnum[h] - insert_num); 869 tb->lnum[h] - insert_num);
811 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
812 tb->lnum[h]-insert_num);
813 */
814 /* insert insert_num keys and node-pointers into L[h] */ 870 /* insert insert_num keys and node-pointers into L[h] */
815 bi.tb = tb; 871 bi.tb = tb;
816 bi.bi_bh = tb->L[h]; 872 bi.bi_bh = tb->L[h];
@@ -826,7 +882,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
826 } else { 882 } else {
827 struct disk_child *dc; 883 struct disk_child *dc;
828 884
829 /* some items fall into L[h] or CFL[h], but some don't fall */ 885 /*
886 * some items fall into L[h] or CFL[h],
887 * but some don't fall
888 */
830 internal_shift1_left(tb, h, child_pos + 1); 889 internal_shift1_left(tb, h, child_pos + 1);
831 /* calculate number of new items that fall into L[h] */ 890 /* calculate number of new items that fall into L[h] */
832 k = tb->lnum[h] - child_pos - 1; 891 k = tb->lnum[h] - child_pos - 1;
@@ -841,7 +900,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
841 900
842 replace_lkey(tb, h, insert_key + k); 901 replace_lkey(tb, h, insert_key + k);
843 902
844 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ 903 /*
904 * replace the first node-ptr in S[h] by
905 * node-ptr to insert_ptr[k]
906 */
845 dc = B_N_CHILD(tbSh, 0); 907 dc = B_N_CHILD(tbSh, 0);
846 put_dc_size(dc, 908 put_dc_size(dc,
847 MAX_CHILD_SIZE(insert_ptr[k]) - 909 MAX_CHILD_SIZE(insert_ptr[k]) -
@@ -860,17 +922,17 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
860 /* tb->lnum[h] > 0 */ 922 /* tb->lnum[h] > 0 */
861 if (tb->rnum[h] > 0) { 923 if (tb->rnum[h] > 0) {
862 /*shift rnum[h] items from S[h] to the right neighbor R[h] */ 924 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
863 /* check how many of new items fall into R or CFR after shifting */ 925 /*
926 * check how many of new items fall into R or CFR
927 * after shifting
928 */
864 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ 929 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
865 if (n - tb->rnum[h] >= child_pos) 930 if (n - tb->rnum[h] >= child_pos)
866 /* new items fall into S[h] */ 931 /* new items fall into S[h] */
867 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
868 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 932 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
869 tb->rnum[h]); 933 tb->rnum[h]);
870 else if (n + insert_num - tb->rnum[h] < child_pos) { 934 else if (n + insert_num - tb->rnum[h] < child_pos) {
871 /* all new items fall into R[h] */ 935 /* all new items fall into R[h] */
872 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
873 tb->rnum[h] - insert_num); */
874 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 936 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
875 tb->rnum[h] - insert_num); 937 tb->rnum[h] - insert_num);
876 938
@@ -904,7 +966,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
904 966
905 replace_rkey(tb, h, insert_key + insert_num - k - 1); 967 replace_rkey(tb, h, insert_key + insert_num - k - 1);
906 968
907 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ 969 /*
970 * replace the first node-ptr in R[h] by
971 * node-ptr insert_ptr[insert_num-k-1]
972 */
908 dc = B_N_CHILD(tb->R[h], 0); 973 dc = B_N_CHILD(tb->R[h], 0);
909 put_dc_size(dc, 974 put_dc_size(dc,
910 MAX_CHILD_SIZE(insert_ptr 975 MAX_CHILD_SIZE(insert_ptr
@@ -921,7 +986,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
921 } 986 }
922 } 987 }
923 988
924 /** Fill new node that appears instead of S[h] **/ 989 /** Fill new node that appears instead of S[h] **/
925 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); 990 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
926 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); 991 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
927 992
@@ -997,26 +1062,30 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
997 /* new items don't fall into S_new */ 1062 /* new items don't fall into S_new */
998 /* store the delimiting key for the next level */ 1063 /* store the delimiting key for the next level */
999 /* new_insert_key = (n - snum)'th key in S[h] */ 1064 /* new_insert_key = (n - snum)'th key in S[h] */
1000 memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum), 1065 memcpy(&new_insert_key, internal_key(tbSh, n - snum),
1001 KEY_SIZE); 1066 KEY_SIZE);
1002 /* last parameter is del_par */ 1067 /* last parameter is del_par */
1003 internal_move_pointers_items(&dest_bi, &src_bi, 1068 internal_move_pointers_items(&dest_bi, &src_bi,
1004 LAST_TO_FIRST, snum, 0); 1069 LAST_TO_FIRST, snum, 0);
1005 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1006 } else if (n + insert_num - snum < child_pos) { 1070 } else if (n + insert_num - snum < child_pos) {
1007 /* all new items fall into S_new */ 1071 /* all new items fall into S_new */
1008 /* store the delimiting key for the next level */ 1072 /* store the delimiting key for the next level */
1009 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ 1073 /*
1074 * new_insert_key = (n + insert_item - snum)'th
1075 * key in S[h]
1076 */
1010 memcpy(&new_insert_key, 1077 memcpy(&new_insert_key,
1011 B_N_PDELIM_KEY(tbSh, n + insert_num - snum), 1078 internal_key(tbSh, n + insert_num - snum),
1012 KEY_SIZE); 1079 KEY_SIZE);
1013 /* last parameter is del_par */ 1080 /* last parameter is del_par */
1014 internal_move_pointers_items(&dest_bi, &src_bi, 1081 internal_move_pointers_items(&dest_bi, &src_bi,
1015 LAST_TO_FIRST, 1082 LAST_TO_FIRST,
1016 snum - insert_num, 0); 1083 snum - insert_num, 0);
1017 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1018 1084
1019 /* insert insert_num keys and node-pointers into S_new */ 1085 /*
1086 * insert insert_num keys and node-pointers
1087 * into S_new
1088 */
1020 internal_insert_childs(&dest_bi, 1089 internal_insert_childs(&dest_bi,
1021 /*S_new,tb->S[h-1]->b_next, */ 1090 /*S_new,tb->S[h-1]->b_next, */
1022 child_pos - n - insert_num + 1091 child_pos - n - insert_num +
@@ -1033,7 +1102,6 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1033 internal_move_pointers_items(&dest_bi, &src_bi, 1102 internal_move_pointers_items(&dest_bi, &src_bi,
1034 LAST_TO_FIRST, 1103 LAST_TO_FIRST,
1035 n - child_pos + 1, 1); 1104 n - child_pos + 1, 1);
1036 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1037 /* calculate number of new items that fall into S_new */ 1105 /* calculate number of new items that fall into S_new */
1038 k = snum - n + child_pos - 1; 1106 k = snum - n + child_pos - 1;
1039 1107
@@ -1043,7 +1111,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1043 /* new_insert_key = insert_key[insert_num - k - 1] */ 1111 /* new_insert_key = insert_key[insert_num - k - 1] */
1044 memcpy(&new_insert_key, insert_key + insert_num - k - 1, 1112 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1045 KEY_SIZE); 1113 KEY_SIZE);
1046 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ 1114 /*
1115 * replace first node-ptr in S_new by node-ptr
1116 * to insert_ptr[insert_num-k-1]
1117 */
1047 1118
1048 dc = B_N_CHILD(S_new, 0); 1119 dc = B_N_CHILD(S_new, 0);
1049 put_dc_size(dc, 1120 put_dc_size(dc,
@@ -1066,7 +1137,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1066 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", 1137 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1067 S_new); 1138 S_new);
1068 1139
1069 // S_new is released in unfix_nodes 1140 /* S_new is released in unfix_nodes */
1070 } 1141 }
1071 1142
1072 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ 1143 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index bc8b8009897d..e3ca04894919 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
25 25
26void reiserfs_evict_inode(struct inode *inode) 26void reiserfs_evict_inode(struct inode *inode)
27{ 27{
28 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 28 /*
29 * We need blocks for transaction + (user+group) quota
30 * update (possibly delete)
31 */
29 int jbegin_count = 32 int jbegin_count =
30 JOURNAL_PER_BALANCE_CNT * 2 + 33 JOURNAL_PER_BALANCE_CNT * 2 +
31 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 34 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
@@ -39,8 +42,12 @@ void reiserfs_evict_inode(struct inode *inode)
39 if (inode->i_nlink) 42 if (inode->i_nlink)
40 goto no_delete; 43 goto no_delete;
41 44
42 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 45 /*
43 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 46 * The = 0 happens when we abort creating a new inode
47 * for some reason like lack of space..
48 * also handles bad_inode case
49 */
50 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {
44 51
45 reiserfs_delete_xattrs(inode); 52 reiserfs_delete_xattrs(inode);
46 53
@@ -54,34 +61,43 @@ void reiserfs_evict_inode(struct inode *inode)
54 61
55 err = reiserfs_delete_object(&th, inode); 62 err = reiserfs_delete_object(&th, inode);
56 63
57 /* Do quota update inside a transaction for journaled quotas. We must do that 64 /*
58 * after delete_object so that quota updates go into the same transaction as 65 * Do quota update inside a transaction for journaled quotas.
59 * stat data deletion */ 66 * We must do that after delete_object so that quota updates
67 * go into the same transaction as stat data deletion
68 */
60 if (!err) { 69 if (!err) {
61 int depth = reiserfs_write_unlock_nested(inode->i_sb); 70 int depth = reiserfs_write_unlock_nested(inode->i_sb);
62 dquot_free_inode(inode); 71 dquot_free_inode(inode);
63 reiserfs_write_lock_nested(inode->i_sb, depth); 72 reiserfs_write_lock_nested(inode->i_sb, depth);
64 } 73 }
65 74
66 if (journal_end(&th, inode->i_sb, jbegin_count)) 75 if (journal_end(&th))
67 goto out; 76 goto out;
68 77
69 /* check return value from reiserfs_delete_object after 78 /*
79 * check return value from reiserfs_delete_object after
70 * ending the transaction 80 * ending the transaction
71 */ 81 */
72 if (err) 82 if (err)
73 goto out; 83 goto out;
74 84
75 /* all items of file are deleted, so we can remove "save" link */ 85 /*
76 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything 86 * all items of file are deleted, so we can remove
77 * about an error here */ 87 * "save" link
88 * we can't do anything about an error here
89 */
90 remove_save_link(inode, 0 /* not truncate */);
78out: 91out:
79 reiserfs_write_unlock(inode->i_sb); 92 reiserfs_write_unlock(inode->i_sb);
80 } else { 93 } else {
81 /* no object items are in the tree */ 94 /* no object items are in the tree */
82 ; 95 ;
83 } 96 }
84 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 97
98 /* note this must go after the journal_end to prevent deadlock */
99 clear_inode(inode);
100
85 dquot_drop(inode); 101 dquot_drop(inode);
86 inode->i_blocks = 0; 102 inode->i_blocks = 0;
87 return; 103 return;
@@ -103,8 +119,10 @@ static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
103 key->key_length = length; 119 key->key_length = length;
104} 120}
105 121
106/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set 122/*
107 offset and type of key */ 123 * take base of inode_key (it comes from inode always) (dirid, objectid)
124 * and version from an inode, set offset and type of key
125 */
108void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, 126void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
109 int type, int length) 127 int type, int length)
110{ 128{
@@ -114,9 +132,7 @@ void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
114 length); 132 length);
115} 133}
116 134
117// 135/* when key is 0, do not set version and short key */
118// when key is 0, do not set version and short key
119//
120inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, 136inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
121 int version, 137 int version,
122 loff_t offset, int type, int length, 138 loff_t offset, int type, int length,
@@ -132,43 +148,47 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
132 set_le_ih_k_type(ih, type); 148 set_le_ih_k_type(ih, type);
133 put_ih_item_len(ih, length); 149 put_ih_item_len(ih, length);
134 /* set_ih_free_space (ih, 0); */ 150 /* set_ih_free_space (ih, 0); */
135 // for directory items it is entry count, for directs and stat 151 /*
136 // datas - 0xffff, for indirects - 0 152 * for directory items it is entry count, for directs and stat
153 * datas - 0xffff, for indirects - 0
154 */
137 put_ih_entry_count(ih, entry_count); 155 put_ih_entry_count(ih, entry_count);
138} 156}
139 157
140// 158/*
141// FIXME: we might cache recently accessed indirect item 159 * FIXME: we might cache recently accessed indirect item
142 160 * Ugh. Not too eager for that....
143// Ugh. Not too eager for that.... 161 * I cut the code until such time as I see a convincing argument (benchmark).
144// I cut the code until such time as I see a convincing argument (benchmark). 162 * I don't want a bloated inode struct..., and I don't like code complexity....
145// I don't want a bloated inode struct..., and I don't like code complexity.... 163 */
146
147/* cutting the code is fine, since it really isn't in use yet and is easy
148** to add back in. But, Vladimir has a really good idea here. Think
149** about what happens for reading a file. For each page,
150** The VFS layer calls reiserfs_readpage, who searches the tree to find
151** an indirect item. This indirect item has X number of pointers, where
152** X is a big number if we've done the block allocation right. But,
153** we only use one or two of these pointers during each call to readpage,
154** needlessly researching again later on.
155**
156** The size of the cache could be dynamic based on the size of the file.
157**
158** I'd also like to see us cache the location the stat data item, since
159** we are needlessly researching for that frequently.
160**
161** --chris
162*/
163 164
164/* If this page has a file tail in it, and 165/*
165** it was read in by get_block_create_0, the page data is valid, 166 * cutting the code is fine, since it really isn't in use yet and is easy
166** but tail is still sitting in a direct item, and we can't write to 167 * to add back in. But, Vladimir has a really good idea here. Think
167** it. So, look through this page, and check all the mapped buffers 168 * about what happens for reading a file. For each page,
168** to make sure they have valid block numbers. Any that don't need 169 * The VFS layer calls reiserfs_readpage, who searches the tree to find
169** to be unmapped, so that __block_write_begin will correctly call 170 * an indirect item. This indirect item has X number of pointers, where
170** reiserfs_get_block to convert the tail into an unformatted node 171 * X is a big number if we've done the block allocation right. But,
171*/ 172 * we only use one or two of these pointers during each call to readpage,
173 * needlessly researching again later on.
174 *
175 * The size of the cache could be dynamic based on the size of the file.
176 *
177 * I'd also like to see us cache the location the stat data item, since
178 * we are needlessly researching for that frequently.
179 *
180 * --chris
181 */
182
183/*
184 * If this page has a file tail in it, and
185 * it was read in by get_block_create_0, the page data is valid,
186 * but tail is still sitting in a direct item, and we can't write to
187 * it. So, look through this page, and check all the mapped buffers
188 * to make sure they have valid block numbers. Any that don't need
189 * to be unmapped, so that __block_write_begin will correctly call
190 * reiserfs_get_block to convert the tail into an unformatted node
191 */
172static inline void fix_tail_page_for_writing(struct page *page) 192static inline void fix_tail_page_for_writing(struct page *page)
173{ 193{
174 struct buffer_head *head, *next, *bh; 194 struct buffer_head *head, *next, *bh;
@@ -186,8 +206,10 @@ static inline void fix_tail_page_for_writing(struct page *page)
186 } 206 }
187} 207}
188 208
189/* reiserfs_get_block does not need to allocate a block only if it has been 209/*
190 done already or non-hole position has been found in the indirect item */ 210 * reiserfs_get_block does not need to allocate a block only if it has been
211 * done already or non-hole position has been found in the indirect item
212 */
191static inline int allocation_needed(int retval, b_blocknr_t allocated, 213static inline int allocation_needed(int retval, b_blocknr_t allocated,
192 struct item_head *ih, 214 struct item_head *ih,
193 __le32 * item, int pos_in_item) 215 __le32 * item, int pos_in_item)
@@ -211,14 +233,16 @@ static inline void set_block_dev_mapped(struct buffer_head *bh,
211 map_bh(bh, inode->i_sb, block); 233 map_bh(bh, inode->i_sb, block);
212} 234}
213 235
214// 236/*
215 // files which were created in the earlier version cannot be longer 237 * files which were created in the earlier version cannot be longer
216 // than 2 GB 238 * than 2 GB
217// 239 */
218static int file_capable(struct inode *inode, sector_t block) 240static int file_capable(struct inode *inode, sector_t block)
219{ 241{
220 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 242 /* it is new file. */
221 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 243 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||
244 /* old file, but 'block' is inside of 2gb */
245 block < (1 << (31 - inode->i_sb->s_blocksize_bits)))
222 return 1; 246 return 1;
223 247
224 return 0; 248 return 0;
@@ -228,7 +252,6 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
228 struct inode *inode, struct treepath *path) 252 struct inode *inode, struct treepath *path)
229{ 253{
230 struct super_block *s = th->t_super; 254 struct super_block *s = th->t_super;
231 int len = th->t_blocks_allocated;
232 int err; 255 int err;
233 256
234 BUG_ON(!th->t_trans_id); 257 BUG_ON(!th->t_trans_id);
@@ -241,7 +264,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
241 return 0; 264 return 0;
242 } 265 }
243 reiserfs_update_sd(th, inode); 266 reiserfs_update_sd(th, inode);
244 err = journal_end(th, s, len); 267 err = journal_end(th);
245 if (!err) { 268 if (!err) {
246 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); 269 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
247 if (!err) 270 if (!err)
@@ -250,14 +273,14 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
250 return err; 273 return err;
251} 274}
252 275
253// it is called by get_block when create == 0. Returns block number 276/*
254// for 'block'-th logical block of file. When it hits direct item it 277 * it is called by get_block when create == 0. Returns block number
255// returns 0 (being called from bmap) or read direct item into piece 278 * for 'block'-th logical block of file. When it hits direct item it
256// of page (bh_result) 279 * returns 0 (being called from bmap) or read direct item into piece
257 280 * of page (bh_result)
258// Please improve the english/clarity in the comment above, as it is 281 * Please improve the english/clarity in the comment above, as it is
259// hard to understand. 282 * hard to understand.
260 283 */
261static int _get_block_create_0(struct inode *inode, sector_t block, 284static int _get_block_create_0(struct inode *inode, sector_t block,
262 struct buffer_head *bh_result, int args) 285 struct buffer_head *bh_result, int args)
263{ 286{
@@ -273,7 +296,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
273 int done = 0; 296 int done = 0;
274 unsigned long offset; 297 unsigned long offset;
275 298
276 // prepare the key to look for the 'block'-th block of file 299 /* prepare the key to look for the 'block'-th block of file */
277 make_cpu_key(&key, inode, 300 make_cpu_key(&key, inode,
278 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 301 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
279 3); 302 3);
@@ -285,23 +308,28 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
285 kunmap(bh_result->b_page); 308 kunmap(bh_result->b_page);
286 if (result == IO_ERROR) 309 if (result == IO_ERROR)
287 return -EIO; 310 return -EIO;
288 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 311 /*
289 // That there is some MMAPED data associated with it that is yet to be written to disk. 312 * We do not return -ENOENT if there is a hole but page is
313 * uptodate, because it means that there is some MMAPED data
314 * associated with it that is yet to be written to disk.
315 */
290 if ((args & GET_BLOCK_NO_HOLE) 316 if ((args & GET_BLOCK_NO_HOLE)
291 && !PageUptodate(bh_result->b_page)) { 317 && !PageUptodate(bh_result->b_page)) {
292 return -ENOENT; 318 return -ENOENT;
293 } 319 }
294 return 0; 320 return 0;
295 } 321 }
296 // 322
297 bh = get_last_bh(&path); 323 bh = get_last_bh(&path);
298 ih = get_ih(&path); 324 ih = tp_item_head(&path);
299 if (is_indirect_le_ih(ih)) { 325 if (is_indirect_le_ih(ih)) {
300 __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); 326 __le32 *ind_item = (__le32 *) ih_item_body(bh, ih);
301 327
302 /* FIXME: here we could cache indirect item or part of it in 328 /*
303 the inode to avoid search_by_key in case of subsequent 329 * FIXME: here we could cache indirect item or part of it in
304 access to file */ 330 * the inode to avoid search_by_key in case of subsequent
331 * access to file
332 */
305 blocknr = get_block_num(ind_item, path.pos_in_item); 333 blocknr = get_block_num(ind_item, path.pos_in_item);
306 ret = 0; 334 ret = 0;
307 if (blocknr) { 335 if (blocknr) {
@@ -311,8 +339,12 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
311 set_buffer_boundary(bh_result); 339 set_buffer_boundary(bh_result);
312 } 340 }
313 } else 341 } else
314 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 342 /*
315 // That there is some MMAPED data associated with it that is yet to be written to disk. 343 * We do not return -ENOENT if there is a hole but
344 * page is uptodate, because it means that there is
345 * some MMAPED data associated with it that is
346 * yet to be written to disk.
347 */
316 if ((args & GET_BLOCK_NO_HOLE) 348 if ((args & GET_BLOCK_NO_HOLE)
317 && !PageUptodate(bh_result->b_page)) { 349 && !PageUptodate(bh_result->b_page)) {
318 ret = -ENOENT; 350 ret = -ENOENT;
@@ -323,41 +355,45 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
323 kunmap(bh_result->b_page); 355 kunmap(bh_result->b_page);
324 return ret; 356 return ret;
325 } 357 }
326 // requested data are in direct item(s) 358 /* requested data are in direct item(s) */
327 if (!(args & GET_BLOCK_READ_DIRECT)) { 359 if (!(args & GET_BLOCK_READ_DIRECT)) {
328 // we are called by bmap. FIXME: we can not map block of file 360 /*
329 // when it is stored in direct item(s) 361 * we are called by bmap. FIXME: we can not map block of file
362 * when it is stored in direct item(s)
363 */
330 pathrelse(&path); 364 pathrelse(&path);
331 if (p) 365 if (p)
332 kunmap(bh_result->b_page); 366 kunmap(bh_result->b_page);
333 return -ENOENT; 367 return -ENOENT;
334 } 368 }
335 369
336 /* if we've got a direct item, and the buffer or page was uptodate, 370 /*
337 ** we don't want to pull data off disk again. skip to the 371 * if we've got a direct item, and the buffer or page was uptodate,
338 ** end, where we map the buffer and return 372 * we don't want to pull data off disk again. skip to the
373 * end, where we map the buffer and return
339 */ 374 */
340 if (buffer_uptodate(bh_result)) { 375 if (buffer_uptodate(bh_result)) {
341 goto finished; 376 goto finished;
342 } else 377 } else
343 /* 378 /*
344 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date 379 * grab_tail_page can trigger calls to reiserfs_get_block on
345 ** pages without any buffers. If the page is up to date, we don't want 380 * up to date pages without any buffers. If the page is up
346 ** to read old data off disk. Set the up to date bit on the buffer instead 381 * to date, we don't want to read old data off disk. Set the up
347 ** and jump to the end 382 * to date bit on the buffer instead and jump to the end
348 */ 383 */
349 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { 384 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
350 set_buffer_uptodate(bh_result); 385 set_buffer_uptodate(bh_result);
351 goto finished; 386 goto finished;
352 } 387 }
353 // read file tail into part of page 388 /* read file tail into part of page */
354 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); 389 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
355 copy_item_head(&tmp_ih, ih); 390 copy_item_head(&tmp_ih, ih);
356 391
357 /* we only want to kmap if we are reading the tail into the page. 392 /*
358 ** this is not the common case, so we don't kmap until we are 393 * we only want to kmap if we are reading the tail into the page.
359 ** sure we need to. But, this means the item might move if 394 * this is not the common case, so we don't kmap until we are
360 ** kmap schedules 395 * sure we need to. But, this means the item might move if
396 * kmap schedules
361 */ 397 */
362 if (!p) 398 if (!p)
363 p = (char *)kmap(bh_result->b_page); 399 p = (char *)kmap(bh_result->b_page);
@@ -368,10 +404,11 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
368 if (!is_direct_le_ih(ih)) { 404 if (!is_direct_le_ih(ih)) {
369 BUG(); 405 BUG();
370 } 406 }
371 /* make sure we don't read more bytes than actually exist in 407 /*
372 ** the file. This can happen in odd cases where i_size isn't 408 * make sure we don't read more bytes than actually exist in
373 ** correct, and when direct item padding results in a few 409 * the file. This can happen in odd cases where i_size isn't
374 ** extra bytes at the end of the direct item 410 * correct, and when direct item padding results in a few
411 * extra bytes at the end of the direct item
375 */ 412 */
376 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) 413 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
377 break; 414 break;
@@ -383,40 +420,43 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
383 } else { 420 } else {
384 chars = ih_item_len(ih) - path.pos_in_item; 421 chars = ih_item_len(ih) - path.pos_in_item;
385 } 422 }
386 memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); 423 memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars);
387 424
388 if (done) 425 if (done)
389 break; 426 break;
390 427
391 p += chars; 428 p += chars;
392 429
430 /*
431 * we are done if the direct item just read is not the last item
432 * of the node. FIXME: we could try to check the right delimiting
433 * key to see whether the direct item continues in the right
434 * neighbor, or rely on i_size
435 */
393 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) 436 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
394 // we are done if the direct item just read is not the last item
395 // of the node. FIXME: we could try to check the right delimiting
396 // key to see whether the direct item continues in the right
397 // neighbor, or rely on i_size
398 break; 437 break;
399 438
400 // update key to look for the next piece 439 /* update key to look for the next piece */
401 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); 440 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
402 result = search_for_position_by_key(inode->i_sb, &key, &path); 441 result = search_for_position_by_key(inode->i_sb, &key, &path);
403 if (result != POSITION_FOUND) 442 if (result != POSITION_FOUND)
404 // i/o error most likely 443 /* i/o error most likely */
405 break; 444 break;
406 bh = get_last_bh(&path); 445 bh = get_last_bh(&path);
407 ih = get_ih(&path); 446 ih = tp_item_head(&path);
408 } while (1); 447 } while (1);
409 448
410 flush_dcache_page(bh_result->b_page); 449 flush_dcache_page(bh_result->b_page);
411 kunmap(bh_result->b_page); 450 kunmap(bh_result->b_page);
412 451
413 finished: 452finished:
414 pathrelse(&path); 453 pathrelse(&path);
415 454
416 if (result == IO_ERROR) 455 if (result == IO_ERROR)
417 return -EIO; 456 return -EIO;
418 457
419 /* this buffer has valid data, but isn't valid for io. mapping it to 458 /*
459 * this buffer has valid data, but isn't valid for io. mapping it to
420 * block #0 tells the rest of reiserfs it just has a tail in it 460 * block #0 tells the rest of reiserfs it just has a tail in it
421 */ 461 */
422 map_bh(bh_result, inode->i_sb, 0); 462 map_bh(bh_result, inode->i_sb, 0);
@@ -424,8 +464,10 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
424 return 0; 464 return 0;
425} 465}
426 466
427// this is called to create file map. So, _get_block_create_0 will not 467/*
428// read direct item 468 * this is called to create file map. So, _get_block_create_0 will not
469 * read direct item
470 */
429static int reiserfs_bmap(struct inode *inode, sector_t block, 471static int reiserfs_bmap(struct inode *inode, sector_t block,
430 struct buffer_head *bh_result, int create) 472 struct buffer_head *bh_result, int create)
431{ 473{
@@ -439,22 +481,23 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
439 return 0; 481 return 0;
440} 482}
441 483
442/* special version of get_block that is only used by grab_tail_page right 484/*
443** now. It is sent to __block_write_begin, and when you try to get a 485 * special version of get_block that is only used by grab_tail_page right
444** block past the end of the file (or a block from a hole) it returns 486 * now. It is sent to __block_write_begin, and when you try to get a
445** -ENOENT instead of a valid buffer. __block_write_begin expects to 487 * block past the end of the file (or a block from a hole) it returns
446** be able to do i/o on the buffers returned, unless an error value 488 * -ENOENT instead of a valid buffer. __block_write_begin expects to
447** is also returned. 489 * be able to do i/o on the buffers returned, unless an error value
448** 490 * is also returned.
449** So, this allows __block_write_begin to be used for reading a single block 491 *
450** in a page. Where it does not produce a valid page for holes, or past the 492 * So, this allows __block_write_begin to be used for reading a single block
451** end of the file. This turns out to be exactly what we need for reading 493 * in a page. Where it does not produce a valid page for holes, or past the
452** tails for conversion. 494 * end of the file. This turns out to be exactly what we need for reading
453** 495 * tails for conversion.
454** The point of the wrapper is forcing a certain value for create, even 496 *
455** though the VFS layer is calling this function with create==1. If you 497 * The point of the wrapper is forcing a certain value for create, even
456** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, 498 * though the VFS layer is calling this function with create==1. If you
457** don't use this function. 499 * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
500 * don't use this function.
458*/ 501*/
459static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, 502static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
460 struct buffer_head *bh_result, 503 struct buffer_head *bh_result,
@@ -463,8 +506,10 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
463 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); 506 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
464} 507}
465 508
466/* This is special helper for reiserfs_get_block in case we are executing 509/*
467 direct_IO request. */ 510 * This is special helper for reiserfs_get_block in case we are executing
511 * direct_IO request.
512 */
468static int reiserfs_get_blocks_direct_io(struct inode *inode, 513static int reiserfs_get_blocks_direct_io(struct inode *inode,
469 sector_t iblock, 514 sector_t iblock,
470 struct buffer_head *bh_result, 515 struct buffer_head *bh_result,
@@ -474,9 +519,11 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
474 519
475 bh_result->b_page = NULL; 520 bh_result->b_page = NULL;
476 521
477 /* We set the b_size before reiserfs_get_block call since it is 522 /*
478 referenced in convert_tail_for_hole() that may be called from 523 * We set the b_size before reiserfs_get_block call since it is
479 reiserfs_get_block() */ 524 * referenced in convert_tail_for_hole() that may be called from
525 * reiserfs_get_block()
526 */
480 bh_result->b_size = (1 << inode->i_blkbits); 527 bh_result->b_size = (1 << inode->i_blkbits);
481 528
482 ret = reiserfs_get_block(inode, iblock, bh_result, 529 ret = reiserfs_get_block(inode, iblock, bh_result,
@@ -486,14 +533,18 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
486 533
487 /* don't allow direct io onto tail pages */ 534 /* don't allow direct io onto tail pages */
488 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { 535 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
489 /* make sure future calls to the direct io funcs for this offset 536 /*
490 ** in the file fail by unmapping the buffer 537 * make sure future calls to the direct io funcs for this
538 * offset in the file fail by unmapping the buffer
491 */ 539 */
492 clear_buffer_mapped(bh_result); 540 clear_buffer_mapped(bh_result);
493 ret = -EINVAL; 541 ret = -EINVAL;
494 } 542 }
495 /* Possible unpacked tail. Flush the data before pages have 543
496 disappeared */ 544 /*
545 * Possible unpacked tail. Flush the data before pages have
546 * disappeared
547 */
497 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { 548 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
498 int err; 549 int err;
499 550
@@ -507,20 +558,20 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
507 if (err < 0) 558 if (err < 0)
508 ret = err; 559 ret = err;
509 } 560 }
510 out: 561out:
511 return ret; 562 return ret;
512} 563}
513 564
514/* 565/*
515** helper function for when reiserfs_get_block is called for a hole 566 * helper function for when reiserfs_get_block is called for a hole
516** but the file tail is still in a direct item 567 * but the file tail is still in a direct item
517** bh_result is the buffer head for the hole 568 * bh_result is the buffer head for the hole
518** tail_offset is the offset of the start of the tail in the file 569 * tail_offset is the offset of the start of the tail in the file
519** 570 *
520** This calls prepare_write, which will start a new transaction 571 * This calls prepare_write, which will start a new transaction
521** you should not be in a transaction, or have any paths held when you 572 * you should not be in a transaction, or have any paths held when you
522** call this. 573 * call this.
523*/ 574 */
524static int convert_tail_for_hole(struct inode *inode, 575static int convert_tail_for_hole(struct inode *inode,
525 struct buffer_head *bh_result, 576 struct buffer_head *bh_result,
526 loff_t tail_offset) 577 loff_t tail_offset)
@@ -540,9 +591,10 @@ static int convert_tail_for_hole(struct inode *inode,
540 tail_end = (tail_start | (bh_result->b_size - 1)) + 1; 591 tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
541 592
542 index = tail_offset >> PAGE_CACHE_SHIFT; 593 index = tail_offset >> PAGE_CACHE_SHIFT;
543 /* hole_page can be zero in case of direct_io, we are sure 594 /*
544 that we cannot get here if we write with O_DIRECT into 595 * hole_page can be zero in case of direct_io, we are sure
545 tail page */ 596 * that we cannot get here if we write with O_DIRECT into tail page
597 */
546 if (!hole_page || index != hole_page->index) { 598 if (!hole_page || index != hole_page->index) {
547 tail_page = grab_cache_page(inode->i_mapping, index); 599 tail_page = grab_cache_page(inode->i_mapping, index);
548 retval = -ENOMEM; 600 retval = -ENOMEM;
@@ -553,14 +605,15 @@ static int convert_tail_for_hole(struct inode *inode,
553 tail_page = hole_page; 605 tail_page = hole_page;
554 } 606 }
555 607
556 /* we don't have to make sure the conversion did not happen while 608 /*
557 ** we were locking the page because anyone that could convert 609 * we don't have to make sure the conversion did not happen while
558 ** must first take i_mutex. 610 * we were locking the page because anyone that could convert
559 ** 611 * must first take i_mutex.
560 ** We must fix the tail page for writing because it might have buffers 612 *
561 ** that are mapped, but have a block number of 0. This indicates tail 613 * We must fix the tail page for writing because it might have buffers
562 ** data that has been read directly into the page, and 614 * that are mapped, but have a block number of 0. This indicates tail
563 ** __block_write_begin won't trigger a get_block in this case. 615 * data that has been read directly into the page, and
616 * __block_write_begin won't trigger a get_block in this case.
564 */ 617 */
565 fix_tail_page_for_writing(tail_page); 618 fix_tail_page_for_writing(tail_page);
566 retval = __reiserfs_write_begin(tail_page, tail_start, 619 retval = __reiserfs_write_begin(tail_page, tail_start,
@@ -573,12 +626,12 @@ static int convert_tail_for_hole(struct inode *inode,
573 626
574 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); 627 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
575 628
576 unlock: 629unlock:
577 if (tail_page != hole_page) { 630 if (tail_page != hole_page) {
578 unlock_page(tail_page); 631 unlock_page(tail_page);
579 page_cache_release(tail_page); 632 page_cache_release(tail_page);
580 } 633 }
581 out: 634out:
582 return retval; 635 return retval;
583} 636}
584 637
@@ -604,7 +657,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
604 struct buffer_head *bh_result, int create) 657 struct buffer_head *bh_result, int create)
605{ 658{
606 int repeat, retval = 0; 659 int repeat, retval = 0;
607 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int 660 /* b_blocknr_t is (unsigned) 32 bit int*/
661 b_blocknr_t allocated_block_nr = 0;
608 INITIALIZE_PATH(path); 662 INITIALIZE_PATH(path);
609 int pos_in_item; 663 int pos_in_item;
610 struct cpu_key key; 664 struct cpu_key key;
@@ -614,12 +668,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
614 int done; 668 int done;
615 int fs_gen; 669 int fs_gen;
616 struct reiserfs_transaction_handle *th = NULL; 670 struct reiserfs_transaction_handle *th = NULL;
617 /* space reserved in transaction batch: 671 /*
618 . 3 balancings in direct->indirect conversion 672 * space reserved in transaction batch:
619 . 1 block involved into reiserfs_update_sd() 673 * . 3 balancings in direct->indirect conversion
620 XXX in practically impossible worst case direct2indirect() 674 * . 1 block involved into reiserfs_update_sd()
621 can incur (much) more than 3 balancings. 675 * XXX in practically impossible worst case direct2indirect()
622 quota update for user, group */ 676 * can incur (much) more than 3 balancings.
677 * quota update for user, group
678 */
623 int jbegin_count = 679 int jbegin_count =
624 JOURNAL_PER_BALANCE_CNT * 3 + 1 + 680 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
625 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 681 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
@@ -636,8 +692,9 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
636 return -EFBIG; 692 return -EFBIG;
637 } 693 }
638 694
639 /* if !create, we aren't changing the FS, so we don't need to 695 /*
640 ** log anything, so we don't need to start a transaction 696 * if !create, we aren't changing the FS, so we don't need to
697 * log anything, so we don't need to start a transaction
641 */ 698 */
642 if (!(create & GET_BLOCK_CREATE)) { 699 if (!(create & GET_BLOCK_CREATE)) {
643 int ret; 700 int ret;
@@ -647,6 +704,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
647 reiserfs_write_unlock(inode->i_sb); 704 reiserfs_write_unlock(inode->i_sb);
648 return ret; 705 return ret;
649 } 706 }
707
650 /* 708 /*
651 * if we're already in a transaction, make sure to close 709 * if we're already in a transaction, make sure to close
652 * any new transactions we start in this func 710 * any new transactions we start in this func
@@ -655,8 +713,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
655 reiserfs_transaction_running(inode->i_sb)) 713 reiserfs_transaction_running(inode->i_sb))
656 dangle = 0; 714 dangle = 0;
657 715
658 /* If file is of such a size, that it might have a tail and tails are enabled 716 /*
659 ** we should mark it as possibly needing tail packing on close 717 * If file is of such a size, that it might have a tail and
718 * tails are enabled we should mark it as possibly needing
719 * tail packing on close
660 */ 720 */
661 if ((have_large_tails(inode->i_sb) 721 if ((have_large_tails(inode->i_sb)
662 && inode->i_size < i_block_size(inode) * 4) 722 && inode->i_size < i_block_size(inode) * 4)
@@ -667,7 +727,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
667 /* set the key of the first byte in the 'block'-th block of file */ 727 /* set the key of the first byte in the 'block'-th block of file */
668 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); 728 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
669 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { 729 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
670 start_trans: 730start_trans:
671 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); 731 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
672 if (!th) { 732 if (!th) {
673 retval = -ENOMEM; 733 retval = -ENOMEM;
@@ -675,7 +735,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
675 } 735 }
676 reiserfs_update_inode_transaction(inode); 736 reiserfs_update_inode_transaction(inode);
677 } 737 }
678 research: 738research:
679 739
680 retval = search_for_position_by_key(inode->i_sb, &key, &path); 740 retval = search_for_position_by_key(inode->i_sb, &key, &path);
681 if (retval == IO_ERROR) { 741 if (retval == IO_ERROR) {
@@ -684,8 +744,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
684 } 744 }
685 745
686 bh = get_last_bh(&path); 746 bh = get_last_bh(&path);
687 ih = get_ih(&path); 747 ih = tp_item_head(&path);
688 item = get_item(&path); 748 item = tp_item_body(&path);
689 pos_in_item = path.pos_in_item; 749 pos_in_item = path.pos_in_item;
690 750
691 fs_gen = get_generation(inode->i_sb); 751 fs_gen = get_generation(inode->i_sb);
@@ -703,11 +763,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
703 _allocate_block(th, block, inode, &allocated_block_nr, 763 _allocate_block(th, block, inode, &allocated_block_nr,
704 &path, create); 764 &path, create);
705 765
766 /*
767 * restart the transaction to give the journal a chance to free
768 * some blocks. releases the path, so we have to go back to
769 * research if we succeed on the second try
770 */
706 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { 771 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
707 /* restart the transaction to give the journal a chance to free
708 ** some blocks. releases the path, so we have to go back to
709 ** research if we succeed on the second try
710 */
711 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; 772 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
712 retval = restart_transaction(th, inode, &path); 773 retval = restart_transaction(th, inode, &path);
713 if (retval) 774 if (retval)
@@ -734,9 +795,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
734 795
735 if (indirect_item_found(retval, ih)) { 796 if (indirect_item_found(retval, ih)) {
736 b_blocknr_t unfm_ptr; 797 b_blocknr_t unfm_ptr;
737 /* 'block'-th block is in the file already (there is 798 /*
738 corresponding cell in some indirect item). But it may be 799 * 'block'-th block is in the file already (there is
739 zero unformatted node pointer (hole) */ 800 * corresponding cell in some indirect item). But it may be
801 * zero unformatted node pointer (hole)
802 */
740 unfm_ptr = get_block_num(item, pos_in_item); 803 unfm_ptr = get_block_num(item, pos_in_item);
741 if (unfm_ptr == 0) { 804 if (unfm_ptr == 0) {
742 /* use allocated block to plug the hole */ 805 /* use allocated block to plug the hole */
@@ -753,7 +816,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
753 reiserfs_add_ordered_list(inode, bh_result); 816 reiserfs_add_ordered_list(inode, bh_result);
754 put_block_num(item, pos_in_item, allocated_block_nr); 817 put_block_num(item, pos_in_item, allocated_block_nr);
755 unfm_ptr = allocated_block_nr; 818 unfm_ptr = allocated_block_nr;
756 journal_mark_dirty(th, inode->i_sb, bh); 819 journal_mark_dirty(th, bh);
757 reiserfs_update_sd(th, inode); 820 reiserfs_update_sd(th, inode);
758 } 821 }
759 set_block_dev_mapped(bh_result, unfm_ptr, inode); 822 set_block_dev_mapped(bh_result, unfm_ptr, inode);
@@ -764,9 +827,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
764 827
765 reiserfs_write_unlock(inode->i_sb); 828 reiserfs_write_unlock(inode->i_sb);
766 829
767 /* the item was found, so new blocks were not added to the file 830 /*
768 ** there is no need to make sure the inode is updated with this 831 * the item was found, so new blocks were not added to the file
769 ** transaction 832 * there is no need to make sure the inode is updated with this
833 * transaction
770 */ 834 */
771 return retval; 835 return retval;
772 } 836 }
@@ -776,9 +840,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
776 goto start_trans; 840 goto start_trans;
777 } 841 }
778 842
779 /* desired position is not found or is in the direct item. We have 843 /*
780 to append file with holes up to 'block'-th block converting 844 * desired position is not found or is in the direct item. We have
781 direct items to indirect one if necessary */ 845 * to append file with holes up to 'block'-th block converting
846 * direct items to indirect one if necessary
847 */
782 done = 0; 848 done = 0;
783 do { 849 do {
784 if (is_statdata_le_ih(ih)) { 850 if (is_statdata_le_ih(ih)) {
@@ -790,16 +856,18 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
790 TYPE_INDIRECT, UNFM_P_SIZE, 856 TYPE_INDIRECT, UNFM_P_SIZE,
791 0 /* free_space */ ); 857 0 /* free_space */ );
792 858
859 /*
860 * we are going to add 'block'-th block to the file.
861 * Use allocated block for that
862 */
793 if (cpu_key_k_offset(&key) == 1) { 863 if (cpu_key_k_offset(&key) == 1) {
794 /* we are going to add 'block'-th block to the file. Use
795 allocated block for that */
796 unp = cpu_to_le32(allocated_block_nr); 864 unp = cpu_to_le32(allocated_block_nr);
797 set_block_dev_mapped(bh_result, 865 set_block_dev_mapped(bh_result,
798 allocated_block_nr, inode); 866 allocated_block_nr, inode);
799 set_buffer_new(bh_result); 867 set_buffer_new(bh_result);
800 done = 1; 868 done = 1;
801 } 869 }
802 tmp_key = key; // ;) 870 tmp_key = key; /* ;) */
803 set_cpu_key_k_offset(&tmp_key, 1); 871 set_cpu_key_k_offset(&tmp_key, 1);
804 PATH_LAST_POSITION(&path)++; 872 PATH_LAST_POSITION(&path)++;
805 873
@@ -809,9 +877,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
809 if (retval) { 877 if (retval) {
810 reiserfs_free_block(th, inode, 878 reiserfs_free_block(th, inode,
811 allocated_block_nr, 1); 879 allocated_block_nr, 1);
812 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST 880 /*
881 * retval == -ENOSPC, -EDQUOT or -EIO
882 * or -EEXIST
883 */
884 goto failure;
813 } 885 }
814 //mark_tail_converted (inode);
815 } else if (is_direct_le_ih(ih)) { 886 } else if (is_direct_le_ih(ih)) {
816 /* direct item has to be converted */ 887 /* direct item has to be converted */
817 loff_t tail_offset; 888 loff_t tail_offset;
@@ -819,18 +890,24 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
819 tail_offset = 890 tail_offset =
820 ((le_ih_k_offset(ih) - 891 ((le_ih_k_offset(ih) -
821 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; 892 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
893
894 /*
895 * direct item we just found fits into block we have
896 * to map. Convert it into unformatted node: use
897 * bh_result for the conversion
898 */
822 if (tail_offset == cpu_key_k_offset(&key)) { 899 if (tail_offset == cpu_key_k_offset(&key)) {
823 /* direct item we just found fits into block we have
824 to map. Convert it into unformatted node: use
825 bh_result for the conversion */
826 set_block_dev_mapped(bh_result, 900 set_block_dev_mapped(bh_result,
827 allocated_block_nr, inode); 901 allocated_block_nr, inode);
828 unbh = bh_result; 902 unbh = bh_result;
829 done = 1; 903 done = 1;
830 } else { 904 } else {
831 /* we have to padd file tail stored in direct item(s) 905 /*
832 up to block size and convert it to unformatted 906 * we have to pad file tail stored in direct
833 node. FIXME: this should also get into page cache */ 907 * item(s) up to block size and convert it
908 * to unformatted node. FIXME: this should
909 * also get into page cache
910 */
834 911
835 pathrelse(&path); 912 pathrelse(&path);
836 /* 913 /*
@@ -859,7 +936,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
859 inode->i_ino, 936 inode->i_ino,
860 retval); 937 retval);
861 if (allocated_block_nr) { 938 if (allocated_block_nr) {
862 /* the bitmap, the super, and the stat data == 3 */ 939 /*
940 * the bitmap, the super,
941 * and the stat data == 3
942 */
863 if (!th) 943 if (!th)
864 th = reiserfs_persistent_transaction(inode->i_sb, 3); 944 th = reiserfs_persistent_transaction(inode->i_sb, 3);
865 if (th) 945 if (th)
@@ -881,43 +961,57 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
881 allocated_block_nr, 1); 961 allocated_block_nr, 1);
882 goto failure; 962 goto failure;
883 } 963 }
884 /* it is important the set_buffer_uptodate is done after 964 /*
885 ** the direct2indirect. The buffer might contain valid 965 * it is important the set_buffer_uptodate is done
886 ** data newer than the data on disk (read by readpage, changed, 966 * after the direct2indirect. The buffer might
887 ** and then sent here by writepage). direct2indirect needs 967 * contain valid data newer than the data on disk
888 ** to know if unbh was already up to date, so it can decide 968 * (read by readpage, changed, and then sent here by
889 ** if the data in unbh needs to be replaced with data from 969 * writepage). direct2indirect needs to know if unbh
890 ** the disk 970 * was already up to date, so it can decide if the
971 * data in unbh needs to be replaced with data from
972 * the disk
891 */ 973 */
892 set_buffer_uptodate(unbh); 974 set_buffer_uptodate(unbh);
893 975
894 /* unbh->b_page == NULL in case of DIRECT_IO request, this means 976 /*
895 buffer will disappear shortly, so it should not be added to 977 * unbh->b_page == NULL in case of DIRECT_IO request,
978 * this means buffer will disappear shortly, so it
979 * should not be added to
896 */ 980 */
897 if (unbh->b_page) { 981 if (unbh->b_page) {
898 /* we've converted the tail, so we must 982 /*
899 ** flush unbh before the transaction commits 983 * we've converted the tail, so we must
984 * flush unbh before the transaction commits
900 */ 985 */
901 reiserfs_add_tail_list(inode, unbh); 986 reiserfs_add_tail_list(inode, unbh);
902 987
903 /* mark it dirty now to prevent commit_write from adding 988 /*
904 ** this buffer to the inode's dirty buffer list 989 * mark it dirty now to prevent commit_write
990 * from adding this buffer to the inode's
991 * dirty buffer list
905 */ 992 */
906 /* 993 /*
907 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 994 * AKPM: changed __mark_buffer_dirty to
908 * It's still atomic, but it sets the page dirty too, 995 * mark_buffer_dirty(). It's still atomic,
909 * which makes it eligible for writeback at any time by the 996 * but it sets the page dirty too, which makes
910 * VM (which was also the case with __mark_buffer_dirty()) 997 * it eligible for writeback at any time by the
998 * VM (which was also the case with
999 * __mark_buffer_dirty())
911 */ 1000 */
912 mark_buffer_dirty(unbh); 1001 mark_buffer_dirty(unbh);
913 } 1002 }
914 } else { 1003 } else {
915 /* append indirect item with holes if needed, when appending 1004 /*
916 pointer to 'block'-th block use block, which is already 1005 * append indirect item with holes if needed, when
917 allocated */ 1006 * appending pointer to 'block'-th block use block,
1007 * which is already allocated
1008 */
918 struct cpu_key tmp_key; 1009 struct cpu_key tmp_key;
919 unp_t unf_single = 0; // We use this in case we need to allocate only 1010 /*
920 // one block which is a fastpath 1011 * We use this in case we need to allocate
1012 * only one block which is a fastpath
1013 */
1014 unp_t unf_single = 0;
921 unp_t *un; 1015 unp_t *un;
922 __u64 max_to_insert = 1016 __u64 max_to_insert =
923 MAX_ITEM_LEN(inode->i_sb->s_blocksize) / 1017 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
@@ -926,14 +1020,17 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
926 1020
927 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, 1021 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
928 "vs-804: invalid position for append"); 1022 "vs-804: invalid position for append");
929 /* indirect item has to be appended, set up key of that position */ 1023 /*
1024 * indirect item has to be appended,
1025 * set up key of that position
1026 * (key type is unimportant)
1027 */
930 make_cpu_key(&tmp_key, inode, 1028 make_cpu_key(&tmp_key, inode,
931 le_key_k_offset(version, 1029 le_key_k_offset(version,
932 &(ih->ih_key)) + 1030 &ih->ih_key) +
933 op_bytes_number(ih, 1031 op_bytes_number(ih,
934 inode->i_sb->s_blocksize), 1032 inode->i_sb->s_blocksize),
935 //pos_in_item * inode->i_sb->s_blocksize, 1033 TYPE_INDIRECT, 3);
936 TYPE_INDIRECT, 3); // key type is unimportant
937 1034
938 RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), 1035 RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
939 "green-805: invalid offset"); 1036 "green-805: invalid offset");
@@ -954,8 +1051,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
954 } 1051 }
955 } 1052 }
956 if (blocks_needed <= max_to_insert) { 1053 if (blocks_needed <= max_to_insert) {
957 /* we are going to add target block to the file. Use allocated 1054 /*
958 block for that */ 1055 * we are going to add target block to
1056 * the file. Use allocated block for that
1057 */
959 un[blocks_needed - 1] = 1058 un[blocks_needed - 1] =
960 cpu_to_le32(allocated_block_nr); 1059 cpu_to_le32(allocated_block_nr);
961 set_block_dev_mapped(bh_result, 1060 set_block_dev_mapped(bh_result,
@@ -964,8 +1063,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
964 done = 1; 1063 done = 1;
965 } else { 1064 } else {
966 /* paste hole to the indirect item */ 1065 /* paste hole to the indirect item */
967 /* If kmalloc failed, max_to_insert becomes zero and it means we 1066 /*
968 only have space for one block */ 1067 * If kmalloc failed, max_to_insert becomes
1068 * zero and it means we only have space for
1069 * one block
1070 */
969 blocks_needed = 1071 blocks_needed =
970 max_to_insert ? max_to_insert : 1; 1072 max_to_insert ? max_to_insert : 1;
971 } 1073 }
@@ -984,9 +1086,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
984 goto failure; 1086 goto failure;
985 } 1087 }
986 if (!done) { 1088 if (!done) {
987 /* We need to mark new file size in case this function will be 1089 /*
988 interrupted/aborted later on. And we may do this only for 1090 * We need to mark new file size in case
989 holes. */ 1091 * this function will be interrupted/aborted
1092 * later on. And we may do this only for
1093 * holes.
1094 */
990 inode->i_size += 1095 inode->i_size +=
991 inode->i_sb->s_blocksize * blocks_needed; 1096 inode->i_sb->s_blocksize * blocks_needed;
992 } 1097 }
@@ -995,13 +1100,13 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
995 if (done == 1) 1100 if (done == 1)
996 break; 1101 break;
997 1102
998 /* this loop could log more blocks than we had originally asked 1103 /*
999 ** for. So, we have to allow the transaction to end if it is 1104 * this loop could log more blocks than we had originally
1000 ** too big or too full. Update the inode so things are 1105 * asked for. So, we have to allow the transaction to end
1001 ** consistent if we crash before the function returns 1106 * if it is too big or too full. Update the inode so things
1002 ** 1107 * are consistent if we crash before the function returns
1003 ** release the path so that anybody waiting on the path before 1108 * release the path so that anybody waiting on the path before
1004 ** ending their transaction will be able to continue. 1109 * ending their transaction will be able to continue.
1005 */ 1110 */
1006 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 1111 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1007 retval = restart_transaction(th, inode, &path); 1112 retval = restart_transaction(th, inode, &path);
@@ -1031,14 +1136,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
1031 goto failure; 1136 goto failure;
1032 } 1137 }
1033 bh = get_last_bh(&path); 1138 bh = get_last_bh(&path);
1034 ih = get_ih(&path); 1139 ih = tp_item_head(&path);
1035 item = get_item(&path); 1140 item = tp_item_body(&path);
1036 pos_in_item = path.pos_in_item; 1141 pos_in_item = path.pos_in_item;
1037 } while (1); 1142 } while (1);
1038 1143
1039 retval = 0; 1144 retval = 0;
1040 1145
1041 failure: 1146failure:
1042 if (th && (!dangle || (retval && !th->t_trans_id))) { 1147 if (th && (!dangle || (retval && !th->t_trans_id))) {
1043 int err; 1148 int err;
1044 if (th->t_trans_id) 1149 if (th->t_trans_id)
@@ -1060,8 +1165,10 @@ reiserfs_readpages(struct file *file, struct address_space *mapping,
1060 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1165 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
1061} 1166}
1062 1167
1063/* Compute real number of used bytes by file 1168/*
1064 * Following three functions can go away when we'll have enough space in stat item 1169 * Compute real number of used bytes by file
1170 * Following three functions can go away when we'll have enough space in
1171 * stat item
1065 */ 1172 */
1066static int real_space_diff(struct inode *inode, int sd_size) 1173static int real_space_diff(struct inode *inode, int sd_size)
1067{ 1174{
@@ -1071,13 +1178,14 @@ static int real_space_diff(struct inode *inode, int sd_size)
1071 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) 1178 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1072 return sd_size; 1179 return sd_size;
1073 1180
1074 /* End of file is also in full block with indirect reference, so round 1181 /*
1075 ** up to the next block. 1182 * End of file is also in full block with indirect reference, so round
1076 ** 1183 * up to the next block.
1077 ** there is just no way to know if the tail is actually packed 1184 *
1078 ** on the file, so we have to assume it isn't. When we pack the 1185 * there is just no way to know if the tail is actually packed
1079 ** tail, we add 4 bytes to pretend there really is an unformatted 1186 * on the file, so we have to assume it isn't. When we pack the
1080 ** node pointer 1187 * tail, we add 4 bytes to pretend there really is an unformatted
1188 * node pointer
1081 */ 1189 */
1082 bytes = 1190 bytes =
1083 ((inode->i_size + 1191 ((inode->i_size +
@@ -1108,36 +1216,36 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1108 bytes += (loff_t) 511; 1216 bytes += (loff_t) 511;
1109 } 1217 }
1110 1218
1111 /* files from before the quota patch might i_blocks such that 1219 /*
1112 ** bytes < real_space. Deal with that here to prevent it from 1220 * files from before the quota patch might i_blocks such that
1113 ** going negative. 1221 * bytes < real_space. Deal with that here to prevent it from
1222 * going negative.
1114 */ 1223 */
1115 if (bytes < real_space) 1224 if (bytes < real_space)
1116 return 0; 1225 return 0;
1117 return (bytes - real_space) >> 9; 1226 return (bytes - real_space) >> 9;
1118} 1227}
1119 1228
1120// 1229/*
1121// BAD: new directories have stat data of new type and all other items 1230 * BAD: new directories have stat data of new type and all other items
1122// of old type. Version stored in the inode says about body items, so 1231 * of old type. Version stored in the inode says about body items, so
1123// in update_stat_data we can not rely on inode, but have to check 1232 * in update_stat_data we can not rely on inode, but have to check
1124// item version directly 1233 * item version directly
1125// 1234 */
1126 1235
1127// called by read_locked_inode 1236/* called by read_locked_inode */
1128static void init_inode(struct inode *inode, struct treepath *path) 1237static void init_inode(struct inode *inode, struct treepath *path)
1129{ 1238{
1130 struct buffer_head *bh; 1239 struct buffer_head *bh;
1131 struct item_head *ih; 1240 struct item_head *ih;
1132 __u32 rdev; 1241 __u32 rdev;
1133 //int version = ITEM_VERSION_1;
1134 1242
1135 bh = PATH_PLAST_BUFFER(path); 1243 bh = PATH_PLAST_BUFFER(path);
1136 ih = PATH_PITEM_HEAD(path); 1244 ih = tp_item_head(path);
1137 1245
1138 copy_key(INODE_PKEY(inode), &(ih->ih_key)); 1246 copy_key(INODE_PKEY(inode), &ih->ih_key);
1139 1247
1140 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); 1248 INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
1141 REISERFS_I(inode)->i_flags = 0; 1249 REISERFS_I(inode)->i_flags = 0;
1142 REISERFS_I(inode)->i_prealloc_block = 0; 1250 REISERFS_I(inode)->i_prealloc_block = 0;
1143 REISERFS_I(inode)->i_prealloc_count = 0; 1251 REISERFS_I(inode)->i_prealloc_count = 0;
@@ -1147,7 +1255,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1147 1255
1148 if (stat_data_v1(ih)) { 1256 if (stat_data_v1(ih)) {
1149 struct stat_data_v1 *sd = 1257 struct stat_data_v1 *sd =
1150 (struct stat_data_v1 *)B_I_PITEM(bh, ih); 1258 (struct stat_data_v1 *)ih_item_body(bh, ih);
1151 unsigned long blocks; 1259 unsigned long blocks;
1152 1260
1153 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 1261 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
@@ -1168,20 +1276,26 @@ static void init_inode(struct inode *inode, struct treepath *path)
1168 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1276 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1169 blocks = (inode->i_size + 511) >> 9; 1277 blocks = (inode->i_size + 511) >> 9;
1170 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); 1278 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1279
1280 /*
1281 * there was a bug in <=3.5.23 when i_blocks could take
1282 * negative values. Starting from 3.5.17 this value could
1283 * even be stored in stat data. For such files we set
1284 * i_blocks based on file size. Just 2 notes: this can be
1285 * wrong for sparse files. On-disk value will be only
1286 * updated if file's inode will ever change
1287 */
1171 if (inode->i_blocks > blocks) { 1288 if (inode->i_blocks > blocks) {
1172 // there was a bug in <=3.5.23 when i_blocks could take negative
1173 // values. Starting from 3.5.17 this value could even be stored in
1174 // stat data. For such files we set i_blocks based on file
1175 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
1176 // only updated if file's inode will ever change
1177 inode->i_blocks = blocks; 1289 inode->i_blocks = blocks;
1178 } 1290 }
1179 1291
1180 rdev = sd_v1_rdev(sd); 1292 rdev = sd_v1_rdev(sd);
1181 REISERFS_I(inode)->i_first_direct_byte = 1293 REISERFS_I(inode)->i_first_direct_byte =
1182 sd_v1_first_direct_byte(sd); 1294 sd_v1_first_direct_byte(sd);
1183 /* an early bug in the quota code can give us an odd number for the 1295
1184 ** block count. This is incorrect, fix it here. 1296 /*
1297 * an early bug in the quota code can give us an odd
1298 * number for the block count. This is incorrect, fix it here.
1185 */ 1299 */
1186 if (inode->i_blocks & 1) { 1300 if (inode->i_blocks & 1) {
1187 inode->i_blocks++; 1301 inode->i_blocks++;
@@ -1189,13 +1303,17 @@ static void init_inode(struct inode *inode, struct treepath *path)
1189 inode_set_bytes(inode, 1303 inode_set_bytes(inode,
1190 to_real_used_space(inode, inode->i_blocks, 1304 to_real_used_space(inode, inode->i_blocks,
1191 SD_V1_SIZE)); 1305 SD_V1_SIZE));
1192 /* nopack is initially zero for v1 objects. For v2 objects, 1306 /*
1193 nopack is initialised from sd_attrs */ 1307 * nopack is initially zero for v1 objects. For v2 objects,
1308 * nopack is initialised from sd_attrs
1309 */
1194 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 1310 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1195 } else { 1311 } else {
1196 // new stat data found, but object may have old items 1312 /*
1197 // (directories and symlinks) 1313 * new stat data found, but object may have old items
1198 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); 1314 * (directories and symlinks)
1315 */
1316 struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih);
1199 1317
1200 inode->i_mode = sd_v2_mode(sd); 1318 inode->i_mode = sd_v2_mode(sd);
1201 set_nlink(inode, sd_v2_nlink(sd)); 1319 set_nlink(inode, sd_v2_nlink(sd));
@@ -1225,8 +1343,10 @@ static void init_inode(struct inode *inode, struct treepath *path)
1225 inode_set_bytes(inode, 1343 inode_set_bytes(inode,
1226 to_real_used_space(inode, inode->i_blocks, 1344 to_real_used_space(inode, inode->i_blocks,
1227 SD_V2_SIZE)); 1345 SD_V2_SIZE));
1228 /* read persistent inode attributes from sd and initialise 1346 /*
1229 generic inode flags from them */ 1347 * read persistent inode attributes from sd and initialise
1348 * generic inode flags from them
1349 */
1230 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); 1350 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1231 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); 1351 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1232 } 1352 }
@@ -1249,7 +1369,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1249 } 1369 }
1250} 1370}
1251 1371
1252// update new stat data with inode fields 1372/* update new stat data with inode fields */
1253static void inode2sd(void *sd, struct inode *inode, loff_t size) 1373static void inode2sd(void *sd, struct inode *inode, loff_t size)
1254{ 1374{
1255 struct stat_data *sd_v2 = (struct stat_data *)sd; 1375 struct stat_data *sd_v2 = (struct stat_data *)sd;
@@ -1273,7 +1393,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
1273 set_sd_v2_attrs(sd_v2, flags); 1393 set_sd_v2_attrs(sd_v2, flags);
1274} 1394}
1275 1395
1276// used to copy inode's fields to old stat data 1396/* used to copy inode's fields to old stat data */
1277static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) 1397static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1278{ 1398{
1279 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; 1399 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
@@ -1292,14 +1412,15 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1292 else 1412 else
1293 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); 1413 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1294 1414
1295 // Sigh. i_first_direct_byte is back 1415 /* Sigh. i_first_direct_byte is back */
1296 set_sd_v1_first_direct_byte(sd_v1, 1416 set_sd_v1_first_direct_byte(sd_v1,
1297 REISERFS_I(inode)->i_first_direct_byte); 1417 REISERFS_I(inode)->i_first_direct_byte);
1298} 1418}
1299 1419
1300/* NOTE, you must prepare the buffer head before sending it here, 1420/*
1301** and then log it after the call 1421 * NOTE, you must prepare the buffer head before sending it here,
1302*/ 1422 * and then log it after the call
1423 */
1303static void update_stat_data(struct treepath *path, struct inode *inode, 1424static void update_stat_data(struct treepath *path, struct inode *inode,
1304 loff_t size) 1425 loff_t size)
1305{ 1426{
@@ -1307,17 +1428,17 @@ static void update_stat_data(struct treepath *path, struct inode *inode,
1307 struct item_head *ih; 1428 struct item_head *ih;
1308 1429
1309 bh = PATH_PLAST_BUFFER(path); 1430 bh = PATH_PLAST_BUFFER(path);
1310 ih = PATH_PITEM_HEAD(path); 1431 ih = tp_item_head(path);
1311 1432
1312 if (!is_statdata_le_ih(ih)) 1433 if (!is_statdata_le_ih(ih))
1313 reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", 1434 reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
1314 INODE_PKEY(inode), ih); 1435 INODE_PKEY(inode), ih);
1315 1436
1437 /* path points to old stat data */
1316 if (stat_data_v1(ih)) { 1438 if (stat_data_v1(ih)) {
1317 // path points to old stat data 1439 inode2sd_v1(ih_item_body(bh, ih), inode, size);
1318 inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1319 } else { 1440 } else {
1320 inode2sd(B_I_PITEM(bh, ih), inode, size); 1441 inode2sd(ih_item_body(bh, ih), inode, size);
1321 } 1442 }
1322 1443
1323 return; 1444 return;
@@ -1335,7 +1456,8 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1335 1456
1336 BUG_ON(!th->t_trans_id); 1457 BUG_ON(!th->t_trans_id);
1337 1458
1338 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant 1459 /* key type is unimportant */
1460 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);
1339 1461
1340 for (;;) { 1462 for (;;) {
1341 int pos; 1463 int pos;
@@ -1363,45 +1485,48 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1363 return; 1485 return;
1364 } 1486 }
1365 1487
1366 /* sigh, prepare_for_journal might schedule. When it schedules the 1488 /*
1367 ** FS might change. We have to detect that, and loop back to the 1489 * sigh, prepare_for_journal might schedule. When it
1368 ** search if the stat data item has moved 1490 * schedules the FS might change. We have to detect that,
1491 * and loop back to the search if the stat data item has moved
1369 */ 1492 */
1370 bh = get_last_bh(&path); 1493 bh = get_last_bh(&path);
1371 ih = get_ih(&path); 1494 ih = tp_item_head(&path);
1372 copy_item_head(&tmp_ih, ih); 1495 copy_item_head(&tmp_ih, ih);
1373 fs_gen = get_generation(inode->i_sb); 1496 fs_gen = get_generation(inode->i_sb);
1374 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); 1497 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1498
1499 /* Stat_data item has been moved after scheduling. */
1375 if (fs_changed(fs_gen, inode->i_sb) 1500 if (fs_changed(fs_gen, inode->i_sb)
1376 && item_moved(&tmp_ih, &path)) { 1501 && item_moved(&tmp_ih, &path)) {
1377 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 1502 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1378 continue; /* Stat_data item has been moved after scheduling. */ 1503 continue;
1379 } 1504 }
1380 break; 1505 break;
1381 } 1506 }
1382 update_stat_data(&path, inode, size); 1507 update_stat_data(&path, inode, size);
1383 journal_mark_dirty(th, th->t_super, bh); 1508 journal_mark_dirty(th, bh);
1384 pathrelse(&path); 1509 pathrelse(&path);
1385 return; 1510 return;
1386} 1511}
1387 1512
1388/* reiserfs_read_locked_inode is called to read the inode off disk, and it 1513/*
1389** does a make_bad_inode when things go wrong. But, we need to make sure 1514 * reiserfs_read_locked_inode is called to read the inode off disk, and it
1390** and clear the key in the private portion of the inode, otherwise a 1515 * does a make_bad_inode when things go wrong. But, we need to make sure
1391** corresponding iput might try to delete whatever object the inode last 1516 * and clear the key in the private portion of the inode, otherwise a
1392** represented. 1517 * corresponding iput might try to delete whatever object the inode last
1393*/ 1518 * represented.
1519 */
1394static void reiserfs_make_bad_inode(struct inode *inode) 1520static void reiserfs_make_bad_inode(struct inode *inode)
1395{ 1521{
1396 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1522 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1397 make_bad_inode(inode); 1523 make_bad_inode(inode);
1398} 1524}
1399 1525
1400// 1526/*
1401// initially this function was derived from minix or ext2's analog and 1527 * initially this function was derived from minix or ext2's analog and
1402// evolved as the prototype did 1528 * evolved as the prototype did
1403// 1529 */
1404
1405int reiserfs_init_locked_inode(struct inode *inode, void *p) 1530int reiserfs_init_locked_inode(struct inode *inode, void *p)
1406{ 1531{
1407 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; 1532 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
@@ -1410,8 +1535,10 @@ int reiserfs_init_locked_inode(struct inode *inode, void *p)
1410 return 0; 1535 return 0;
1411} 1536}
1412 1537
1413/* looks for stat data in the tree, and fills up the fields of in-core 1538/*
1414 inode stat data fields */ 1539 * looks for stat data in the tree, and fills up the fields of in-core
1540 * inode stat data fields
1541 */
1415void reiserfs_read_locked_inode(struct inode *inode, 1542void reiserfs_read_locked_inode(struct inode *inode,
1416 struct reiserfs_iget_args *args) 1543 struct reiserfs_iget_args *args)
1417{ 1544{
@@ -1422,8 +1549,10 @@ void reiserfs_read_locked_inode(struct inode *inode,
1422 1549
1423 dirino = args->dirid; 1550 dirino = args->dirid;
1424 1551
1425 /* set version 1, version 2 could be used too, because stat data 1552 /*
1426 key is the same in both versions */ 1553 * set version 1, version 2 could be used too, because stat data
1554 * key is the same in both versions
1555 */
1427 key.version = KEY_FORMAT_3_5; 1556 key.version = KEY_FORMAT_3_5;
1428 key.on_disk_key.k_dir_id = dirino; 1557 key.on_disk_key.k_dir_id = dirino;
1429 key.on_disk_key.k_objectid = inode->i_ino; 1558 key.on_disk_key.k_objectid = inode->i_ino;
@@ -1439,8 +1568,9 @@ void reiserfs_read_locked_inode(struct inode *inode,
1439 reiserfs_make_bad_inode(inode); 1568 reiserfs_make_bad_inode(inode);
1440 return; 1569 return;
1441 } 1570 }
1571
1572 /* a stale NFS handle can trigger this without it being an error */
1442 if (retval != ITEM_FOUND) { 1573 if (retval != ITEM_FOUND) {
1443 /* a stale NFS handle can trigger this without it being an error */
1444 pathrelse(&path_to_sd); 1574 pathrelse(&path_to_sd);
1445 reiserfs_make_bad_inode(inode); 1575 reiserfs_make_bad_inode(inode);
1446 clear_nlink(inode); 1576 clear_nlink(inode);
@@ -1449,20 +1579,25 @@ void reiserfs_read_locked_inode(struct inode *inode,
1449 1579
1450 init_inode(inode, &path_to_sd); 1580 init_inode(inode, &path_to_sd);
1451 1581
1452 /* It is possible that knfsd is trying to access inode of a file 1582 /*
1453 that is being removed from the disk by some other thread. As we 1583 * It is possible that knfsd is trying to access inode of a file
1454 update sd on unlink all that is required is to check for nlink 1584 * that is being removed from the disk by some other thread. As we
1455 here. This bug was first found by Sizif when debugging 1585 * update sd on unlink all that is required is to check for nlink
1456 SquidNG/Butterfly, forgotten, and found again after Philippe 1586 * here. This bug was first found by Sizif when debugging
1457 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1587 * SquidNG/Butterfly, forgotten, and found again after Philippe
1458 1588 * Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1459 More logical fix would require changes in fs/inode.c:iput() to 1589
1460 remove inode from hash-table _after_ fs cleaned disk stuff up and 1590 * More logical fix would require changes in fs/inode.c:iput() to
1461 in iget() to return NULL if I_FREEING inode is found in 1591 * remove inode from hash-table _after_ fs cleaned disk stuff up and
1462 hash-table. */ 1592 * in iget() to return NULL if I_FREEING inode is found in
1463 /* Currently there is one place where it's ok to meet inode with 1593 * hash-table.
1464 nlink==0: processing of open-unlinked and half-truncated files 1594 */
1465 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1595
1596 /*
1597 * Currently there is one place where it's ok to meet inode with
1598 * nlink==0: processing of open-unlinked and half-truncated files
1599 * during mount (fs/reiserfs/super.c:finish_unfinished()).
1600 */
1466 if ((inode->i_nlink == 0) && 1601 if ((inode->i_nlink == 0) &&
1467 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { 1602 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1468 reiserfs_warning(inode->i_sb, "vs-13075", 1603 reiserfs_warning(inode->i_sb, "vs-13075",
@@ -1472,7 +1607,8 @@ void reiserfs_read_locked_inode(struct inode *inode,
1472 reiserfs_make_bad_inode(inode); 1607 reiserfs_make_bad_inode(inode);
1473 } 1608 }
1474 1609
1475 reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ 1610 /* init inode should be relsing */
1611 reiserfs_check_path(&path_to_sd);
1476 1612
1477 /* 1613 /*
1478 * Stat data v1 doesn't support ACLs. 1614 * Stat data v1 doesn't support ACLs.
@@ -1481,7 +1617,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
1481 cache_no_acl(inode); 1617 cache_no_acl(inode);
1482} 1618}
1483 1619
1484/** 1620/*
1485 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). 1621 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
1486 * 1622 *
1487 * @inode: inode from hash table to check 1623 * @inode: inode from hash table to check
@@ -1556,7 +1692,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1556struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, 1692struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1557 int fh_len, int fh_type) 1693 int fh_len, int fh_type)
1558{ 1694{
1559 /* fhtype happens to reflect the number of u32s encoded. 1695 /*
1696 * fhtype happens to reflect the number of u32s encoded.
1560 * due to a bug in earlier code, fhtype might indicate there 1697 * due to a bug in earlier code, fhtype might indicate there
1561 * are more u32s then actually fitted. 1698 * are more u32s then actually fitted.
1562 * so if fhtype seems to be more than len, reduce fhtype. 1699 * so if fhtype seems to be more than len, reduce fhtype.
@@ -1625,13 +1762,16 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
1625 return *lenp; 1762 return *lenp;
1626} 1763}
1627 1764
1628/* looks for stat data, then copies fields to it, marks the buffer 1765/*
1629 containing stat data as dirty */ 1766 * looks for stat data, then copies fields to it, marks the buffer
1630/* reiserfs inodes are never really dirty, since the dirty inode call 1767 * containing stat data as dirty
1631** always logs them. This call allows the VFS inode marking routines 1768 */
1632** to properly mark inodes for datasync and such, but only actually 1769/*
1633** does something when called for a synchronous update. 1770 * reiserfs inodes are never really dirty, since the dirty inode call
1634*/ 1771 * always logs them. This call allows the VFS inode marking routines
1772 * to properly mark inodes for datasync and such, but only actually
1773 * does something when called for a synchronous update.
1774 */
1635int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1775int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1636{ 1776{
1637 struct reiserfs_transaction_handle th; 1777 struct reiserfs_transaction_handle th;
@@ -1639,24 +1779,28 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1639 1779
1640 if (inode->i_sb->s_flags & MS_RDONLY) 1780 if (inode->i_sb->s_flags & MS_RDONLY)
1641 return -EROFS; 1781 return -EROFS;
1642 /* memory pressure can sometimes initiate write_inode calls with sync == 1, 1782 /*
1643 ** these cases are just when the system needs ram, not when the 1783 * memory pressure can sometimes initiate write_inode calls with
1644 ** inode needs to reach disk for safety, and they can safely be 1784 * sync == 1,
1645 ** ignored because the altered inode has already been logged. 1785 * these cases are just when the system needs ram, not when the
1786 * inode needs to reach disk for safety, and they can safely be
1787 * ignored because the altered inode has already been logged.
1646 */ 1788 */
1647 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { 1789 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
1648 reiserfs_write_lock(inode->i_sb); 1790 reiserfs_write_lock(inode->i_sb);
1649 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1791 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1650 reiserfs_update_sd(&th, inode); 1792 reiserfs_update_sd(&th, inode);
1651 journal_end_sync(&th, inode->i_sb, jbegin_count); 1793 journal_end_sync(&th);
1652 } 1794 }
1653 reiserfs_write_unlock(inode->i_sb); 1795 reiserfs_write_unlock(inode->i_sb);
1654 } 1796 }
1655 return 0; 1797 return 0;
1656} 1798}
1657 1799
1658/* stat data of new object is inserted already, this inserts the item 1800/*
1659 containing "." and ".." entries */ 1801 * stat data of new object is inserted already, this inserts the item
1802 * containing "." and ".." entries
1803 */
1660static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, 1804static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1661 struct inode *inode, 1805 struct inode *inode,
1662 struct item_head *ih, struct treepath *path, 1806 struct item_head *ih, struct treepath *path,
@@ -1674,9 +1818,11 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1674 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, 1818 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1675 TYPE_DIRENTRY, 3 /*key length */ ); 1819 TYPE_DIRENTRY, 3 /*key length */ );
1676 1820
1677 /* compose item head for new item. Directories consist of items of 1821 /*
1678 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it 1822 * compose item head for new item. Directories consist of items of
1679 is done by reiserfs_new_inode */ 1823 * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1824 * is done by reiserfs_new_inode
1825 */
1680 if (old_format_only(sb)) { 1826 if (old_format_only(sb)) {
1681 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1827 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1682 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1828 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
@@ -1714,9 +1860,12 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1714 return reiserfs_insert_item(th, path, &key, ih, inode, body); 1860 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1715} 1861}
1716 1862
1717/* stat data of object has been inserted, this inserts the item 1863/*
1718 containing the body of symlink */ 1864 * stat data of object has been inserted, this inserts the item
1719static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ 1865 * containing the body of symlink
1866 */
1867static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th,
1868 struct inode *inode,
1720 struct item_head *ih, 1869 struct item_head *ih,
1721 struct treepath *path, const char *symname, 1870 struct treepath *path, const char *symname,
1722 int item_len) 1871 int item_len)
@@ -1754,15 +1903,26 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
1754 return reiserfs_insert_item(th, path, &key, ih, inode, symname); 1903 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1755} 1904}
1756 1905
1757/* inserts the stat data into the tree, and then calls 1906/*
1758 reiserfs_new_directory (to insert ".", ".." item if new object is 1907 * inserts the stat data into the tree, and then calls
1759 directory) or reiserfs_new_symlink (to insert symlink body if new 1908 * reiserfs_new_directory (to insert ".", ".." item if new object is
1760 object is symlink) or nothing (if new object is regular file) 1909 * directory) or reiserfs_new_symlink (to insert symlink body if new
1761 1910 * object is symlink) or nothing (if new object is regular file)
1762 NOTE! uid and gid must already be set in the inode. If we return 1911
1763 non-zero due to an error, we have to drop the quota previously allocated 1912 * NOTE! uid and gid must already be set in the inode. If we return
1764 for the fresh inode. This can only be done outside a transaction, so 1913 * non-zero due to an error, we have to drop the quota previously allocated
1765 if we return non-zero, we also end the transaction. */ 1914 * for the fresh inode. This can only be done outside a transaction, so
1915 * if we return non-zero, we also end the transaction.
1916 *
1917 * @th: active transaction handle
1918 * @dir: parent directory for new inode
1919 * @mode: mode of new inode
1920 * @symname: symlink contents if inode is symlink
1921 * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for
1922 * symlinks
1923 * @inode: inode to be filled
1924 * @security: optional security context to associate with this inode
1925 */
1766int reiserfs_new_inode(struct reiserfs_transaction_handle *th, 1926int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1767 struct inode *dir, umode_t mode, const char *symname, 1927 struct inode *dir, umode_t mode, const char *symname,
1768 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1928 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
@@ -1807,7 +1967,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1807 else 1967 else
1808 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, 1968 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1809 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1969 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1810 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); 1970 memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE);
1811 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); 1971 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1812 1972
1813 depth = reiserfs_write_unlock_nested(inode->i_sb); 1973 depth = reiserfs_write_unlock_nested(inode->i_sb);
@@ -1820,10 +1980,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1820 } 1980 }
1821 1981
1822 if (old_format_only(sb)) 1982 if (old_format_only(sb))
1823 /* not a perfect generation count, as object ids can be reused, but 1983 /*
1824 ** this is as good as reiserfs can do right now. 1984 * not a perfect generation count, as object ids can be reused,
1825 ** note that the private part of inode isn't filled in yet, we have 1985 * but this is as good as reiserfs can do right now.
1826 ** to use the directory. 1986 * note that the private part of inode isn't filled in yet,
1987 * we have to use the directory.
1827 */ 1988 */
1828 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); 1989 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1829 else 1990 else
@@ -1850,7 +2011,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1850 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : 2011 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1851 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; 2012 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1852 2013
1853 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); 2014 INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list);
1854 REISERFS_I(inode)->i_flags = 0; 2015 REISERFS_I(inode)->i_flags = 0;
1855 REISERFS_I(inode)->i_prealloc_block = 0; 2016 REISERFS_I(inode)->i_prealloc_block = 0;
1856 REISERFS_I(inode)->i_prealloc_count = 0; 2017 REISERFS_I(inode)->i_prealloc_count = 0;
@@ -1878,9 +2039,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1878 goto out_bad_inode; 2039 goto out_bad_inode;
1879 } 2040 }
1880 if (old_format_only(sb)) { 2041 if (old_format_only(sb)) {
2042 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1881 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { 2043 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
1882 pathrelse(&path_to_key); 2044 pathrelse(&path_to_key);
1883 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1884 err = -EINVAL; 2045 err = -EINVAL;
1885 goto out_bad_inode; 2046 goto out_bad_inode;
1886 } 2047 }
@@ -1888,9 +2049,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1888 } else { 2049 } else {
1889 inode2sd(&sd, inode, inode->i_size); 2050 inode2sd(&sd, inode, inode->i_size);
1890 } 2051 }
1891 // store in in-core inode the key of stat data and version all 2052 /*
1892 // object items will have (directory items will have old offset 2053 * store in in-core inode the key of stat data and version all
1893 // format, other new objects will consist of new items) 2054 * object items will have (directory items will have old offset
2055 * format, other new objects will consist of new items)
2056 */
1894 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) 2057 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1895 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 2058 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1896 else 2059 else
@@ -1934,7 +2097,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1934 if (retval) { 2097 if (retval) {
1935 err = retval; 2098 err = retval;
1936 reiserfs_check_path(&path_to_key); 2099 reiserfs_check_path(&path_to_key);
1937 journal_end(th, th->t_super, th->t_blocks_allocated); 2100 journal_end(th);
1938 goto out_inserted_sd; 2101 goto out_inserted_sd;
1939 } 2102 }
1940 2103
@@ -1945,7 +2108,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1945 if (retval) { 2108 if (retval) {
1946 err = retval; 2109 err = retval;
1947 reiserfs_check_path(&path_to_key); 2110 reiserfs_check_path(&path_to_key);
1948 journal_end(th, th->t_super, th->t_blocks_allocated); 2111 journal_end(th);
1949 goto out_inserted_sd; 2112 goto out_inserted_sd;
1950 } 2113 }
1951 } else if (inode->i_sb->s_flags & MS_POSIXACL) { 2114 } else if (inode->i_sb->s_flags & MS_POSIXACL) {
@@ -1962,8 +2125,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1962 if (retval) { 2125 if (retval) {
1963 err = retval; 2126 err = retval;
1964 reiserfs_check_path(&path_to_key); 2127 reiserfs_check_path(&path_to_key);
1965 retval = journal_end(th, th->t_super, 2128 retval = journal_end(th);
1966 th->t_blocks_allocated);
1967 if (retval) 2129 if (retval)
1968 err = retval; 2130 err = retval;
1969 goto out_inserted_sd; 2131 goto out_inserted_sd;
@@ -1975,11 +2137,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1975 2137
1976 return 0; 2138 return 0;
1977 2139
1978/* it looks like you can easily compress these two goto targets into 2140out_bad_inode:
1979 * one. Keeping it like this doesn't actually hurt anything, and they
1980 * are place holders for what the quota code actually needs.
1981 */
1982 out_bad_inode:
1983 /* Invalidate the object, nothing was inserted yet */ 2141 /* Invalidate the object, nothing was inserted yet */
1984 INODE_PKEY(inode)->k_objectid = 0; 2142 INODE_PKEY(inode)->k_objectid = 0;
1985 2143
@@ -1988,16 +2146,19 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1988 dquot_free_inode(inode); 2146 dquot_free_inode(inode);
1989 reiserfs_write_lock_nested(inode->i_sb, depth); 2147 reiserfs_write_lock_nested(inode->i_sb, depth);
1990 2148
1991 out_end_trans: 2149out_end_trans:
1992 journal_end(th, th->t_super, th->t_blocks_allocated); 2150 journal_end(th);
1993 /* Drop can be outside and it needs more credits so it's better to have it outside */ 2151 /*
2152 * Drop can be outside and it needs more credits so it's better
2153 * to have it outside
2154 */
1994 depth = reiserfs_write_unlock_nested(inode->i_sb); 2155 depth = reiserfs_write_unlock_nested(inode->i_sb);
1995 dquot_drop(inode); 2156 dquot_drop(inode);
1996 reiserfs_write_lock_nested(inode->i_sb, depth); 2157 reiserfs_write_lock_nested(inode->i_sb, depth);
1997 inode->i_flags |= S_NOQUOTA; 2158 inode->i_flags |= S_NOQUOTA;
1998 make_bad_inode(inode); 2159 make_bad_inode(inode);
1999 2160
2000 out_inserted_sd: 2161out_inserted_sd:
2001 clear_nlink(inode); 2162 clear_nlink(inode);
2002 th->t_trans_id = 0; /* so the caller can't use this handle later */ 2163 th->t_trans_id = 0; /* so the caller can't use this handle later */
2003 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ 2164 unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
@@ -2006,25 +2167,26 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
2006} 2167}
2007 2168
2008/* 2169/*
2009** finds the tail page in the page cache, 2170 * finds the tail page in the page cache,
2010** reads the last block in. 2171 * reads the last block in.
2011** 2172 *
2012** On success, page_result is set to a locked, pinned page, and bh_result 2173 * On success, page_result is set to a locked, pinned page, and bh_result
2013** is set to an up to date buffer for the last block in the file. returns 0. 2174 * is set to an up to date buffer for the last block in the file. returns 0.
2014** 2175 *
2015** tail conversion is not done, so bh_result might not be valid for writing 2176 * tail conversion is not done, so bh_result might not be valid for writing
2016** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before 2177 * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
2017** trying to write the block. 2178 * trying to write the block.
2018** 2179 *
2019** on failure, nonzero is returned, page_result and bh_result are untouched. 2180 * on failure, nonzero is returned, page_result and bh_result are untouched.
2020*/ 2181 */
2021static int grab_tail_page(struct inode *inode, 2182static int grab_tail_page(struct inode *inode,
2022 struct page **page_result, 2183 struct page **page_result,
2023 struct buffer_head **bh_result) 2184 struct buffer_head **bh_result)
2024{ 2185{
2025 2186
2026 /* we want the page with the last byte in the file, 2187 /*
2027 ** not the page that will hold the next byte for appending 2188 * we want the page with the last byte in the file,
2189 * not the page that will hold the next byte for appending
2028 */ 2190 */
2029 unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 2191 unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
2030 unsigned long pos = 0; 2192 unsigned long pos = 0;
@@ -2036,10 +2198,11 @@ static int grab_tail_page(struct inode *inode,
2036 struct page *page; 2198 struct page *page;
2037 int error; 2199 int error;
2038 2200
2039 /* we know that we are only called with inode->i_size > 0. 2201 /*
2040 ** we also know that a file tail can never be as big as a block 2202 * we know that we are only called with inode->i_size > 0.
2041 ** If i_size % blocksize == 0, our file is currently block aligned 2203 * we also know that a file tail can never be as big as a block
2042 ** and it won't need converting or zeroing after a truncate. 2204 * If i_size % blocksize == 0, our file is currently block aligned
2205 * and it won't need converting or zeroing after a truncate.
2043 */ 2206 */
2044 if ((offset & (blocksize - 1)) == 0) { 2207 if ((offset & (blocksize - 1)) == 0) {
2045 return -ENOENT; 2208 return -ENOENT;
@@ -2068,10 +2231,11 @@ static int grab_tail_page(struct inode *inode,
2068 } while (bh != head); 2231 } while (bh != head);
2069 2232
2070 if (!buffer_uptodate(bh)) { 2233 if (!buffer_uptodate(bh)) {
2071 /* note, this should never happen, prepare_write should 2234 /*
2072 ** be taking care of this for us. If the buffer isn't up to date, 2235 * note, this should never happen, prepare_write should be
2073 ** I've screwed up the code to find the buffer, or the code to 2236 * taking care of this for us. If the buffer isn't up to
2074 ** call prepare_write 2237 * date, I've screwed up the code to find the buffer, or the
2238 * code to call prepare_write
2075 */ 2239 */
2076 reiserfs_error(inode->i_sb, "clm-6000", 2240 reiserfs_error(inode->i_sb, "clm-6000",
2077 "error reading block %lu", bh->b_blocknr); 2241 "error reading block %lu", bh->b_blocknr);
@@ -2081,21 +2245,21 @@ static int grab_tail_page(struct inode *inode,
2081 *bh_result = bh; 2245 *bh_result = bh;
2082 *page_result = page; 2246 *page_result = page;
2083 2247
2084 out: 2248out:
2085 return error; 2249 return error;
2086 2250
2087 unlock: 2251unlock:
2088 unlock_page(page); 2252 unlock_page(page);
2089 page_cache_release(page); 2253 page_cache_release(page);
2090 return error; 2254 return error;
2091} 2255}
2092 2256
2093/* 2257/*
2094** vfs version of truncate file. Must NOT be called with 2258 * vfs version of truncate file. Must NOT be called with
2095** a transaction already started. 2259 * a transaction already started.
2096** 2260 *
2097** some code taken from block_truncate_page 2261 * some code taken from block_truncate_page
2098*/ 2262 */
2099int reiserfs_truncate_file(struct inode *inode, int update_timestamps) 2263int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2100{ 2264{
2101 struct reiserfs_transaction_handle th; 2265 struct reiserfs_transaction_handle th;
@@ -2113,9 +2277,11 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2113 if (inode->i_size > 0) { 2277 if (inode->i_size > 0) {
2114 error = grab_tail_page(inode, &page, &bh); 2278 error = grab_tail_page(inode, &page, &bh);
2115 if (error) { 2279 if (error) {
2116 // -ENOENT means we truncated past the end of the file, 2280 /*
2117 // and get_block_create_0 could not find a block to read in, 2281 * -ENOENT means we truncated past the end of the
2118 // which is ok. 2282 * file, and get_block_create_0 could not find a
2283 * block to read in, which is ok.
2284 */
2119 if (error != -ENOENT) 2285 if (error != -ENOENT)
2120 reiserfs_error(inode->i_sb, "clm-6001", 2286 reiserfs_error(inode->i_sb, "clm-6001",
2121 "grab_tail_page failed %d", 2287 "grab_tail_page failed %d",
@@ -2125,29 +2291,33 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2125 } 2291 }
2126 } 2292 }
2127 2293
2128 /* so, if page != NULL, we have a buffer head for the offset at 2294 /*
2129 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, 2295 * so, if page != NULL, we have a buffer head for the offset at
2130 ** then we have an unformatted node. Otherwise, we have a direct item, 2296 * the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2131 ** and no zeroing is required on disk. We zero after the truncate, 2297 * then we have an unformatted node. Otherwise, we have a direct item,
2132 ** because the truncate might pack the item anyway 2298 * and no zeroing is required on disk. We zero after the truncate,
2133 ** (it will unmap bh if it packs). 2299 * because the truncate might pack the item anyway
2300 * (it will unmap bh if it packs).
2301 *
2302 * it is enough to reserve space in transaction for 2 balancings:
2303 * one for "save" link adding and another for the first
2304 * cut_from_item. 1 is for update_sd
2134 */ 2305 */
2135 /* it is enough to reserve space in transaction for 2 balancings:
2136 one for "save" link adding and another for the first
2137 cut_from_item. 1 is for update_sd */
2138 error = journal_begin(&th, inode->i_sb, 2306 error = journal_begin(&th, inode->i_sb,
2139 JOURNAL_PER_BALANCE_CNT * 2 + 1); 2307 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2140 if (error) 2308 if (error)
2141 goto out; 2309 goto out;
2142 reiserfs_update_inode_transaction(inode); 2310 reiserfs_update_inode_transaction(inode);
2143 if (update_timestamps) 2311 if (update_timestamps)
2144 /* we are doing real truncate: if the system crashes before the last 2312 /*
2145 transaction of truncating gets committed - on reboot the file 2313 * we are doing real truncate: if the system crashes
2146 either appears truncated properly or not truncated at all */ 2314 * before the last transaction of truncating gets committed
2315 * - on reboot the file either appears truncated properly
2316 * or not truncated at all
2317 */
2147 add_save_link(&th, inode, 1); 2318 add_save_link(&th, inode, 1);
2148 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); 2319 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2149 error = 2320 error = journal_end(&th);
2150 journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2151 if (error) 2321 if (error)
2152 goto out; 2322 goto out;
2153 2323
@@ -2180,7 +2350,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2180 reiserfs_write_unlock(inode->i_sb); 2350 reiserfs_write_unlock(inode->i_sb);
2181 2351
2182 return 0; 2352 return 0;
2183 out: 2353out:
2184 if (page) { 2354 if (page) {
2185 unlock_page(page); 2355 unlock_page(page);
2186 page_cache_release(page); 2356 page_cache_release(page);
@@ -2212,7 +2382,10 @@ static int map_block_for_writepage(struct inode *inode,
2212 int copy_size; 2382 int copy_size;
2213 int trans_running = 0; 2383 int trans_running = 0;
2214 2384
2215 /* catch places below that try to log something without starting a trans */ 2385 /*
2386 * catch places below that try to log something without
2387 * starting a trans
2388 */
2216 th.t_trans_id = 0; 2389 th.t_trans_id = 0;
2217 2390
2218 if (!buffer_uptodate(bh_result)) { 2391 if (!buffer_uptodate(bh_result)) {
@@ -2220,11 +2393,11 @@ static int map_block_for_writepage(struct inode *inode,
2220 } 2393 }
2221 2394
2222 kmap(bh_result->b_page); 2395 kmap(bh_result->b_page);
2223 start_over: 2396start_over:
2224 reiserfs_write_lock(inode->i_sb); 2397 reiserfs_write_lock(inode->i_sb);
2225 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); 2398 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2226 2399
2227 research: 2400research:
2228 retval = search_for_position_by_key(inode->i_sb, &key, &path); 2401 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2229 if (retval != POSITION_FOUND) { 2402 if (retval != POSITION_FOUND) {
2230 use_get_block = 1; 2403 use_get_block = 1;
@@ -2232,8 +2405,8 @@ static int map_block_for_writepage(struct inode *inode,
2232 } 2405 }
2233 2406
2234 bh = get_last_bh(&path); 2407 bh = get_last_bh(&path);
2235 ih = get_ih(&path); 2408 ih = tp_item_head(&path);
2236 item = get_item(&path); 2409 item = tp_item_body(&path);
2237 pos_in_item = path.pos_in_item; 2410 pos_in_item = path.pos_in_item;
2238 2411
2239 /* we've found an unformatted node */ 2412 /* we've found an unformatted node */
@@ -2281,10 +2454,10 @@ static int map_block_for_writepage(struct inode *inode,
2281 goto research; 2454 goto research;
2282 } 2455 }
2283 2456
2284 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, 2457 memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied,
2285 copy_size); 2458 copy_size);
2286 2459
2287 journal_mark_dirty(&th, inode->i_sb, bh); 2460 journal_mark_dirty(&th, bh);
2288 bytes_copied += copy_size; 2461 bytes_copied += copy_size;
2289 set_block_dev_mapped(bh_result, 0, inode); 2462 set_block_dev_mapped(bh_result, 0, inode);
2290 2463
@@ -2304,10 +2477,10 @@ static int map_block_for_writepage(struct inode *inode,
2304 } 2477 }
2305 retval = 0; 2478 retval = 0;
2306 2479
2307 out: 2480out:
2308 pathrelse(&path); 2481 pathrelse(&path);
2309 if (trans_running) { 2482 if (trans_running) {
2310 int err = journal_end(&th, inode->i_sb, jbegin_count); 2483 int err = journal_end(&th);
2311 if (err) 2484 if (err)
2312 retval = err; 2485 retval = err;
2313 trans_running = 0; 2486 trans_running = 0;
@@ -2331,7 +2504,8 @@ static int map_block_for_writepage(struct inode *inode,
2331 kunmap(bh_result->b_page); 2504 kunmap(bh_result->b_page);
2332 2505
2333 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { 2506 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2334 /* we've copied data from the page into the direct item, so the 2507 /*
2508 * we've copied data from the page into the direct item, so the
2335 * buffer in the page is now clean, mark it to reflect that. 2509 * buffer in the page is now clean, mark it to reflect that.
2336 */ 2510 */
2337 lock_buffer(bh_result); 2511 lock_buffer(bh_result);
@@ -2370,7 +2544,8 @@ static int reiserfs_write_full_page(struct page *page,
2370 return 0; 2544 return 0;
2371 } 2545 }
2372 2546
2373 /* The page dirty bit is cleared before writepage is called, which 2547 /*
2548 * The page dirty bit is cleared before writepage is called, which
2374 * means we have to tell create_empty_buffers to make dirty buffers 2549 * means we have to tell create_empty_buffers to make dirty buffers
2375 * The page really should be up to date at this point, so tossing 2550 * The page really should be up to date at this point, so tossing
2376 * in the BH_Uptodate is just a sanity check. 2551 * in the BH_Uptodate is just a sanity check.
@@ -2381,8 +2556,9 @@ static int reiserfs_write_full_page(struct page *page,
2381 } 2556 }
2382 head = page_buffers(page); 2557 head = page_buffers(page);
2383 2558
2384 /* last page in the file, zero out any contents past the 2559 /*
2385 ** last byte in the file 2560 * last page in the file, zero out any contents past the
2561 * last byte in the file
2386 */ 2562 */
2387 if (page->index >= end_index) { 2563 if (page->index >= end_index) {
2388 unsigned last_offset; 2564 unsigned last_offset;
@@ -2412,7 +2588,8 @@ static int reiserfs_write_full_page(struct page *page,
2412 (!buffer_mapped(bh) || (buffer_mapped(bh) 2588 (!buffer_mapped(bh) || (buffer_mapped(bh)
2413 && bh->b_blocknr == 2589 && bh->b_blocknr ==
2414 0))) { 2590 0))) {
2415 /* not mapped yet, or it points to a direct item, search 2591 /*
2592 * not mapped yet, or it points to a direct item, search
2416 * the btree for the mapping info, and log any direct 2593 * the btree for the mapping info, and log any direct
2417 * items found 2594 * items found
2418 */ 2595 */
@@ -2450,10 +2627,11 @@ static int reiserfs_write_full_page(struct page *page,
2450 2627
2451 if (checked) { 2628 if (checked) {
2452 reiserfs_prepare_for_journal(s, bh, 1); 2629 reiserfs_prepare_for_journal(s, bh, 1);
2453 journal_mark_dirty(&th, s, bh); 2630 journal_mark_dirty(&th, bh);
2454 continue; 2631 continue;
2455 } 2632 }
2456 /* from this point on, we know the buffer is mapped to a 2633 /*
2634 * from this point on, we know the buffer is mapped to a
2457 * real block and not a direct item 2635 * real block and not a direct item
2458 */ 2636 */
2459 if (wbc->sync_mode != WB_SYNC_NONE) { 2637 if (wbc->sync_mode != WB_SYNC_NONE) {
@@ -2472,7 +2650,7 @@ static int reiserfs_write_full_page(struct page *page,
2472 } while ((bh = bh->b_this_page) != head); 2650 } while ((bh = bh->b_this_page) != head);
2473 2651
2474 if (checked) { 2652 if (checked) {
2475 error = journal_end(&th, s, bh_per_page + 1); 2653 error = journal_end(&th);
2476 reiserfs_write_unlock(s); 2654 reiserfs_write_unlock(s);
2477 if (error) 2655 if (error)
2478 goto fail; 2656 goto fail;
@@ -2497,7 +2675,7 @@ static int reiserfs_write_full_page(struct page *page,
2497 } while (bh != head); 2675 } while (bh != head);
2498 2676
2499 error = 0; 2677 error = 0;
2500 done: 2678done:
2501 if (nr == 0) { 2679 if (nr == 0) {
2502 /* 2680 /*
2503 * if this page only had a direct item, it is very possible for 2681 * if this page only had a direct item, it is very possible for
@@ -2519,8 +2697,9 @@ static int reiserfs_write_full_page(struct page *page,
2519 } 2697 }
2520 return error; 2698 return error;
2521 2699
2522 fail: 2700fail:
2523 /* catches various errors, we need to make sure any valid dirty blocks 2701 /*
2702 * catches various errors, we need to make sure any valid dirty blocks
2524 * get to the media. The page is currently locked and not marked for 2703 * get to the media. The page is currently locked and not marked for
2525 * writeback 2704 * writeback
2526 */ 2705 */
@@ -2533,8 +2712,8 @@ static int reiserfs_write_full_page(struct page *page,
2533 mark_buffer_async_write(bh); 2712 mark_buffer_async_write(bh);
2534 } else { 2713 } else {
2535 /* 2714 /*
2536 * clear any dirty bits that might have come from getting 2715 * clear any dirty bits that might have come from
2537 * attached to a dirty page 2716 * getting attached to a dirty page
2538 */ 2717 */
2539 clear_buffer_dirty(bh); 2718 clear_buffer_dirty(bh);
2540 } 2719 }
@@ -2614,15 +2793,18 @@ static int reiserfs_write_begin(struct file *file,
2614 ret = __block_write_begin(page, pos, len, reiserfs_get_block); 2793 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2615 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2794 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2616 struct reiserfs_transaction_handle *th = current->journal_info; 2795 struct reiserfs_transaction_handle *th = current->journal_info;
2617 /* this gets a little ugly. If reiserfs_get_block returned an 2796 /*
2618 * error and left a transacstion running, we've got to close it, 2797 * this gets a little ugly. If reiserfs_get_block returned an
2619 * and we've got to free handle if it was a persistent transaction. 2798 * error and left a transacstion running, we've got to close
2799 * it, and we've got to free handle if it was a persistent
2800 * transaction.
2620 * 2801 *
2621 * But, if we had nested into an existing transaction, we need 2802 * But, if we had nested into an existing transaction, we need
2622 * to just drop the ref count on the handle. 2803 * to just drop the ref count on the handle.
2623 * 2804 *
2624 * If old_ref == 0, the transaction is from reiserfs_get_block, 2805 * If old_ref == 0, the transaction is from reiserfs_get_block,
2625 * and it was a persistent trans. Otherwise, it was nested above. 2806 * and it was a persistent trans. Otherwise, it was nested
2807 * above.
2626 */ 2808 */
2627 if (th->t_refcount > old_ref) { 2809 if (th->t_refcount > old_ref) {
2628 if (old_ref) 2810 if (old_ref)
@@ -2671,15 +2853,18 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2671 ret = __block_write_begin(page, from, len, reiserfs_get_block); 2853 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2672 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2854 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2673 struct reiserfs_transaction_handle *th = current->journal_info; 2855 struct reiserfs_transaction_handle *th = current->journal_info;
2674 /* this gets a little ugly. If reiserfs_get_block returned an 2856 /*
2675 * error and left a transacstion running, we've got to close it, 2857 * this gets a little ugly. If reiserfs_get_block returned an
2676 * and we've got to free handle if it was a persistent transaction. 2858 * error and left a transacstion running, we've got to close
2859 * it, and we've got to free handle if it was a persistent
2860 * transaction.
2677 * 2861 *
2678 * But, if we had nested into an existing transaction, we need 2862 * But, if we had nested into an existing transaction, we need
2679 * to just drop the ref count on the handle. 2863 * to just drop the ref count on the handle.
2680 * 2864 *
2681 * If old_ref == 0, the transaction is from reiserfs_get_block, 2865 * If old_ref == 0, the transaction is from reiserfs_get_block,
2682 * and it was a persistent trans. Otherwise, it was nested above. 2866 * and it was a persistent trans. Otherwise, it was nested
2867 * above.
2683 */ 2868 */
2684 if (th->t_refcount > old_ref) { 2869 if (th->t_refcount > old_ref) {
2685 if (old_ref) 2870 if (old_ref)
@@ -2734,17 +2919,20 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2734 2919
2735 reiserfs_commit_page(inode, page, start, start + copied); 2920 reiserfs_commit_page(inode, page, start, start + copied);
2736 2921
2737 /* generic_commit_write does this for us, but does not update the 2922 /*
2738 ** transaction tracking stuff when the size changes. So, we have 2923 * generic_commit_write does this for us, but does not update the
2739 ** to do the i_size updates here. 2924 * transaction tracking stuff when the size changes. So, we have
2925 * to do the i_size updates here.
2740 */ 2926 */
2741 if (pos + copied > inode->i_size) { 2927 if (pos + copied > inode->i_size) {
2742 struct reiserfs_transaction_handle myth; 2928 struct reiserfs_transaction_handle myth;
2743 reiserfs_write_lock(inode->i_sb); 2929 reiserfs_write_lock(inode->i_sb);
2744 locked = true; 2930 locked = true;
2745 /* If the file have grown beyond the border where it 2931 /*
2746 can have a tail, unmark it as needing a tail 2932 * If the file have grown beyond the border where it
2747 packing */ 2933 * can have a tail, unmark it as needing a tail
2934 * packing
2935 */
2748 if ((have_large_tails(inode->i_sb) 2936 if ((have_large_tails(inode->i_sb)
2749 && inode->i_size > i_block_size(inode) * 4) 2937 && inode->i_size > i_block_size(inode) * 4)
2750 || (have_small_tails(inode->i_sb) 2938 || (have_small_tails(inode->i_sb)
@@ -2759,13 +2947,13 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2759 inode->i_size = pos + copied; 2947 inode->i_size = pos + copied;
2760 /* 2948 /*
2761 * this will just nest into our transaction. It's important 2949 * this will just nest into our transaction. It's important
2762 * to use mark_inode_dirty so the inode gets pushed around on the 2950 * to use mark_inode_dirty so the inode gets pushed around on
2763 * dirty lists, and so that O_SYNC works as expected 2951 * the dirty lists, and so that O_SYNC works as expected
2764 */ 2952 */
2765 mark_inode_dirty(inode); 2953 mark_inode_dirty(inode);
2766 reiserfs_update_sd(&myth, inode); 2954 reiserfs_update_sd(&myth, inode);
2767 update_sd = 1; 2955 update_sd = 1;
2768 ret = journal_end(&myth, inode->i_sb, 1); 2956 ret = journal_end(&myth);
2769 if (ret) 2957 if (ret)
2770 goto journal_error; 2958 goto journal_error;
2771 } 2959 }
@@ -2781,7 +2969,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2781 goto out; 2969 goto out;
2782 } 2970 }
2783 2971
2784 out: 2972out:
2785 if (locked) 2973 if (locked)
2786 reiserfs_write_unlock(inode->i_sb); 2974 reiserfs_write_unlock(inode->i_sb);
2787 unlock_page(page); 2975 unlock_page(page);
@@ -2792,7 +2980,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2792 2980
2793 return ret == 0 ? copied : ret; 2981 return ret == 0 ? copied : ret;
2794 2982
2795 journal_error: 2983journal_error:
2796 reiserfs_write_unlock(inode->i_sb); 2984 reiserfs_write_unlock(inode->i_sb);
2797 locked = false; 2985 locked = false;
2798 if (th) { 2986 if (th) {
@@ -2822,15 +3010,18 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2822 } 3010 }
2823 reiserfs_commit_page(inode, page, from, to); 3011 reiserfs_commit_page(inode, page, from, to);
2824 3012
2825 /* generic_commit_write does this for us, but does not update the 3013 /*
2826 ** transaction tracking stuff when the size changes. So, we have 3014 * generic_commit_write does this for us, but does not update the
2827 ** to do the i_size updates here. 3015 * transaction tracking stuff when the size changes. So, we have
3016 * to do the i_size updates here.
2828 */ 3017 */
2829 if (pos > inode->i_size) { 3018 if (pos > inode->i_size) {
2830 struct reiserfs_transaction_handle myth; 3019 struct reiserfs_transaction_handle myth;
2831 /* If the file have grown beyond the border where it 3020 /*
2832 can have a tail, unmark it as needing a tail 3021 * If the file have grown beyond the border where it
2833 packing */ 3022 * can have a tail, unmark it as needing a tail
3023 * packing
3024 */
2834 if ((have_large_tails(inode->i_sb) 3025 if ((have_large_tails(inode->i_sb)
2835 && inode->i_size > i_block_size(inode) * 4) 3026 && inode->i_size > i_block_size(inode) * 4)
2836 || (have_small_tails(inode->i_sb) 3027 || (have_small_tails(inode->i_sb)
@@ -2845,13 +3036,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2845 inode->i_size = pos; 3036 inode->i_size = pos;
2846 /* 3037 /*
2847 * this will just nest into our transaction. It's important 3038 * this will just nest into our transaction. It's important
2848 * to use mark_inode_dirty so the inode gets pushed around on the 3039 * to use mark_inode_dirty so the inode gets pushed around
2849 * dirty lists, and so that O_SYNC works as expected 3040 * on the dirty lists, and so that O_SYNC works as expected
2850 */ 3041 */
2851 mark_inode_dirty(inode); 3042 mark_inode_dirty(inode);
2852 reiserfs_update_sd(&myth, inode); 3043 reiserfs_update_sd(&myth, inode);
2853 update_sd = 1; 3044 update_sd = 1;
2854 ret = journal_end(&myth, inode->i_sb, 1); 3045 ret = journal_end(&myth);
2855 if (ret) 3046 if (ret)
2856 goto journal_error; 3047 goto journal_error;
2857 } 3048 }
@@ -2863,10 +3054,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2863 goto out; 3054 goto out;
2864 } 3055 }
2865 3056
2866 out: 3057out:
2867 return ret; 3058 return ret;
2868 3059
2869 journal_error: 3060journal_error:
2870 if (th) { 3061 if (th) {
2871 if (!update_sd) 3062 if (!update_sd)
2872 reiserfs_update_sd(th, inode); 3063 reiserfs_update_sd(th, inode);
@@ -2924,9 +3115,10 @@ void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2924 } 3115 }
2925} 3116}
2926 3117
2927/* decide if this buffer needs to stay around for data logging or ordered 3118/*
2928** write purposes 3119 * decide if this buffer needs to stay around for data logging or ordered
2929*/ 3120 * write purposes
3121 */
2930static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) 3122static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2931{ 3123{
2932 int ret = 1; 3124 int ret = 1;
@@ -2937,7 +3129,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2937 if (!buffer_mapped(bh)) { 3129 if (!buffer_mapped(bh)) {
2938 goto free_jh; 3130 goto free_jh;
2939 } 3131 }
2940 /* the page is locked, and the only places that log a data buffer 3132 /*
3133 * the page is locked, and the only places that log a data buffer
2941 * also lock the page. 3134 * also lock the page.
2942 */ 3135 */
2943 if (reiserfs_file_data_log(inode)) { 3136 if (reiserfs_file_data_log(inode)) {
@@ -2952,7 +3145,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2952 struct reiserfs_journal_list *jl; 3145 struct reiserfs_journal_list *jl;
2953 struct reiserfs_jh *jh = bh->b_private; 3146 struct reiserfs_jh *jh = bh->b_private;
2954 3147
2955 /* why is this safe? 3148 /*
3149 * why is this safe?
2956 * reiserfs_setattr updates i_size in the on disk 3150 * reiserfs_setattr updates i_size in the on disk
2957 * stat data before allowing vmtruncate to be called. 3151 * stat data before allowing vmtruncate to be called.
2958 * 3152 *
@@ -2969,7 +3163,7 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2969 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) 3163 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2970 ret = 0; 3164 ret = 0;
2971 } 3165 }
2972 free_jh: 3166free_jh:
2973 if (ret && bh->b_private) { 3167 if (ret && bh->b_private) {
2974 reiserfs_free_jh(bh); 3168 reiserfs_free_jh(bh);
2975 } 3169 }
@@ -3028,7 +3222,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
3028 ret = try_to_release_page(page, 0); 3222 ret = try_to_release_page(page, 0);
3029 /* maybe should BUG_ON(!ret); - neilb */ 3223 /* maybe should BUG_ON(!ret); - neilb */
3030 } 3224 }
3031 out: 3225out:
3032 return; 3226 return;
3033} 3227}
3034 3228
@@ -3080,8 +3274,10 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3080 return ret; 3274 return ret;
3081} 3275}
3082 3276
3083/* We thank Mingming Cao for helping us understand in great detail what 3277/*
3084 to do in this section of the code. */ 3278 * We thank Mingming Cao for helping us understand in great detail what
3279 * to do in this section of the code.
3280 */
3085static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 3281static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3086 const struct iovec *iov, loff_t offset, 3282 const struct iovec *iov, loff_t offset,
3087 unsigned long nr_segs) 3283 unsigned long nr_segs)
@@ -3127,8 +3323,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3127 dquot_initialize(inode); 3323 dquot_initialize(inode);
3128 reiserfs_write_lock(inode->i_sb); 3324 reiserfs_write_lock(inode->i_sb);
3129 if (attr->ia_valid & ATTR_SIZE) { 3325 if (attr->ia_valid & ATTR_SIZE) {
3130 /* version 2 items will be caught by the s_maxbytes check 3326 /*
3131 ** done for us in vmtruncate 3327 * version 2 items will be caught by the s_maxbytes check
3328 * done for us in vmtruncate
3132 */ 3329 */
3133 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && 3330 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3134 attr->ia_size > MAX_NON_LFS) { 3331 attr->ia_size > MAX_NON_LFS) {
@@ -3149,7 +3346,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3149 err = journal_begin(&th, inode->i_sb, 4); 3346 err = journal_begin(&th, inode->i_sb, 4);
3150 if (!err) { 3347 if (!err) {
3151 reiserfs_discard_prealloc(&th, inode); 3348 reiserfs_discard_prealloc(&th, inode);
3152 err = journal_end(&th, inode->i_sb, 4); 3349 err = journal_end(&th);
3153 } 3350 }
3154 if (err) 3351 if (err)
3155 error = err; 3352 error = err;
@@ -3189,7 +3386,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3189 if (error) 3386 if (error)
3190 return error; 3387 return error;
3191 3388
3192 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 3389 /*
3390 * (user+group)*(old+new) structure - we count quota
3391 * info and , inode write (sb, inode)
3392 */
3193 reiserfs_write_lock(inode->i_sb); 3393 reiserfs_write_lock(inode->i_sb);
3194 error = journal_begin(&th, inode->i_sb, jbegin_count); 3394 error = journal_begin(&th, inode->i_sb, jbegin_count);
3195 reiserfs_write_unlock(inode->i_sb); 3395 reiserfs_write_unlock(inode->i_sb);
@@ -3198,19 +3398,21 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3198 error = dquot_transfer(inode, attr); 3398 error = dquot_transfer(inode, attr);
3199 reiserfs_write_lock(inode->i_sb); 3399 reiserfs_write_lock(inode->i_sb);
3200 if (error) { 3400 if (error) {
3201 journal_end(&th, inode->i_sb, jbegin_count); 3401 journal_end(&th);
3202 reiserfs_write_unlock(inode->i_sb); 3402 reiserfs_write_unlock(inode->i_sb);
3203 goto out; 3403 goto out;
3204 } 3404 }
3205 3405
3206 /* Update corresponding info in inode so that everything is in 3406 /*
3207 * one transaction */ 3407 * Update corresponding info in inode so that everything
3408 * is in one transaction
3409 */
3208 if (attr->ia_valid & ATTR_UID) 3410 if (attr->ia_valid & ATTR_UID)
3209 inode->i_uid = attr->ia_uid; 3411 inode->i_uid = attr->ia_uid;
3210 if (attr->ia_valid & ATTR_GID) 3412 if (attr->ia_valid & ATTR_GID)
3211 inode->i_gid = attr->ia_gid; 3413 inode->i_gid = attr->ia_gid;
3212 mark_inode_dirty(inode); 3414 mark_inode_dirty(inode);
3213 error = journal_end(&th, inode->i_sb, jbegin_count); 3415 error = journal_end(&th);
3214 reiserfs_write_unlock(inode->i_sb); 3416 reiserfs_write_unlock(inode->i_sb);
3215 if (error) 3417 if (error)
3216 goto out; 3418 goto out;
@@ -3220,8 +3422,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3220 attr->ia_size != i_size_read(inode)) { 3422 attr->ia_size != i_size_read(inode)) {
3221 error = inode_newsize_ok(inode, attr->ia_size); 3423 error = inode_newsize_ok(inode, attr->ia_size);
3222 if (!error) { 3424 if (!error) {
3425 /*
3426 * Could race against reiserfs_file_release
3427 * if called from NFS, so take tailpack mutex.
3428 */
3429 mutex_lock(&REISERFS_I(inode)->tailpack);
3223 truncate_setsize(inode, attr->ia_size); 3430 truncate_setsize(inode, attr->ia_size);
3224 reiserfs_vfs_truncate_file(inode); 3431 reiserfs_truncate_file(inode, 1);
3432 mutex_unlock(&REISERFS_I(inode)->tailpack);
3225 } 3433 }
3226 } 3434 }
3227 3435
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 946ccbf5b5a1..501ed6811a2b 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -15,7 +15,8 @@
15 * reiserfs_ioctl - handler for ioctl for inode 15 * reiserfs_ioctl - handler for ioctl for inode
16 * supported commands: 16 * supported commands:
17 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect 17 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
18 * and prevent packing file (argument arg has to be non-zero) 18 * and prevent packing file (argument arg has t
19 * be non-zero)
19 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 20 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
20 * 3) That's all for a while ... 21 * 3) That's all for a while ...
21 */ 22 */
@@ -132,7 +133,10 @@ setversion_out:
132long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, 133long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
133 unsigned long arg) 134 unsigned long arg)
134{ 135{
135 /* These are just misnamed, they actually get/put from/to user an int */ 136 /*
137 * These are just misnamed, they actually
138 * get/put from/to user an int
139 */
136 switch (cmd) { 140 switch (cmd) {
137 case REISERFS_IOC32_UNPACK: 141 case REISERFS_IOC32_UNPACK:
138 cmd = REISERFS_IOC_UNPACK; 142 cmd = REISERFS_IOC_UNPACK;
@@ -160,10 +164,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
160int reiserfs_commit_write(struct file *f, struct page *page, 164int reiserfs_commit_write(struct file *f, struct page *page,
161 unsigned from, unsigned to); 165 unsigned from, unsigned to);
162/* 166/*
163** reiserfs_unpack 167 * reiserfs_unpack
164** Function try to convert tail from direct item into indirect. 168 * Function try to convert tail from direct item into indirect.
165** It set up nopack attribute in the REISERFS_I(inode)->nopack 169 * It set up nopack attribute in the REISERFS_I(inode)->nopack
166*/ 170 */
167int reiserfs_unpack(struct inode *inode, struct file *filp) 171int reiserfs_unpack(struct inode *inode, struct file *filp)
168{ 172{
169 int retval = 0; 173 int retval = 0;
@@ -194,9 +198,10 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
194 goto out; 198 goto out;
195 } 199 }
196 200
197 /* we unpack by finding the page with the tail, and calling 201 /*
198 ** __reiserfs_write_begin on that page. This will force a 202 * we unpack by finding the page with the tail, and calling
199 ** reiserfs_get_block to unpack the tail for us. 203 * __reiserfs_write_begin on that page. This will force a
204 * reiserfs_get_block to unpack the tail for us.
200 */ 205 */
201 index = inode->i_size >> PAGE_CACHE_SHIFT; 206 index = inode->i_size >> PAGE_CACHE_SHIFT;
202 mapping = inode->i_mapping; 207 mapping = inode->i_mapping;
@@ -214,11 +219,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
214 retval = reiserfs_commit_write(NULL, page, write_from, write_from); 219 retval = reiserfs_commit_write(NULL, page, write_from, write_from);
215 REISERFS_I(inode)->i_flags |= i_nopack_mask; 220 REISERFS_I(inode)->i_flags |= i_nopack_mask;
216 221
217 out_unlock: 222out_unlock:
218 unlock_page(page); 223 unlock_page(page);
219 page_cache_release(page); 224 page_cache_release(page);
220 225
221 out: 226out:
222 mutex_unlock(&inode->i_mutex); 227 mutex_unlock(&inode->i_mutex);
223 reiserfs_write_unlock(inode->i_sb); 228 reiserfs_write_unlock(inode->i_sb);
224 return retval; 229 return retval;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index ee382ef3d300..cfaee912ee09 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -5,15 +5,17 @@
5#include <linux/time.h> 5#include <linux/time.h>
6#include "reiserfs.h" 6#include "reiserfs.h"
7 7
8// this contains item handlers for old item types: sd, direct, 8/*
9// indirect, directory 9 * this contains item handlers for old item types: sd, direct,
10 * indirect, directory
11 */
10 12
11/* and where are the comments? how about saying where we can find an 13/*
12 explanation of each item handler method? -Hans */ 14 * and where are the comments? how about saying where we can find an
15 * explanation of each item handler method? -Hans
16 */
13 17
14////////////////////////////////////////////////////////////////////////////// 18/* stat data functions */
15// stat data functions
16//
17static int sd_bytes_number(struct item_head *ih, int block_size) 19static int sd_bytes_number(struct item_head *ih, int block_size)
18{ 20{
19 return 0; 21 return 0;
@@ -60,7 +62,7 @@ static void sd_print_item(struct item_head *ih, char *item)
60 62
61static void sd_check_item(struct item_head *ih, char *item) 63static void sd_check_item(struct item_head *ih, char *item)
62{ 64{
63 // FIXME: type something here! 65 /* unused */
64} 66}
65 67
66static int sd_create_vi(struct virtual_node *vn, 68static int sd_create_vi(struct virtual_node *vn,
@@ -68,7 +70,6 @@ static int sd_create_vi(struct virtual_node *vn,
68 int is_affected, int insert_size) 70 int is_affected, int insert_size)
69{ 71{
70 vi->vi_index = TYPE_STAT_DATA; 72 vi->vi_index = TYPE_STAT_DATA;
71 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
72 return 0; 73 return 0;
73} 74}
74 75
@@ -117,15 +118,13 @@ static struct item_operations stat_data_ops = {
117 .print_vi = sd_print_vi 118 .print_vi = sd_print_vi
118}; 119};
119 120
120////////////////////////////////////////////////////////////////////////////// 121/* direct item functions */
121// direct item functions
122//
123static int direct_bytes_number(struct item_head *ih, int block_size) 122static int direct_bytes_number(struct item_head *ih, int block_size)
124{ 123{
125 return ih_item_len(ih); 124 return ih_item_len(ih);
126} 125}
127 126
128// FIXME: this should probably switch to indirect as well 127/* FIXME: this should probably switch to indirect as well */
129static void direct_decrement_key(struct cpu_key *key) 128static void direct_decrement_key(struct cpu_key *key)
130{ 129{
131 cpu_key_k_offset_dec(key); 130 cpu_key_k_offset_dec(key);
@@ -144,7 +143,7 @@ static void direct_print_item(struct item_head *ih, char *item)
144{ 143{
145 int j = 0; 144 int j = 0;
146 145
147// return; 146/* return; */
148 printk("\""); 147 printk("\"");
149 while (j < ih_item_len(ih)) 148 while (j < ih_item_len(ih))
150 printk("%c", item[j++]); 149 printk("%c", item[j++]);
@@ -153,7 +152,7 @@ static void direct_print_item(struct item_head *ih, char *item)
153 152
154static void direct_check_item(struct item_head *ih, char *item) 153static void direct_check_item(struct item_head *ih, char *item)
155{ 154{
156 // FIXME: type something here! 155 /* unused */
157} 156}
158 157
159static int direct_create_vi(struct virtual_node *vn, 158static int direct_create_vi(struct virtual_node *vn,
@@ -161,7 +160,6 @@ static int direct_create_vi(struct virtual_node *vn,
161 int is_affected, int insert_size) 160 int is_affected, int insert_size)
162{ 161{
163 vi->vi_index = TYPE_DIRECT; 162 vi->vi_index = TYPE_DIRECT;
164 //vi->vi_type |= VI_TYPE_DIRECT;
165 return 0; 163 return 0;
166} 164}
167 165
@@ -211,16 +209,13 @@ static struct item_operations direct_ops = {
211 .print_vi = direct_print_vi 209 .print_vi = direct_print_vi
212}; 210};
213 211
214////////////////////////////////////////////////////////////////////////////// 212/* indirect item functions */
215// indirect item functions
216//
217
218static int indirect_bytes_number(struct item_head *ih, int block_size) 213static int indirect_bytes_number(struct item_head *ih, int block_size)
219{ 214{
220 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); 215 return ih_item_len(ih) / UNFM_P_SIZE * block_size;
221} 216}
222 217
223// decrease offset, if it becomes 0, change type to stat data 218/* decrease offset, if it becomes 0, change type to stat data */
224static void indirect_decrement_key(struct cpu_key *key) 219static void indirect_decrement_key(struct cpu_key *key)
225{ 220{
226 cpu_key_k_offset_dec(key); 221 cpu_key_k_offset_dec(key);
@@ -228,7 +223,7 @@ static void indirect_decrement_key(struct cpu_key *key)
228 set_cpu_key_k_type(key, TYPE_STAT_DATA); 223 set_cpu_key_k_type(key, TYPE_STAT_DATA);
229} 224}
230 225
231// if it is not first item of the body, then it is mergeable 226/* if it is not first item of the body, then it is mergeable */
232static int indirect_is_left_mergeable(struct reiserfs_key *key, 227static int indirect_is_left_mergeable(struct reiserfs_key *key,
233 unsigned long bsize) 228 unsigned long bsize)
234{ 229{
@@ -236,7 +231,7 @@ static int indirect_is_left_mergeable(struct reiserfs_key *key,
236 return (le_key_k_offset(version, key) != 1); 231 return (le_key_k_offset(version, key) != 1);
237} 232}
238 233
239// printing of indirect item 234/* printing of indirect item */
240static void start_new_sequence(__u32 * start, int *len, __u32 new) 235static void start_new_sequence(__u32 * start, int *len, __u32 new)
241{ 236{
242 *start = new; 237 *start = new;
@@ -295,7 +290,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
295 290
296static void indirect_check_item(struct item_head *ih, char *item) 291static void indirect_check_item(struct item_head *ih, char *item)
297{ 292{
298 // FIXME: type something here! 293 /* unused */
299} 294}
300 295
301static int indirect_create_vi(struct virtual_node *vn, 296static int indirect_create_vi(struct virtual_node *vn,
@@ -303,7 +298,6 @@ static int indirect_create_vi(struct virtual_node *vn,
303 int is_affected, int insert_size) 298 int is_affected, int insert_size)
304{ 299{
305 vi->vi_index = TYPE_INDIRECT; 300 vi->vi_index = TYPE_INDIRECT;
306 //vi->vi_type |= VI_TYPE_INDIRECT;
307 return 0; 301 return 0;
308} 302}
309 303
@@ -321,16 +315,19 @@ static int indirect_check_right(struct virtual_item *vi, int free)
321 return indirect_check_left(vi, free, 0, 0); 315 return indirect_check_left(vi, free, 0, 0);
322} 316}
323 317
324// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) 318/*
319 * return size in bytes of 'units' units. If first == 0 - calculate
320 * from the head (left), otherwise - from tail (right)
321 */
325static int indirect_part_size(struct virtual_item *vi, int first, int units) 322static int indirect_part_size(struct virtual_item *vi, int first, int units)
326{ 323{
327 // unit of indirect item is byte (yet) 324 /* unit of indirect item is byte (yet) */
328 return units; 325 return units;
329} 326}
330 327
331static int indirect_unit_num(struct virtual_item *vi) 328static int indirect_unit_num(struct virtual_item *vi)
332{ 329{
333 // unit of indirect item is byte (yet) 330 /* unit of indirect item is byte (yet) */
334 return vi->vi_item_len - IH_SIZE; 331 return vi->vi_item_len - IH_SIZE;
335} 332}
336 333
@@ -356,10 +353,7 @@ static struct item_operations indirect_ops = {
356 .print_vi = indirect_print_vi 353 .print_vi = indirect_print_vi
357}; 354};
358 355
359////////////////////////////////////////////////////////////////////////////// 356/* direntry functions */
360// direntry functions
361//
362
363static int direntry_bytes_number(struct item_head *ih, int block_size) 357static int direntry_bytes_number(struct item_head *ih, int block_size)
364{ 358{
365 reiserfs_warning(NULL, "vs-16090", 359 reiserfs_warning(NULL, "vs-16090",
@@ -396,7 +390,7 @@ static void direntry_print_item(struct item_head *ih, char *item)
396 390
397 deh = (struct reiserfs_de_head *)item; 391 deh = (struct reiserfs_de_head *)item;
398 392
399 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { 393 for (i = 0; i < ih_entry_count(ih); i++, deh++) {
400 namelen = 394 namelen =
401 (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - 395 (i ? (deh_location(deh - 1)) : ih_item_len(ih)) -
402 deh_location(deh); 396 deh_location(deh);
@@ -428,9 +422,9 @@ static void direntry_check_item(struct item_head *ih, char *item)
428 int i; 422 int i;
429 struct reiserfs_de_head *deh; 423 struct reiserfs_de_head *deh;
430 424
431 // FIXME: type something here! 425 /* unused */
432 deh = (struct reiserfs_de_head *)item; 426 deh = (struct reiserfs_de_head *)item;
433 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) { 427 for (i = 0; i < ih_entry_count(ih); i++, deh++) {
434 ; 428 ;
435 } 429 }
436} 430}
@@ -439,7 +433,8 @@ static void direntry_check_item(struct item_head *ih, char *item)
439 433
440/* 434/*
441 * function returns old entry number in directory item in real node 435 * function returns old entry number in directory item in real node
442 * using new entry number in virtual item in virtual node */ 436 * using new entry number in virtual item in virtual node
437 */
443static inline int old_entry_num(int is_affected, int virtual_entry_num, 438static inline int old_entry_num(int is_affected, int virtual_entry_num,
444 int pos_in_item, int mode) 439 int pos_in_item, int mode)
445{ 440{
@@ -463,9 +458,11 @@ static inline int old_entry_num(int is_affected, int virtual_entry_num,
463 return virtual_entry_num - 1; 458 return virtual_entry_num - 1;
464} 459}
465 460
466/* Create an array of sizes of directory entries for virtual 461/*
467 item. Return space used by an item. FIXME: no control over 462 * Create an array of sizes of directory entries for virtual
468 consuming of space used by this item handler */ 463 * item. Return space used by an item. FIXME: no control over
464 * consuming of space used by this item handler
465 */
469static int direntry_create_vi(struct virtual_node *vn, 466static int direntry_create_vi(struct virtual_node *vn,
470 struct virtual_item *vi, 467 struct virtual_item *vi,
471 int is_affected, int insert_size) 468 int is_affected, int insert_size)
@@ -494,8 +491,8 @@ static int direntry_create_vi(struct virtual_node *vn,
494 j = old_entry_num(is_affected, i, vn->vn_pos_in_item, 491 j = old_entry_num(is_affected, i, vn->vn_pos_in_item,
495 vn->vn_mode); 492 vn->vn_mode);
496 dir_u->entry_sizes[i] = 493 dir_u->entry_sizes[i] =
497 (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) - 494 (j ? deh_location(&deh[j - 1]) : ih_item_len(vi->vi_ih)) -
498 deh_location(&(deh[j])) + DEH_SIZE; 495 deh_location(&deh[j]) + DEH_SIZE;
499 } 496 }
500 497
501 size += (dir_u->entry_count * sizeof(short)); 498 size += (dir_u->entry_count * sizeof(short));
@@ -529,10 +526,10 @@ static int direntry_create_vi(struct virtual_node *vn,
529 526
530} 527}
531 528
532// 529/*
533// return number of entries which may fit into specified amount of 530 * return number of entries which may fit into specified amount of
534// free space, or -1 if free space is not enough even for 1 entry 531 * free space, or -1 if free space is not enough even for 1 entry
535// 532 */
536static int direntry_check_left(struct virtual_item *vi, int free, 533static int direntry_check_left(struct virtual_item *vi, int free,
537 int start_skip, int end_skip) 534 int start_skip, int end_skip)
538{ 535{
@@ -541,8 +538,8 @@ static int direntry_check_left(struct virtual_item *vi, int free,
541 struct direntry_uarea *dir_u = vi->vi_uarea; 538 struct direntry_uarea *dir_u = vi->vi_uarea;
542 539
543 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { 540 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
541 /* i-th entry doesn't fit into the remaining free space */
544 if (dir_u->entry_sizes[i] > free) 542 if (dir_u->entry_sizes[i] > free)
545 /* i-th entry doesn't fit into the remaining free space */
546 break; 543 break;
547 544
548 free -= dir_u->entry_sizes[i]; 545 free -= dir_u->entry_sizes[i];
@@ -570,8 +567,8 @@ static int direntry_check_right(struct virtual_item *vi, int free)
570 struct direntry_uarea *dir_u = vi->vi_uarea; 567 struct direntry_uarea *dir_u = vi->vi_uarea;
571 568
572 for (i = dir_u->entry_count - 1; i >= 0; i--) { 569 for (i = dir_u->entry_count - 1; i >= 0; i--) {
570 /* i-th entry doesn't fit into the remaining free space */
573 if (dir_u->entry_sizes[i] > free) 571 if (dir_u->entry_sizes[i] > free)
574 /* i-th entry doesn't fit into the remaining free space */
575 break; 572 break;
576 573
577 free -= dir_u->entry_sizes[i]; 574 free -= dir_u->entry_sizes[i];
@@ -643,9 +640,7 @@ static struct item_operations direntry_ops = {
643 .print_vi = direntry_print_vi 640 .print_vi = direntry_print_vi
644}; 641};
645 642
646////////////////////////////////////////////////////////////////////////////// 643/* Error catching functions to catch errors caused by incorrect item types. */
647// Error catching functions to catch errors caused by incorrect item types.
648//
649static int errcatch_bytes_number(struct item_head *ih, int block_size) 644static int errcatch_bytes_number(struct item_head *ih, int block_size)
650{ 645{
651 reiserfs_warning(NULL, "green-16001", 646 reiserfs_warning(NULL, "green-16001",
@@ -685,8 +680,12 @@ static int errcatch_create_vi(struct virtual_node *vn,
685{ 680{
686 reiserfs_warning(NULL, "green-16006", 681 reiserfs_warning(NULL, "green-16006",
687 "Invalid item type observed, run fsck ASAP"); 682 "Invalid item type observed, run fsck ASAP");
688 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where 683 /*
689 // this operation is called from is of return type void. 684 * We might return -1 here as well, but it won't help as
685 * create_virtual_node() from where this operation is called
686 * from is of return type void.
687 */
688 return 0;
690} 689}
691 690
692static int errcatch_check_left(struct virtual_item *vi, int free, 691static int errcatch_check_left(struct virtual_item *vi, int free,
@@ -739,9 +738,6 @@ static struct item_operations errcatch_ops = {
739 errcatch_print_vi 738 errcatch_print_vi
740}; 739};
741 740
742//////////////////////////////////////////////////////////////////////////////
743//
744//
745#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) 741#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
746#error Item types must use disk-format assigned values. 742#error Item types must use disk-format assigned values.
747#endif 743#endif
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index fd777032c2ba..e8870de4627e 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,38 +1,38 @@
1/* 1/*
2** Write ahead logging implementation copyright Chris Mason 2000 2 * Write ahead logging implementation copyright Chris Mason 2000
3** 3 *
4** The background commits make this code very interrelated, and 4 * The background commits make this code very interrelated, and
5** overly complex. I need to rethink things a bit....The major players: 5 * overly complex. I need to rethink things a bit....The major players:
6** 6 *
7** journal_begin -- call with the number of blocks you expect to log. 7 * journal_begin -- call with the number of blocks you expect to log.
8** If the current transaction is too 8 * If the current transaction is too
9** old, it will block until the current transaction is 9 * old, it will block until the current transaction is
10** finished, and then start a new one. 10 * finished, and then start a new one.
11** Usually, your transaction will get joined in with 11 * Usually, your transaction will get joined in with
12** previous ones for speed. 12 * previous ones for speed.
13** 13 *
14** journal_join -- same as journal_begin, but won't block on the current 14 * journal_join -- same as journal_begin, but won't block on the current
15** transaction regardless of age. Don't ever call 15 * transaction regardless of age. Don't ever call
16** this. Ever. There are only two places it should be 16 * this. Ever. There are only two places it should be
17** called from, and they are both inside this file. 17 * called from, and they are both inside this file.
18** 18 *
19** journal_mark_dirty -- adds blocks into this transaction. clears any flags 19 * journal_mark_dirty -- adds blocks into this transaction. clears any flags
20** that might make them get sent to disk 20 * that might make them get sent to disk
21** and then marks them BH_JDirty. Puts the buffer head 21 * and then marks them BH_JDirty. Puts the buffer head
22** into the current transaction hash. 22 * into the current transaction hash.
23** 23 *
24** journal_end -- if the current transaction is batchable, it does nothing 24 * journal_end -- if the current transaction is batchable, it does nothing
25** otherwise, it could do an async/synchronous commit, or 25 * otherwise, it could do an async/synchronous commit, or
26** a full flush of all log and real blocks in the 26 * a full flush of all log and real blocks in the
27** transaction. 27 * transaction.
28** 28 *
29** flush_old_commits -- if the current transaction is too old, it is ended and 29 * flush_old_commits -- if the current transaction is too old, it is ended and
30** commit blocks are sent to disk. Forces commit blocks 30 * commit blocks are sent to disk. Forces commit blocks
31** to disk for all backgrounded commits that have been 31 * to disk for all backgrounded commits that have been
32** around too long. 32 * around too long.
33** -- Note, if you call this as an immediate flush from 33 * -- Note, if you call this as an immediate flush from
34** from within kupdate, it will ignore the immediate flag 34 * from within kupdate, it will ignore the immediate flag
35*/ 35 */
36 36
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/semaphore.h> 38#include <linux/semaphore.h>
@@ -58,23 +58,19 @@
58#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 58#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
59 j_working_list)) 59 j_working_list))
60 60
61/* the number of mounted filesystems. This is used to decide when to 61/* must be correct to keep the desc and commit structs at 4k */
62** start and kill the commit workqueue 62#define JOURNAL_TRANS_HALF 1018
63*/
64static int reiserfs_mounted_fs_count;
65
66static struct workqueue_struct *commit_wq;
67
68#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
69 structs at 4k */
70#define BUFNR 64 /*read ahead */ 63#define BUFNR 64 /*read ahead */
71 64
72/* cnode stat bits. Move these into reiserfs_fs.h */ 65/* cnode stat bits. Move these into reiserfs_fs.h */
73 66
74#define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 67/* this block was freed, and can't be written. */
75#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 68#define BLOCK_FREED 2
69/* this block was freed during this transaction, and can't be written */
70#define BLOCK_FREED_HOLDER 3
76 71
77#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 72/* used in flush_journal_list */
73#define BLOCK_NEEDS_FLUSH 4
78#define BLOCK_DIRTIED 5 74#define BLOCK_DIRTIED 5
79 75
80/* journal list state bits */ 76/* journal list state bits */
@@ -87,16 +83,14 @@ static struct workqueue_struct *commit_wq;
87#define COMMIT_NOW 2 /* end and commit this transaction */ 83#define COMMIT_NOW 2 /* end and commit this transaction */
88#define WAIT 4 /* wait for the log blocks to hit the disk */ 84#define WAIT 4 /* wait for the log blocks to hit the disk */
89 85
90static int do_journal_end(struct reiserfs_transaction_handle *, 86static int do_journal_end(struct reiserfs_transaction_handle *, int flags);
91 struct super_block *, unsigned long nblocks,
92 int flags);
93static int flush_journal_list(struct super_block *s, 87static int flush_journal_list(struct super_block *s,
94 struct reiserfs_journal_list *jl, int flushall); 88 struct reiserfs_journal_list *jl, int flushall);
95static int flush_commit_list(struct super_block *s, 89static int flush_commit_list(struct super_block *s,
96 struct reiserfs_journal_list *jl, int flushall); 90 struct reiserfs_journal_list *jl, int flushall);
97static int can_dirty(struct reiserfs_journal_cnode *cn); 91static int can_dirty(struct reiserfs_journal_cnode *cn);
98static int journal_join(struct reiserfs_transaction_handle *th, 92static int journal_join(struct reiserfs_transaction_handle *th,
99 struct super_block *sb, unsigned long nblocks); 93 struct super_block *sb);
100static void release_journal_dev(struct super_block *super, 94static void release_journal_dev(struct super_block *super,
101 struct reiserfs_journal *journal); 95 struct reiserfs_journal *journal);
102static int dirty_one_transaction(struct super_block *s, 96static int dirty_one_transaction(struct super_block *s,
@@ -107,8 +101,10 @@ static void queue_log_writer(struct super_block *s);
107/* values for join in do_journal_begin_r */ 101/* values for join in do_journal_begin_r */
108enum { 102enum {
109 JBEGIN_REG = 0, /* regular journal begin */ 103 JBEGIN_REG = 0, /* regular journal begin */
110 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 104 /* join the running transaction if at all possible */
111 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 105 JBEGIN_JOIN = 1,
106 /* called from cleanup code, ignores aborted flag */
107 JBEGIN_ABORT = 2,
112}; 108};
113 109
114static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 110static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
@@ -123,10 +119,11 @@ static void init_journal_hash(struct super_block *sb)
123} 119}
124 120
125/* 121/*
126** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 122 * clears BH_Dirty and sticks the buffer on the clean list. Called because
127** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 123 * I can't allow refile_buffer to make schedule happen after I've freed a
128** more details. 124 * block. Look at remove_from_transaction and journal_mark_freed for
129*/ 125 * more details.
126 */
130static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 127static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
131{ 128{
132 if (bh) { 129 if (bh) {
@@ -163,7 +160,7 @@ static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *sb)
163 struct list_head *entry = journal->j_bitmap_nodes.next; 160 struct list_head *entry = journal->j_bitmap_nodes.next;
164 161
165 journal->j_used_bitmap_nodes++; 162 journal->j_used_bitmap_nodes++;
166 repeat: 163repeat:
167 164
168 if (entry != &journal->j_bitmap_nodes) { 165 if (entry != &journal->j_bitmap_nodes) {
169 bn = list_entry(entry, struct reiserfs_bitmap_node, list); 166 bn = list_entry(entry, struct reiserfs_bitmap_node, list);
@@ -204,7 +201,8 @@ static void allocate_bitmap_nodes(struct super_block *sb)
204 list_add(&bn->list, &journal->j_bitmap_nodes); 201 list_add(&bn->list, &journal->j_bitmap_nodes);
205 journal->j_free_bitmap_nodes++; 202 journal->j_free_bitmap_nodes++;
206 } else { 203 } else {
207 break; /* this is ok, we'll try again when more are needed */ 204 /* this is ok, we'll try again when more are needed */
205 break;
208 } 206 }
209 } 207 }
210} 208}
@@ -239,8 +237,8 @@ static void cleanup_bitmap_list(struct super_block *sb,
239} 237}
240 238
241/* 239/*
242** only call this on FS unmount. 240 * only call this on FS unmount.
243*/ 241 */
244static int free_list_bitmaps(struct super_block *sb, 242static int free_list_bitmaps(struct super_block *sb,
245 struct reiserfs_list_bitmap *jb_array) 243 struct reiserfs_list_bitmap *jb_array)
246{ 244{
@@ -275,9 +273,9 @@ static int free_bitmap_nodes(struct super_block *sb)
275} 273}
276 274
277/* 275/*
278** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 276 * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
279** jb_array is the array to be filled in. 277 * jb_array is the array to be filled in.
280*/ 278 */
281int reiserfs_allocate_list_bitmaps(struct super_block *sb, 279int reiserfs_allocate_list_bitmaps(struct super_block *sb,
282 struct reiserfs_list_bitmap *jb_array, 280 struct reiserfs_list_bitmap *jb_array,
283 unsigned int bmap_nr) 281 unsigned int bmap_nr)
@@ -306,9 +304,9 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb,
306} 304}
307 305
308/* 306/*
309** find an available list bitmap. If you can't find one, flush a commit list 307 * find an available list bitmap. If you can't find one, flush a commit list
310** and try again 308 * and try again
311*/ 309 */
312static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, 310static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
313 struct reiserfs_journal_list 311 struct reiserfs_journal_list
314 *jl) 312 *jl)
@@ -332,18 +330,18 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
332 break; 330 break;
333 } 331 }
334 } 332 }
335 if (jb->journal_list) { /* double check to make sure if flushed correctly */ 333 /* double check to make sure if flushed correctly */
334 if (jb->journal_list)
336 return NULL; 335 return NULL;
337 }
338 jb->journal_list = jl; 336 jb->journal_list = jl;
339 return jb; 337 return jb;
340} 338}
341 339
342/* 340/*
343** allocates a new chunk of X nodes, and links them all together as a list. 341 * allocates a new chunk of X nodes, and links them all together as a list.
344** Uses the cnode->next and cnode->prev pointers 342 * Uses the cnode->next and cnode->prev pointers
345** returns NULL on failure 343 * returns NULL on failure
346*/ 344 */
347static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 345static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
348{ 346{
349 struct reiserfs_journal_cnode *head; 347 struct reiserfs_journal_cnode *head;
@@ -365,9 +363,7 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
365 return head; 363 return head;
366} 364}
367 365
368/* 366/* pulls a cnode off the free list, or returns NULL on failure */
369** pulls a cnode off the free list, or returns NULL on failure
370*/
371static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) 367static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
372{ 368{
373 struct reiserfs_journal_cnode *cn; 369 struct reiserfs_journal_cnode *cn;
@@ -393,8 +389,8 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
393} 389}
394 390
395/* 391/*
396** returns a cnode to the free list 392 * returns a cnode to the free list
397*/ 393 */
398static void free_cnode(struct super_block *sb, 394static void free_cnode(struct super_block *sb,
399 struct reiserfs_journal_cnode *cn) 395 struct reiserfs_journal_cnode *cn)
400{ 396{
@@ -419,7 +415,10 @@ static void clear_prepared_bits(struct buffer_head *bh)
419 clear_buffer_journal_restore_dirty(bh); 415 clear_buffer_journal_restore_dirty(bh);
420} 416}
421 417
422/* return a cnode with same dev, block number and size in table, or null if not found */ 418/*
419 * return a cnode with same dev, block number and size in table,
420 * or null if not found
421 */
423static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 422static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
424 super_block 423 super_block
425 *sb, 424 *sb,
@@ -439,23 +438,24 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
439} 438}
440 439
441/* 440/*
442** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated 441 * this actually means 'can this block be reallocated yet?'. If you set
443** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 442 * search_all, a block can only be allocated if it is not in the current
444** being overwritten by a replay after crashing. 443 * transaction, was not freed by the current transaction, and has no chance
445** 444 * of ever being overwritten by a replay after crashing.
446** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 445 *
447** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 446 * If you don't set search_all, a block can only be allocated if it is not
448** sure you never write the block without logging it. 447 * in the current transaction. Since deleting a block removes it from the
449** 448 * current transaction, this case should never happen. If you don't set
450** next_zero_bit is a suggestion about the next block to try for find_forward. 449 * search_all, make sure you never write the block without logging it.
451** when bl is rejected because it is set in a journal list bitmap, we search 450 *
452** for the next zero bit in the bitmap that rejected bl. Then, we return that 451 * next_zero_bit is a suggestion about the next block to try for find_forward.
453** through next_zero_bit for find_forward to try. 452 * when bl is rejected because it is set in a journal list bitmap, we search
454** 453 * for the next zero bit in the bitmap that rejected bl. Then, we return
455** Just because we return something in next_zero_bit does not mean we won't 454 * that through next_zero_bit for find_forward to try.
456** reject it on the next call to reiserfs_in_journal 455 *
457** 456 * Just because we return something in next_zero_bit does not mean we won't
458*/ 457 * reject it on the next call to reiserfs_in_journal
458 */
459int reiserfs_in_journal(struct super_block *sb, 459int reiserfs_in_journal(struct super_block *sb,
460 unsigned int bmap_nr, int bit_nr, int search_all, 460 unsigned int bmap_nr, int bit_nr, int search_all,
461 b_blocknr_t * next_zero_bit) 461 b_blocknr_t * next_zero_bit)
@@ -469,9 +469,11 @@ int reiserfs_in_journal(struct super_block *sb,
469 *next_zero_bit = 0; /* always start this at zero. */ 469 *next_zero_bit = 0; /* always start this at zero. */
470 470
471 PROC_INFO_INC(sb, journal.in_journal); 471 PROC_INFO_INC(sb, journal.in_journal);
472 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 472 /*
473 ** if we crash before the transaction that freed it commits, this transaction won't 473 * If we aren't doing a search_all, this is a metablock, and it
474 ** have committed either, and the block will never be written 474 * will be logged before use. if we crash before the transaction
475 * that freed it commits, this transaction won't have committed
476 * either, and the block will never be written
475 */ 477 */
476 if (search_all) { 478 if (search_all) {
477 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 479 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
@@ -511,8 +513,7 @@ int reiserfs_in_journal(struct super_block *sb,
511 return 0; 513 return 0;
512} 514}
513 515
514/* insert cn into table 516/* insert cn into table */
515*/
516static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 517static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
517 struct reiserfs_journal_cnode *cn) 518 struct reiserfs_journal_cnode *cn)
518{ 519{
@@ -558,10 +559,10 @@ static inline void put_journal_list(struct super_block *s,
558} 559}
559 560
560/* 561/*
561** this used to be much more involved, and I'm keeping it just in case things get ugly again. 562 * this used to be much more involved, and I'm keeping it just in case
562** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 563 * things get ugly again. it gets called by flush_commit_list, and
563** transaction. 564 * cleans up any data stored about blocks freed during a transaction.
564*/ 565 */
565static void cleanup_freed_for_journal_list(struct super_block *sb, 566static void cleanup_freed_for_journal_list(struct super_block *sb,
566 struct reiserfs_journal_list *jl) 567 struct reiserfs_journal_list *jl)
567{ 568{
@@ -756,11 +757,12 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
756 jh = bh->b_private; 757 jh = bh->b_private;
757 list_del_init(&jh->list); 758 list_del_init(&jh->list);
758 } else { 759 } else {
759 no_jh: 760no_jh:
760 get_bh(bh); 761 get_bh(bh);
761 jh = alloc_jh(); 762 jh = alloc_jh();
762 spin_lock(&j->j_dirty_buffers_lock); 763 spin_lock(&j->j_dirty_buffers_lock);
763 /* buffer must be locked for __add_jh, should be able to have 764 /*
765 * buffer must be locked for __add_jh, should be able to have
764 * two adds at the same time 766 * two adds at the same time
765 */ 767 */
766 BUG_ON(bh->b_private); 768 BUG_ON(bh->b_private);
@@ -818,7 +820,8 @@ static int write_ordered_buffers(spinlock_t * lock,
818 spin_lock(lock); 820 spin_lock(lock);
819 goto loop_next; 821 goto loop_next;
820 } 822 }
821 /* in theory, dirty non-uptodate buffers should never get here, 823 /*
824 * in theory, dirty non-uptodate buffers should never get here,
822 * but the upper layer io error paths still have a few quirks. 825 * but the upper layer io error paths still have a few quirks.
823 * Handle them here as gracefully as we can 826 * Handle them here as gracefully as we can
824 */ 827 */
@@ -833,7 +836,7 @@ static int write_ordered_buffers(spinlock_t * lock,
833 reiserfs_free_jh(bh); 836 reiserfs_free_jh(bh);
834 unlock_buffer(bh); 837 unlock_buffer(bh);
835 } 838 }
836 loop_next: 839loop_next:
837 put_bh(bh); 840 put_bh(bh);
838 cond_resched_lock(lock); 841 cond_resched_lock(lock);
839 } 842 }
@@ -856,13 +859,14 @@ static int write_ordered_buffers(spinlock_t * lock,
856 if (!buffer_uptodate(bh)) { 859 if (!buffer_uptodate(bh)) {
857 ret = -EIO; 860 ret = -EIO;
858 } 861 }
859 /* ugly interaction with invalidatepage here. 862 /*
860 * reiserfs_invalidate_page will pin any buffer that has a valid 863 * ugly interaction with invalidatepage here.
861 * journal head from an older transaction. If someone else sets 864 * reiserfs_invalidate_page will pin any buffer that has a
862 * our buffer dirty after we write it in the first loop, and 865 * valid journal head from an older transaction. If someone
863 * then someone truncates the page away, nobody will ever write 866 * else sets our buffer dirty after we write it in the first
864 * the buffer. We're safe if we write the page one last time 867 * loop, and then someone truncates the page away, nobody
865 * after freeing the journal header. 868 * will ever write the buffer. We're safe if we write the
869 * page one last time after freeing the journal header.
866 */ 870 */
867 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { 871 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
868 spin_unlock(lock); 872 spin_unlock(lock);
@@ -887,7 +891,7 @@ static int flush_older_commits(struct super_block *s,
887 unsigned int other_trans_id; 891 unsigned int other_trans_id;
888 unsigned int first_trans_id; 892 unsigned int first_trans_id;
889 893
890 find_first: 894find_first:
891 /* 895 /*
892 * first we walk backwards to find the oldest uncommitted transation 896 * first we walk backwards to find the oldest uncommitted transation
893 */ 897 */
@@ -923,9 +927,11 @@ static int flush_older_commits(struct super_block *s,
923 if (!journal_list_still_alive(s, trans_id)) 927 if (!journal_list_still_alive(s, trans_id))
924 return 1; 928 return 1;
925 929
926 /* the one we just flushed is gone, this means all 930 /*
927 * older lists are also gone, so first_jl is no longer 931 * the one we just flushed is gone, this means
928 * valid either. Go back to the beginning. 932 * all older lists are also gone, so first_jl
933 * is no longer valid either. Go back to the
934 * beginning.
929 */ 935 */
930 if (!journal_list_still_alive 936 if (!journal_list_still_alive
931 (s, other_trans_id)) { 937 (s, other_trans_id)) {
@@ -958,12 +964,12 @@ static int reiserfs_async_progress_wait(struct super_block *s)
958} 964}
959 965
960/* 966/*
961** if this journal list still has commit blocks unflushed, send them to disk. 967 * if this journal list still has commit blocks unflushed, send them to disk.
962** 968 *
963** log areas must be flushed in order (transaction 2 can't commit before transaction 1) 969 * log areas must be flushed in order (transaction 2 can't commit before
964** Before the commit block can by written, every other log block must be safely on disk 970 * transaction 1) Before the commit block can by written, every other log
965** 971 * block must be safely on disk
966*/ 972 */
967static int flush_commit_list(struct super_block *s, 973static int flush_commit_list(struct super_block *s,
968 struct reiserfs_journal_list *jl, int flushall) 974 struct reiserfs_journal_list *jl, int flushall)
969{ 975{
@@ -982,8 +988,9 @@ static int flush_commit_list(struct super_block *s,
982 return 0; 988 return 0;
983 } 989 }
984 990
985 /* before we can put our commit blocks on disk, we have to make sure everyone older than 991 /*
986 ** us is on disk too 992 * before we can put our commit blocks on disk, we have to make
993 * sure everyone older than us is on disk too
987 */ 994 */
988 BUG_ON(jl->j_len <= 0); 995 BUG_ON(jl->j_len <= 0);
989 BUG_ON(trans_id == journal->j_trans_id); 996 BUG_ON(trans_id == journal->j_trans_id);
@@ -991,7 +998,10 @@ static int flush_commit_list(struct super_block *s,
991 get_journal_list(jl); 998 get_journal_list(jl);
992 if (flushall) { 999 if (flushall) {
993 if (flush_older_commits(s, jl) == 1) { 1000 if (flush_older_commits(s, jl) == 1) {
994 /* list disappeared during flush_older_commits. return */ 1001 /*
1002 * list disappeared during flush_older_commits.
1003 * return
1004 */
995 goto put_jl; 1005 goto put_jl;
996 } 1006 }
997 } 1007 }
@@ -1006,9 +1016,9 @@ static int flush_commit_list(struct super_block *s,
1006 BUG_ON(jl->j_trans_id == 0); 1016 BUG_ON(jl->j_trans_id == 0);
1007 1017
1008 /* this commit is done, exit */ 1018 /* this commit is done, exit */
1009 if (atomic_read(&(jl->j_commit_left)) <= 0) { 1019 if (atomic_read(&jl->j_commit_left) <= 0) {
1010 if (flushall) { 1020 if (flushall) {
1011 atomic_set(&(jl->j_older_commits_done), 1); 1021 atomic_set(&jl->j_older_commits_done, 1);
1012 } 1022 }
1013 mutex_unlock(&jl->j_commit_mutex); 1023 mutex_unlock(&jl->j_commit_mutex);
1014 goto put_jl; 1024 goto put_jl;
@@ -1063,9 +1073,10 @@ static int flush_commit_list(struct super_block *s,
1063 depth = reiserfs_write_unlock_nested(s); 1073 depth = reiserfs_write_unlock_nested(s);
1064 __wait_on_buffer(tbh); 1074 __wait_on_buffer(tbh);
1065 reiserfs_write_lock_nested(s, depth); 1075 reiserfs_write_lock_nested(s, depth);
1066 // since we're using ll_rw_blk above, it might have skipped over 1076 /*
1067 // a locked buffer. Double check here 1077 * since we're using ll_rw_blk above, it might have skipped
1068 // 1078 * over a locked buffer. Double check here
1079 */
1069 /* redundant, sync_dirty_buffer() checks */ 1080 /* redundant, sync_dirty_buffer() checks */
1070 if (buffer_dirty(tbh)) { 1081 if (buffer_dirty(tbh)) {
1071 depth = reiserfs_write_unlock_nested(s); 1082 depth = reiserfs_write_unlock_nested(s);
@@ -1079,17 +1090,21 @@ static int flush_commit_list(struct super_block *s,
1079#endif 1090#endif
1080 retval = -EIO; 1091 retval = -EIO;
1081 } 1092 }
1082 put_bh(tbh); /* once for journal_find_get_block */ 1093 /* once for journal_find_get_block */
1083 put_bh(tbh); /* once due to original getblk in do_journal_end */ 1094 put_bh(tbh);
1084 atomic_dec(&(jl->j_commit_left)); 1095 /* once due to original getblk in do_journal_end */
1096 put_bh(tbh);
1097 atomic_dec(&jl->j_commit_left);
1085 } 1098 }
1086 1099
1087 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); 1100 BUG_ON(atomic_read(&jl->j_commit_left) != 1);
1088 1101
1089 /* If there was a write error in the journal - we can't commit 1102 /*
1103 * If there was a write error in the journal - we can't commit
1090 * this transaction - it will be invalid and, if successful, 1104 * this transaction - it will be invalid and, if successful,
1091 * will just end up propagating the write error out to 1105 * will just end up propagating the write error out to
1092 * the file system. */ 1106 * the file system.
1107 */
1093 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 1108 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
1094 if (buffer_dirty(jl->j_commit_bh)) 1109 if (buffer_dirty(jl->j_commit_bh))
1095 BUG(); 1110 BUG();
@@ -1102,9 +1117,11 @@ static int flush_commit_list(struct super_block *s,
1102 reiserfs_write_lock_nested(s, depth); 1117 reiserfs_write_lock_nested(s, depth);
1103 } 1118 }
1104 1119
1105 /* If there was a write error in the journal - we can't commit this 1120 /*
1121 * If there was a write error in the journal - we can't commit this
1106 * transaction - it will be invalid and, if successful, will just end 1122 * transaction - it will be invalid and, if successful, will just end
1107 * up propagating the write error out to the filesystem. */ 1123 * up propagating the write error out to the filesystem.
1124 */
1108 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1125 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1109#ifdef CONFIG_REISERFS_CHECK 1126#ifdef CONFIG_REISERFS_CHECK
1110 reiserfs_warning(s, "journal-615", "buffer write failed"); 1127 reiserfs_warning(s, "journal-615", "buffer write failed");
@@ -1119,7 +1136,10 @@ static int flush_commit_list(struct super_block *s,
1119 } 1136 }
1120 journal->j_last_commit_id = jl->j_trans_id; 1137 journal->j_last_commit_id = jl->j_trans_id;
1121 1138
1122 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 1139 /*
1140 * now, every commit block is on the disk. It is safe to allow
1141 * blocks freed during this transaction to be reallocated
1142 */
1123 cleanup_freed_for_journal_list(s, jl); 1143 cleanup_freed_for_journal_list(s, jl);
1124 1144
1125 retval = retval ? retval : journal->j_errno; 1145 retval = retval ? retval : journal->j_errno;
@@ -1127,13 +1147,13 @@ static int flush_commit_list(struct super_block *s,
1127 /* mark the metadata dirty */ 1147 /* mark the metadata dirty */
1128 if (!retval) 1148 if (!retval)
1129 dirty_one_transaction(s, jl); 1149 dirty_one_transaction(s, jl);
1130 atomic_dec(&(jl->j_commit_left)); 1150 atomic_dec(&jl->j_commit_left);
1131 1151
1132 if (flushall) { 1152 if (flushall) {
1133 atomic_set(&(jl->j_older_commits_done), 1); 1153 atomic_set(&jl->j_older_commits_done, 1);
1134 } 1154 }
1135 mutex_unlock(&jl->j_commit_mutex); 1155 mutex_unlock(&jl->j_commit_mutex);
1136 put_jl: 1156put_jl:
1137 put_journal_list(s, jl); 1157 put_journal_list(s, jl);
1138 1158
1139 if (retval) 1159 if (retval)
@@ -1143,9 +1163,9 @@ static int flush_commit_list(struct super_block *s,
1143} 1163}
1144 1164
1145/* 1165/*
1146** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1166 * flush_journal_list frequently needs to find a newer transaction for a
1147** returns NULL if it can't find anything 1167 * given block. This does that, or returns NULL if it can't find anything
1148*/ 1168 */
1149static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1169static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1150 reiserfs_journal_cnode 1170 reiserfs_journal_cnode
1151 *cn) 1171 *cn)
@@ -1169,10 +1189,11 @@ static void remove_journal_hash(struct super_block *,
1169 int); 1189 int);
1170 1190
1171/* 1191/*
1172** once all the real blocks have been flushed, it is safe to remove them from the 1192 * once all the real blocks have been flushed, it is safe to remove them
1173** journal list for this transaction. Aside from freeing the cnode, this also allows the 1193 * from the journal list for this transaction. Aside from freeing the
1174** block to be reallocated for data blocks if it had been deleted. 1194 * cnode, this also allows the block to be reallocated for data blocks
1175*/ 1195 * if it had been deleted.
1196 */
1176static void remove_all_from_journal_list(struct super_block *sb, 1197static void remove_all_from_journal_list(struct super_block *sb,
1177 struct reiserfs_journal_list *jl, 1198 struct reiserfs_journal_list *jl,
1178 int debug) 1199 int debug)
@@ -1181,8 +1202,9 @@ static void remove_all_from_journal_list(struct super_block *sb,
1181 struct reiserfs_journal_cnode *cn, *last; 1202 struct reiserfs_journal_cnode *cn, *last;
1182 cn = jl->j_realblock; 1203 cn = jl->j_realblock;
1183 1204
1184 /* which is better, to lock once around the whole loop, or 1205 /*
1185 ** to lock for each call to remove_journal_hash? 1206 * which is better, to lock once around the whole loop, or
1207 * to lock for each call to remove_journal_hash?
1186 */ 1208 */
1187 while (cn) { 1209 while (cn) {
1188 if (cn->blocknr != 0) { 1210 if (cn->blocknr != 0) {
@@ -1204,12 +1226,13 @@ static void remove_all_from_journal_list(struct super_block *sb,
1204} 1226}
1205 1227
1206/* 1228/*
1207** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 1229 * if this timestamp is greater than the timestamp we wrote last to the
1208** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 1230 * header block, write it to the header block. once this is done, I can
1209** releasing blocks in this transaction for reuse as data blocks. 1231 * safely say the log area for this transaction won't ever be replayed,
1210** called by flush_journal_list, before it calls remove_all_from_journal_list 1232 * and I can start releasing blocks in this transaction for reuse as data
1211** 1233 * blocks. called by flush_journal_list, before it calls
1212*/ 1234 * remove_all_from_journal_list
1235 */
1213static int _update_journal_header_block(struct super_block *sb, 1236static int _update_journal_header_block(struct super_block *sb,
1214 unsigned long offset, 1237 unsigned long offset,
1215 unsigned int trans_id) 1238 unsigned int trans_id)
@@ -1279,10 +1302,11 @@ static int flush_older_journal_lists(struct super_block *sb,
1279 struct reiserfs_journal *journal = SB_JOURNAL(sb); 1302 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1280 unsigned int trans_id = jl->j_trans_id; 1303 unsigned int trans_id = jl->j_trans_id;
1281 1304
1282 /* we know we are the only ones flushing things, no extra race 1305 /*
1306 * we know we are the only ones flushing things, no extra race
1283 * protection is required. 1307 * protection is required.
1284 */ 1308 */
1285 restart: 1309restart:
1286 entry = journal->j_journal_list.next; 1310 entry = journal->j_journal_list.next;
1287 /* Did we wrap? */ 1311 /* Did we wrap? */
1288 if (entry == &journal->j_journal_list) 1312 if (entry == &journal->j_journal_list)
@@ -1309,15 +1333,16 @@ static void del_from_work_list(struct super_block *s,
1309 } 1333 }
1310} 1334}
1311 1335
1312/* flush a journal list, both commit and real blocks 1336/*
1313** 1337 * flush a journal list, both commit and real blocks
1314** always set flushall to 1, unless you are calling from inside 1338 *
1315** flush_journal_list 1339 * always set flushall to 1, unless you are calling from inside
1316** 1340 * flush_journal_list
1317** IMPORTANT. This can only be called while there are no journal writers, 1341 *
1318** and the journal is locked. That means it can only be called from 1342 * IMPORTANT. This can only be called while there are no journal writers,
1319** do_journal_end, or by journal_release 1343 * and the journal is locked. That means it can only be called from
1320*/ 1344 * do_journal_end, or by journal_release
1345 */
1321static int flush_journal_list(struct super_block *s, 1346static int flush_journal_list(struct super_block *s,
1322 struct reiserfs_journal_list *jl, int flushall) 1347 struct reiserfs_journal_list *jl, int flushall)
1323{ 1348{
@@ -1354,13 +1379,14 @@ static int flush_journal_list(struct super_block *s,
1354 } 1379 }
1355 1380
1356 /* if all the work is already done, get out of here */ 1381 /* if all the work is already done, get out of here */
1357 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1382 if (atomic_read(&jl->j_nonzerolen) <= 0 &&
1358 atomic_read(&(jl->j_commit_left)) <= 0) { 1383 atomic_read(&jl->j_commit_left) <= 0) {
1359 goto flush_older_and_return; 1384 goto flush_older_and_return;
1360 } 1385 }
1361 1386
1362 /* start by putting the commit list on disk. This will also flush 1387 /*
1363 ** the commit lists of any olders transactions 1388 * start by putting the commit list on disk. This will also flush
1389 * the commit lists of any olders transactions
1364 */ 1390 */
1365 flush_commit_list(s, jl, 1); 1391 flush_commit_list(s, jl, 1);
1366 1392
@@ -1369,15 +1395,16 @@ static int flush_journal_list(struct super_block *s,
1369 BUG(); 1395 BUG();
1370 1396
1371 /* are we done now? */ 1397 /* are we done now? */
1372 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1398 if (atomic_read(&jl->j_nonzerolen) <= 0 &&
1373 atomic_read(&(jl->j_commit_left)) <= 0) { 1399 atomic_read(&jl->j_commit_left) <= 0) {
1374 goto flush_older_and_return; 1400 goto flush_older_and_return;
1375 } 1401 }
1376 1402
1377 /* loop through each cnode, see if we need to write it, 1403 /*
1378 ** or wait on a more recent transaction, or just ignore it 1404 * loop through each cnode, see if we need to write it,
1405 * or wait on a more recent transaction, or just ignore it
1379 */ 1406 */
1380 if (atomic_read(&(journal->j_wcount)) != 0) { 1407 if (atomic_read(&journal->j_wcount) != 0) {
1381 reiserfs_panic(s, "journal-844", "journal list is flushing, " 1408 reiserfs_panic(s, "journal-844", "journal list is flushing, "
1382 "wcount is not 0"); 1409 "wcount is not 0");
1383 } 1410 }
@@ -1391,20 +1418,25 @@ static int flush_journal_list(struct super_block *s,
1391 goto free_cnode; 1418 goto free_cnode;
1392 } 1419 }
1393 1420
1394 /* This transaction failed commit. Don't write out to the disk */ 1421 /*
1422 * This transaction failed commit.
1423 * Don't write out to the disk
1424 */
1395 if (!(jl->j_state & LIST_DIRTY)) 1425 if (!(jl->j_state & LIST_DIRTY))
1396 goto free_cnode; 1426 goto free_cnode;
1397 1427
1398 pjl = find_newer_jl_for_cn(cn); 1428 pjl = find_newer_jl_for_cn(cn);
1399 /* the order is important here. We check pjl to make sure we 1429 /*
1400 ** don't clear BH_JDirty_wait if we aren't the one writing this 1430 * the order is important here. We check pjl to make sure we
1401 ** block to disk 1431 * don't clear BH_JDirty_wait if we aren't the one writing this
1432 * block to disk
1402 */ 1433 */
1403 if (!pjl && cn->bh) { 1434 if (!pjl && cn->bh) {
1404 saved_bh = cn->bh; 1435 saved_bh = cn->bh;
1405 1436
1406 /* we do this to make sure nobody releases the buffer while 1437 /*
1407 ** we are working with it 1438 * we do this to make sure nobody releases the
1439 * buffer while we are working with it
1408 */ 1440 */
1409 get_bh(saved_bh); 1441 get_bh(saved_bh);
1410 1442
@@ -1413,13 +1445,17 @@ static int flush_journal_list(struct super_block *s,
1413 was_jwait = 1; 1445 was_jwait = 1;
1414 was_dirty = 1; 1446 was_dirty = 1;
1415 } else if (can_dirty(cn)) { 1447 } else if (can_dirty(cn)) {
1416 /* everything with !pjl && jwait should be writable */ 1448 /*
1449 * everything with !pjl && jwait
1450 * should be writable
1451 */
1417 BUG(); 1452 BUG();
1418 } 1453 }
1419 } 1454 }
1420 1455
1421 /* if someone has this block in a newer transaction, just make 1456 /*
1422 ** sure they are committed, and don't try writing it to disk 1457 * if someone has this block in a newer transaction, just make
1458 * sure they are committed, and don't try writing it to disk
1423 */ 1459 */
1424 if (pjl) { 1460 if (pjl) {
1425 if (atomic_read(&pjl->j_commit_left)) 1461 if (atomic_read(&pjl->j_commit_left))
@@ -1427,16 +1463,18 @@ static int flush_journal_list(struct super_block *s,
1427 goto free_cnode; 1463 goto free_cnode;
1428 } 1464 }
1429 1465
1430 /* bh == NULL when the block got to disk on its own, OR, 1466 /*
1431 ** the block got freed in a future transaction 1467 * bh == NULL when the block got to disk on its own, OR,
1468 * the block got freed in a future transaction
1432 */ 1469 */
1433 if (saved_bh == NULL) { 1470 if (saved_bh == NULL) {
1434 goto free_cnode; 1471 goto free_cnode;
1435 } 1472 }
1436 1473
1437 /* this should never happen. kupdate_one_transaction has this list 1474 /*
1438 ** locked while it works, so we should never see a buffer here that 1475 * this should never happen. kupdate_one_transaction has
1439 ** is not marked JDirty_wait 1476 * this list locked while it works, so we should never see a
1477 * buffer here that is not marked JDirty_wait
1440 */ 1478 */
1441 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1479 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1442 reiserfs_warning(s, "journal-813", 1480 reiserfs_warning(s, "journal-813",
@@ -1447,7 +1485,10 @@ static int flush_journal_list(struct super_block *s,
1447 was_jwait ? ' ' : '!'); 1485 was_jwait ? ' ' : '!');
1448 } 1486 }
1449 if (was_dirty) { 1487 if (was_dirty) {
1450 /* we inc again because saved_bh gets decremented at free_cnode */ 1488 /*
1489 * we inc again because saved_bh gets decremented
1490 * at free_cnode
1491 */
1451 get_bh(saved_bh); 1492 get_bh(saved_bh);
1452 set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 1493 set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
1453 lock_buffer(saved_bh); 1494 lock_buffer(saved_bh);
@@ -1463,13 +1504,16 @@ static int flush_journal_list(struct super_block *s,
1463 (unsigned long long)saved_bh-> 1504 (unsigned long long)saved_bh->
1464 b_blocknr, __func__); 1505 b_blocknr, __func__);
1465 } 1506 }
1466 free_cnode: 1507free_cnode:
1467 last = cn; 1508 last = cn;
1468 cn = cn->next; 1509 cn = cn->next;
1469 if (saved_bh) { 1510 if (saved_bh) {
1470 /* we incremented this to keep others from taking the buffer head away */ 1511 /*
1512 * we incremented this to keep others from
1513 * taking the buffer head away
1514 */
1471 put_bh(saved_bh); 1515 put_bh(saved_bh);
1472 if (atomic_read(&(saved_bh->b_count)) < 0) { 1516 if (atomic_read(&saved_bh->b_count) < 0) {
1473 reiserfs_warning(s, "journal-945", 1517 reiserfs_warning(s, "journal-945",
1474 "saved_bh->b_count < 0"); 1518 "saved_bh->b_count < 0");
1475 } 1519 }
@@ -1499,8 +1543,10 @@ static int flush_journal_list(struct super_block *s,
1499#endif 1543#endif
1500 err = -EIO; 1544 err = -EIO;
1501 } 1545 }
1502 /* note, we must clear the JDirty_wait bit after the up to date 1546 /*
1503 ** check, otherwise we race against our flushpage routine 1547 * note, we must clear the JDirty_wait bit
1548 * after the up to date check, otherwise we
1549 * race against our flushpage routine
1504 */ 1550 */
1505 BUG_ON(!test_clear_buffer_journal_dirty 1551 BUG_ON(!test_clear_buffer_journal_dirty
1506 (cn->bh)); 1552 (cn->bh));
@@ -1518,25 +1564,27 @@ static int flush_journal_list(struct super_block *s,
1518 reiserfs_abort(s, -EIO, 1564 reiserfs_abort(s, -EIO,
1519 "Write error while pushing transaction to disk in %s", 1565 "Write error while pushing transaction to disk in %s",
1520 __func__); 1566 __func__);
1521 flush_older_and_return: 1567flush_older_and_return:
1522 1568
1523 /* before we can update the journal header block, we _must_ flush all 1569 /*
1524 ** real blocks from all older transactions to disk. This is because 1570 * before we can update the journal header block, we _must_ flush all
1525 ** once the header block is updated, this transaction will not be 1571 * real blocks from all older transactions to disk. This is because
1526 ** replayed after a crash 1572 * once the header block is updated, this transaction will not be
1573 * replayed after a crash
1527 */ 1574 */
1528 if (flushall) { 1575 if (flushall) {
1529 flush_older_journal_lists(s, jl); 1576 flush_older_journal_lists(s, jl);
1530 } 1577 }
1531 1578
1532 err = journal->j_errno; 1579 err = journal->j_errno;
1533 /* before we can remove everything from the hash tables for this 1580 /*
1534 ** transaction, we must make sure it can never be replayed 1581 * before we can remove everything from the hash tables for this
1535 ** 1582 * transaction, we must make sure it can never be replayed
1536 ** since we are only called from do_journal_end, we know for sure there 1583 *
1537 ** are no allocations going on while we are flushing journal lists. So, 1584 * since we are only called from do_journal_end, we know for sure there
1538 ** we only need to update the journal header block for the last list 1585 * are no allocations going on while we are flushing journal lists. So,
1539 ** being flushed 1586 * we only need to update the journal header block for the last list
1587 * being flushed
1540 */ 1588 */
1541 if (!err && flushall) { 1589 if (!err && flushall) {
1542 err = 1590 err =
@@ -1561,11 +1609,12 @@ static int flush_journal_list(struct super_block *s,
1561 } 1609 }
1562 journal->j_last_flush_id = jl->j_trans_id; 1610 journal->j_last_flush_id = jl->j_trans_id;
1563 1611
1564 /* not strictly required since we are freeing the list, but it should 1612 /*
1613 * not strictly required since we are freeing the list, but it should
1565 * help find code using dead lists later on 1614 * help find code using dead lists later on
1566 */ 1615 */
1567 jl->j_len = 0; 1616 jl->j_len = 0;
1568 atomic_set(&(jl->j_nonzerolen), 0); 1617 atomic_set(&jl->j_nonzerolen, 0);
1569 jl->j_start = 0; 1618 jl->j_start = 0;
1570 jl->j_realblock = NULL; 1619 jl->j_realblock = NULL;
1571 jl->j_commit_bh = NULL; 1620 jl->j_commit_bh = NULL;
@@ -1592,15 +1641,17 @@ static int write_one_transaction(struct super_block *s,
1592 1641
1593 cn = jl->j_realblock; 1642 cn = jl->j_realblock;
1594 while (cn) { 1643 while (cn) {
1595 /* if the blocknr == 0, this has been cleared from the hash, 1644 /*
1596 ** skip it 1645 * if the blocknr == 0, this has been cleared from the hash,
1646 * skip it
1597 */ 1647 */
1598 if (cn->blocknr == 0) { 1648 if (cn->blocknr == 0) {
1599 goto next; 1649 goto next;
1600 } 1650 }
1601 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 1651 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
1602 struct buffer_head *tmp_bh; 1652 struct buffer_head *tmp_bh;
1603 /* we can race against journal_mark_freed when we try 1653 /*
1654 * we can race against journal_mark_freed when we try
1604 * to lock_buffer(cn->bh), so we have to inc the buffer 1655 * to lock_buffer(cn->bh), so we have to inc the buffer
1605 * count, and recheck things after locking 1656 * count, and recheck things after locking
1606 */ 1657 */
@@ -1619,7 +1670,7 @@ static int write_one_transaction(struct super_block *s,
1619 } 1670 }
1620 put_bh(tmp_bh); 1671 put_bh(tmp_bh);
1621 } 1672 }
1622 next: 1673next:
1623 cn = cn->next; 1674 cn = cn->next;
1624 cond_resched(); 1675 cond_resched();
1625 } 1676 }
@@ -1637,15 +1688,17 @@ static int dirty_one_transaction(struct super_block *s,
1637 jl->j_state |= LIST_DIRTY; 1688 jl->j_state |= LIST_DIRTY;
1638 cn = jl->j_realblock; 1689 cn = jl->j_realblock;
1639 while (cn) { 1690 while (cn) {
1640 /* look for a more recent transaction that logged this 1691 /*
1641 ** buffer. Only the most recent transaction with a buffer in 1692 * look for a more recent transaction that logged this
1642 ** it is allowed to send that buffer to disk 1693 * buffer. Only the most recent transaction with a buffer in
1694 * it is allowed to send that buffer to disk
1643 */ 1695 */
1644 pjl = find_newer_jl_for_cn(cn); 1696 pjl = find_newer_jl_for_cn(cn);
1645 if (!pjl && cn->blocknr && cn->bh 1697 if (!pjl && cn->blocknr && cn->bh
1646 && buffer_journal_dirty(cn->bh)) { 1698 && buffer_journal_dirty(cn->bh)) {
1647 BUG_ON(!can_dirty(cn)); 1699 BUG_ON(!can_dirty(cn));
1648 /* if the buffer is prepared, it will either be logged 1700 /*
1701 * if the buffer is prepared, it will either be logged
1649 * or restored. If restored, we need to make sure 1702 * or restored. If restored, we need to make sure
1650 * it actually gets marked dirty 1703 * it actually gets marked dirty
1651 */ 1704 */
@@ -1682,7 +1735,8 @@ static int kupdate_transactions(struct super_block *s,
1682 goto done; 1735 goto done;
1683 } 1736 }
1684 1737
1685 /* we've got j_flush_mutex held, nobody is going to delete any 1738 /*
1739 * we've got j_flush_mutex held, nobody is going to delete any
1686 * of these lists out from underneath us 1740 * of these lists out from underneath us
1687 */ 1741 */
1688 while ((num_trans && transactions_flushed < num_trans) || 1742 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1716,20 +1770,21 @@ static int kupdate_transactions(struct super_block *s,
1716 write_chunk(&chunk); 1770 write_chunk(&chunk);
1717 } 1771 }
1718 1772
1719 done: 1773done:
1720 mutex_unlock(&journal->j_flush_mutex); 1774 mutex_unlock(&journal->j_flush_mutex);
1721 return ret; 1775 return ret;
1722} 1776}
1723 1777
1724/* for o_sync and fsync heavy applications, they tend to use 1778/*
1725** all the journa list slots with tiny transactions. These 1779 * for o_sync and fsync heavy applications, they tend to use
1726** trigger lots and lots of calls to update the header block, which 1780 * all the journa list slots with tiny transactions. These
1727** adds seeks and slows things down. 1781 * trigger lots and lots of calls to update the header block, which
1728** 1782 * adds seeks and slows things down.
1729** This function tries to clear out a large chunk of the journal lists 1783 *
1730** at once, which makes everything faster since only the newest journal 1784 * This function tries to clear out a large chunk of the journal lists
1731** list updates the header block 1785 * at once, which makes everything faster since only the newest journal
1732*/ 1786 * list updates the header block
1787 */
1733static int flush_used_journal_lists(struct super_block *s, 1788static int flush_used_journal_lists(struct super_block *s,
1734 struct reiserfs_journal_list *jl) 1789 struct reiserfs_journal_list *jl)
1735{ 1790{
@@ -1766,9 +1821,11 @@ static int flush_used_journal_lists(struct super_block *s,
1766 } 1821 }
1767 get_journal_list(jl); 1822 get_journal_list(jl);
1768 get_journal_list(flush_jl); 1823 get_journal_list(flush_jl);
1769 /* try to find a group of blocks we can flush across all the 1824
1770 ** transactions, but only bother if we've actually spanned 1825 /*
1771 ** across multiple lists 1826 * try to find a group of blocks we can flush across all the
1827 * transactions, but only bother if we've actually spanned
1828 * across multiple lists
1772 */ 1829 */
1773 if (flush_jl != jl) { 1830 if (flush_jl != jl) {
1774 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1831 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
@@ -1780,9 +1837,9 @@ static int flush_used_journal_lists(struct super_block *s,
1780} 1837}
1781 1838
1782/* 1839/*
1783** removes any nodes in table with name block and dev as bh. 1840 * removes any nodes in table with name block and dev as bh.
1784** only touchs the hnext and hprev pointers. 1841 * only touchs the hnext and hprev pointers.
1785*/ 1842 */
1786void remove_journal_hash(struct super_block *sb, 1843void remove_journal_hash(struct super_block *sb,
1787 struct reiserfs_journal_cnode **table, 1844 struct reiserfs_journal_cnode **table,
1788 struct reiserfs_journal_list *jl, 1845 struct reiserfs_journal_list *jl,
@@ -1811,8 +1868,12 @@ void remove_journal_hash(struct super_block *sb,
1811 cur->blocknr = 0; 1868 cur->blocknr = 0;
1812 cur->sb = NULL; 1869 cur->sb = NULL;
1813 cur->state = 0; 1870 cur->state = 0;
1814 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 1871 /*
1815 atomic_dec(&(cur->jlist->j_nonzerolen)); 1872 * anybody who clears the cur->bh will also
1873 * dec the nonzerolen
1874 */
1875 if (cur->bh && cur->jlist)
1876 atomic_dec(&cur->jlist->j_nonzerolen);
1816 cur->bh = NULL; 1877 cur->bh = NULL;
1817 cur->jlist = NULL; 1878 cur->jlist = NULL;
1818 } 1879 }
@@ -1832,17 +1893,18 @@ static void free_journal_ram(struct super_block *sb)
1832 if (journal->j_header_bh) { 1893 if (journal->j_header_bh) {
1833 brelse(journal->j_header_bh); 1894 brelse(journal->j_header_bh);
1834 } 1895 }
1835 /* j_header_bh is on the journal dev, make sure not to release the journal 1896 /*
1836 * dev until we brelse j_header_bh 1897 * j_header_bh is on the journal dev, make sure
1898 * not to release the journal dev until we brelse j_header_bh
1837 */ 1899 */
1838 release_journal_dev(sb, journal); 1900 release_journal_dev(sb, journal);
1839 vfree(journal); 1901 vfree(journal);
1840} 1902}
1841 1903
1842/* 1904/*
1843** call on unmount. Only set error to 1 if you haven't made your way out 1905 * call on unmount. Only set error to 1 if you haven't made your way out
1844** of read_super() yet. Any other caller must keep error at 0. 1906 * of read_super() yet. Any other caller must keep error at 0.
1845*/ 1907 */
1846static int do_journal_release(struct reiserfs_transaction_handle *th, 1908static int do_journal_release(struct reiserfs_transaction_handle *th,
1847 struct super_block *sb, int error) 1909 struct super_block *sb, int error)
1848{ 1910{
@@ -1850,21 +1912,25 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1850 int flushed = 0; 1912 int flushed = 0;
1851 struct reiserfs_journal *journal = SB_JOURNAL(sb); 1913 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1852 1914
1853 /* we only want to flush out transactions if we were called with error == 0 1915 /*
1916 * we only want to flush out transactions if we were
1917 * called with error == 0
1854 */ 1918 */
1855 if (!error && !(sb->s_flags & MS_RDONLY)) { 1919 if (!error && !(sb->s_flags & MS_RDONLY)) {
1856 /* end the current trans */ 1920 /* end the current trans */
1857 BUG_ON(!th->t_trans_id); 1921 BUG_ON(!th->t_trans_id);
1858 do_journal_end(th, sb, 10, FLUSH_ALL); 1922 do_journal_end(th, FLUSH_ALL);
1859 1923
1860 /* make sure something gets logged to force our way into the flush code */ 1924 /*
1861 if (!journal_join(&myth, sb, 1)) { 1925 * make sure something gets logged to force
1926 * our way into the flush code
1927 */
1928 if (!journal_join(&myth, sb)) {
1862 reiserfs_prepare_for_journal(sb, 1929 reiserfs_prepare_for_journal(sb,
1863 SB_BUFFER_WITH_SB(sb), 1930 SB_BUFFER_WITH_SB(sb),
1864 1); 1931 1);
1865 journal_mark_dirty(&myth, sb, 1932 journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
1866 SB_BUFFER_WITH_SB(sb)); 1933 do_journal_end(&myth, FLUSH_ALL);
1867 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1868 flushed = 1; 1934 flushed = 1;
1869 } 1935 }
1870 } 1936 }
@@ -1872,17 +1938,15 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1872 /* this also catches errors during the do_journal_end above */ 1938 /* this also catches errors during the do_journal_end above */
1873 if (!error && reiserfs_is_journal_aborted(journal)) { 1939 if (!error && reiserfs_is_journal_aborted(journal)) {
1874 memset(&myth, 0, sizeof(myth)); 1940 memset(&myth, 0, sizeof(myth));
1875 if (!journal_join_abort(&myth, sb, 1)) { 1941 if (!journal_join_abort(&myth, sb)) {
1876 reiserfs_prepare_for_journal(sb, 1942 reiserfs_prepare_for_journal(sb,
1877 SB_BUFFER_WITH_SB(sb), 1943 SB_BUFFER_WITH_SB(sb),
1878 1); 1944 1);
1879 journal_mark_dirty(&myth, sb, 1945 journal_mark_dirty(&myth, SB_BUFFER_WITH_SB(sb));
1880 SB_BUFFER_WITH_SB(sb)); 1946 do_journal_end(&myth, FLUSH_ALL);
1881 do_journal_end(&myth, sb, 1, FLUSH_ALL);
1882 } 1947 }
1883 } 1948 }
1884 1949
1885 reiserfs_mounted_fs_count--;
1886 /* wait for all commits to finish */ 1950 /* wait for all commits to finish */
1887 cancel_delayed_work(&SB_JOURNAL(sb)->j_work); 1951 cancel_delayed_work(&SB_JOURNAL(sb)->j_work);
1888 1952
@@ -1893,12 +1957,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1893 reiserfs_write_unlock(sb); 1957 reiserfs_write_unlock(sb);
1894 1958
1895 cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); 1959 cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
1896 flush_workqueue(commit_wq); 1960 flush_workqueue(REISERFS_SB(sb)->commit_wq);
1897
1898 if (!reiserfs_mounted_fs_count) {
1899 destroy_workqueue(commit_wq);
1900 commit_wq = NULL;
1901 }
1902 1961
1903 free_journal_ram(sb); 1962 free_journal_ram(sb);
1904 1963
@@ -1907,25 +1966,24 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1907 return 0; 1966 return 0;
1908} 1967}
1909 1968
1910/* 1969/* * call on unmount. flush all journal trans, release all alloc'd ram */
1911** call on unmount. flush all journal trans, release all alloc'd ram
1912*/
1913int journal_release(struct reiserfs_transaction_handle *th, 1970int journal_release(struct reiserfs_transaction_handle *th,
1914 struct super_block *sb) 1971 struct super_block *sb)
1915{ 1972{
1916 return do_journal_release(th, sb, 0); 1973 return do_journal_release(th, sb, 0);
1917} 1974}
1918 1975
1919/* 1976/* only call from an error condition inside reiserfs_read_super! */
1920** only call from an error condition inside reiserfs_read_super!
1921*/
1922int journal_release_error(struct reiserfs_transaction_handle *th, 1977int journal_release_error(struct reiserfs_transaction_handle *th,
1923 struct super_block *sb) 1978 struct super_block *sb)
1924{ 1979{
1925 return do_journal_release(th, sb, 1); 1980 return do_journal_release(th, sb, 1);
1926} 1981}
1927 1982
1928/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1983/*
1984 * compares description block with commit block.
1985 * returns 1 if they differ, 0 if they are the same
1986 */
1929static int journal_compare_desc_commit(struct super_block *sb, 1987static int journal_compare_desc_commit(struct super_block *sb,
1930 struct reiserfs_journal_desc *desc, 1988 struct reiserfs_journal_desc *desc,
1931 struct reiserfs_journal_commit *commit) 1989 struct reiserfs_journal_commit *commit)
@@ -1939,11 +1997,12 @@ static int journal_compare_desc_commit(struct super_block *sb,
1939 return 0; 1997 return 0;
1940} 1998}
1941 1999
1942/* returns 0 if it did not find a description block 2000/*
1943** returns -1 if it found a corrupt commit block 2001 * returns 0 if it did not find a description block
1944** returns 1 if both desc and commit were valid 2002 * returns -1 if it found a corrupt commit block
1945** NOTE: only called during fs mount 2003 * returns 1 if both desc and commit were valid
1946*/ 2004 * NOTE: only called during fs mount
2005 */
1947static int journal_transaction_is_valid(struct super_block *sb, 2006static int journal_transaction_is_valid(struct super_block *sb,
1948 struct buffer_head *d_bh, 2007 struct buffer_head *d_bh,
1949 unsigned int *oldest_invalid_trans_id, 2008 unsigned int *oldest_invalid_trans_id,
@@ -1989,7 +2048,10 @@ static int journal_transaction_is_valid(struct super_block *sb,
1989 } 2048 }
1990 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2049 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
1991 2050
1992 /* ok, we have a journal description block, lets see if the transaction was valid */ 2051 /*
2052 * ok, we have a journal description block,
2053 * let's see if the transaction was valid
2054 */
1993 c_bh = 2055 c_bh =
1994 journal_bread(sb, 2056 journal_bread(sb,
1995 SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2057 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
@@ -2041,11 +2103,11 @@ static void brelse_array(struct buffer_head **heads, int num)
2041} 2103}
2042 2104
2043/* 2105/*
2044** given the start, and values for the oldest acceptable transactions, 2106 * given the start, and values for the oldest acceptable transactions,
2045** this either reads in a replays a transaction, or returns because the 2107 * this either reads in a replays a transaction, or returns because the
2046** transaction is invalid, or too old. 2108 * transaction is invalid, or too old.
2047** NOTE: only called during fs mount 2109 * NOTE: only called during fs mount
2048*/ 2110 */
2049static int journal_read_transaction(struct super_block *sb, 2111static int journal_read_transaction(struct super_block *sb,
2050 unsigned long cur_dblock, 2112 unsigned long cur_dblock,
2051 unsigned long oldest_start, 2113 unsigned long oldest_start,
@@ -2119,7 +2181,10 @@ static int journal_read_transaction(struct super_block *sb,
2119 } 2181 }
2120 2182
2121 trans_id = get_desc_trans_id(desc); 2183 trans_id = get_desc_trans_id(desc);
2122 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2184 /*
2185 * now we know we've got a good transaction, and it was
2186 * inside the valid time ranges
2187 */
2123 log_blocks = kmalloc(get_desc_trans_len(desc) * 2188 log_blocks = kmalloc(get_desc_trans_len(desc) *
2124 sizeof(struct buffer_head *), GFP_NOFS); 2189 sizeof(struct buffer_head *), GFP_NOFS);
2125 real_blocks = kmalloc(get_desc_trans_len(desc) * 2190 real_blocks = kmalloc(get_desc_trans_len(desc) *
@@ -2164,7 +2229,7 @@ static int journal_read_transaction(struct super_block *sb,
2164 reiserfs_warning(sb, "journal-1204", 2229 reiserfs_warning(sb, "journal-1204",
2165 "REPLAY FAILURE fsck required! " 2230 "REPLAY FAILURE fsck required! "
2166 "Trying to replay onto a log block"); 2231 "Trying to replay onto a log block");
2167 abort_replay: 2232abort_replay:
2168 brelse_array(log_blocks, i); 2233 brelse_array(log_blocks, i);
2169 brelse_array(real_blocks, i); 2234 brelse_array(real_blocks, i);
2170 brelse(c_bh); 2235 brelse(c_bh);
@@ -2226,7 +2291,10 @@ static int journal_read_transaction(struct super_block *sb,
2226 "journal-1095: setting journal " "start to offset %ld", 2291 "journal-1095: setting journal " "start to offset %ld",
2227 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 2292 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2228 2293
2229 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2294 /*
2295 * init starting values for the first transaction, in case
2296 * this is the last transaction to be replayed.
2297 */
2230 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2298 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2231 journal->j_last_flush_trans_id = trans_id; 2299 journal->j_last_flush_trans_id = trans_id;
2232 journal->j_trans_id = trans_id + 1; 2300 journal->j_trans_id = trans_id + 1;
@@ -2240,12 +2308,14 @@ static int journal_read_transaction(struct super_block *sb,
2240 return 0; 2308 return 0;
2241} 2309}
2242 2310
2243/* This function reads blocks starting from block and to max_block of bufsize 2311/*
2244 size (but no more than BUFNR blocks at a time). This proved to improve 2312 * This function reads blocks starting from block and to max_block of bufsize
2245 mounting speed on self-rebuilding raid5 arrays at least. 2313 * size (but no more than BUFNR blocks at a time). This proved to improve
2246 Right now it is only used from journal code. But later we might use it 2314 * mounting speed on self-rebuilding raid5 arrays at least.
2247 from other places. 2315 * Right now it is only used from journal code. But later we might use it
2248 Note: Do not use journal_getblk/sb_getblk functions here! */ 2316 * from other places.
2317 * Note: Do not use journal_getblk/sb_getblk functions here!
2318 */
2249static struct buffer_head *reiserfs_breada(struct block_device *dev, 2319static struct buffer_head *reiserfs_breada(struct block_device *dev,
2250 b_blocknr_t block, int bufsize, 2320 b_blocknr_t block, int bufsize,
2251 b_blocknr_t max_block) 2321 b_blocknr_t max_block)
@@ -2284,15 +2354,17 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
2284} 2354}
2285 2355
2286/* 2356/*
2287** read and replay the log 2357 * read and replay the log
2288** on a clean unmount, the journal header's next unflushed pointer will 2358 * on a clean unmount, the journal header's next unflushed pointer will be
2289** be to an invalid transaction. This tests that before finding all the 2359 * to an invalid transaction. This tests that before finding all the
2290** transactions in the log, which makes normal mount times fast. 2360 * transactions in the log, which makes normal mount times fast.
2291** After a crash, this starts with the next unflushed transaction, and 2361 *
2292** replays until it finds one too old, or invalid. 2362 * After a crash, this starts with the next unflushed transaction, and
2293** On exit, it sets things up so the first transaction will work correctly. 2363 * replays until it finds one too old, or invalid.
2294** NOTE: only called during fs mount 2364 *
2295*/ 2365 * On exit, it sets things up so the first transaction will work correctly.
2366 * NOTE: only called during fs mount
2367 */
2296static int journal_read(struct super_block *sb) 2368static int journal_read(struct super_block *sb)
2297{ 2369{
2298 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2370 struct reiserfs_journal *journal = SB_JOURNAL(sb);
@@ -2316,9 +2388,10 @@ static int journal_read(struct super_block *sb)
2316 bdevname(journal->j_dev_bd, b)); 2388 bdevname(journal->j_dev_bd, b));
2317 start = get_seconds(); 2389 start = get_seconds();
2318 2390
2319 /* step 1, read in the journal header block. Check the transaction it says 2391 /*
2320 ** is the first unflushed, and if that transaction is not valid, 2392 * step 1, read in the journal header block. Check the transaction
2321 ** replay is done 2393 * it says is the first unflushed, and if that transaction is not
2394 * valid, replay is done
2322 */ 2395 */
2323 journal->j_header_bh = journal_bread(sb, 2396 journal->j_header_bh = journal_bread(sb,
2324 SB_ONDISK_JOURNAL_1st_BLOCK(sb) 2397 SB_ONDISK_JOURNAL_1st_BLOCK(sb)
@@ -2342,9 +2415,10 @@ static int journal_read(struct super_block *sb)
2342 le32_to_cpu(jh->j_last_flush_trans_id)); 2415 le32_to_cpu(jh->j_last_flush_trans_id));
2343 valid_journal_header = 1; 2416 valid_journal_header = 1;
2344 2417
2345 /* now, we try to read the first unflushed offset. If it is not valid, 2418 /*
2346 ** there is nothing more we can do, and it makes no sense to read 2419 * now, we try to read the first unflushed offset. If it
2347 ** through the whole log. 2420 * is not valid, there is nothing more we can do, and it
2421 * makes no sense to read through the whole log.
2348 */ 2422 */
2349 d_bh = 2423 d_bh =
2350 journal_bread(sb, 2424 journal_bread(sb,
@@ -2358,15 +2432,19 @@ static int journal_read(struct super_block *sb)
2358 goto start_log_replay; 2432 goto start_log_replay;
2359 } 2433 }
2360 2434
2361 /* ok, there are transactions that need to be replayed. start with the first log block, find 2435 /*
2362 ** all the valid transactions, and pick out the oldest. 2436 * ok, there are transactions that need to be replayed. start
2437 * with the first log block, find all the valid transactions, and
2438 * pick out the oldest.
2363 */ 2439 */
2364 while (continue_replay 2440 while (continue_replay
2365 && cur_dblock < 2441 && cur_dblock <
2366 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2442 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2367 SB_ONDISK_JOURNAL_SIZE(sb))) { 2443 SB_ONDISK_JOURNAL_SIZE(sb))) {
2368 /* Note that it is required for blocksize of primary fs device and journal 2444 /*
2369 device to be the same */ 2445 * Note that it is required for blocksize of primary fs
2446 * device and journal device to be the same
2447 */
2370 d_bh = 2448 d_bh =
2371 reiserfs_breada(journal->j_dev_bd, cur_dblock, 2449 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2372 sb->s_blocksize, 2450 sb->s_blocksize,
@@ -2413,7 +2491,7 @@ static int journal_read(struct super_block *sb)
2413 brelse(d_bh); 2491 brelse(d_bh);
2414 } 2492 }
2415 2493
2416 start_log_replay: 2494start_log_replay:
2417 cur_dblock = oldest_start; 2495 cur_dblock = oldest_start;
2418 if (oldest_trans_id) { 2496 if (oldest_trans_id) {
2419 reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2497 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
@@ -2444,9 +2522,11 @@ static int journal_read(struct super_block *sb)
2444 reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2522 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2445 "journal-1225: No valid " "transactions found"); 2523 "journal-1225: No valid " "transactions found");
2446 } 2524 }
2447 /* j_start does not get set correctly if we don't replay any transactions. 2525 /*
2448 ** if we had a valid journal_header, set j_start to the first unflushed transaction value, 2526 * j_start does not get set correctly if we don't replay any
2449 ** copy the trans_id from the header 2527 * transactions. if we had a valid journal_header, set j_start
2528 * to the first unflushed transaction value, copy the trans_id
2529 * from the header
2450 */ 2530 */
2451 if (valid_journal_header && replay_count == 0) { 2531 if (valid_journal_header && replay_count == 0) {
2452 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2532 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
@@ -2475,8 +2555,9 @@ static int journal_read(struct super_block *sb)
2475 _update_journal_header_block(sb, journal->j_start, 2555 _update_journal_header_block(sb, journal->j_start,
2476 journal->j_last_flush_trans_id)) { 2556 journal->j_last_flush_trans_id)) {
2477 reiserfs_write_unlock(sb); 2557 reiserfs_write_unlock(sb);
2478 /* replay failed, caller must call free_journal_ram and abort 2558 /*
2479 ** the mount 2559 * replay failed, caller must call free_journal_ram and abort
2560 * the mount
2480 */ 2561 */
2481 return -1; 2562 return -1;
2482 } 2563 }
@@ -2569,7 +2650,7 @@ static int journal_init_dev(struct super_block *super,
2569 return 0; 2650 return 0;
2570} 2651}
2571 2652
2572/** 2653/*
2573 * When creating/tuning a file system user can assign some 2654 * When creating/tuning a file system user can assign some
2574 * journal params within boundaries which depend on the ratio 2655 * journal params within boundaries which depend on the ratio
2575 * blocksize/standard_blocksize. 2656 * blocksize/standard_blocksize.
@@ -2587,8 +2668,7 @@ static int check_advise_trans_params(struct super_block *sb,
2587 struct reiserfs_journal *journal) 2668 struct reiserfs_journal *journal)
2588{ 2669{
2589 if (journal->j_trans_max) { 2670 if (journal->j_trans_max) {
2590 /* Non-default journal params. 2671 /* Non-default journal params. Do sanity check for them. */
2591 Do sanity check for them. */
2592 int ratio = 1; 2672 int ratio = 1;
2593 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2673 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2594 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; 2674 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
@@ -2610,10 +2690,12 @@ static int check_advise_trans_params(struct super_block *sb,
2610 return 1; 2690 return 1;
2611 } 2691 }
2612 } else { 2692 } else {
2613 /* Default journal params. 2693 /*
2614 The file system was created by old version 2694 * Default journal params.
2615 of mkreiserfs, so some fields contain zeros, 2695 * The file system was created by old version
2616 and we need to advise proper values for them */ 2696 * of mkreiserfs, so some fields contain zeros,
2697 * and we need to advise proper values for them
2698 */
2617 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { 2699 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2618 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", 2700 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2619 sb->s_blocksize); 2701 sb->s_blocksize);
@@ -2626,9 +2708,7 @@ static int check_advise_trans_params(struct super_block *sb,
2626 return 0; 2708 return 0;
2627} 2709}
2628 2710
2629/* 2711/* must be called once on fs mount. calls journal_read for you */
2630** must be called once on fs mount. calls journal_read for you
2631*/
2632int journal_init(struct super_block *sb, const char *j_dev_name, 2712int journal_init(struct super_block *sb, const char *j_dev_name,
2633 int old_format, unsigned int commit_max_age) 2713 int old_format, unsigned int commit_max_age)
2634{ 2714{
@@ -2667,8 +2747,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2667 REISERFS_DISK_OFFSET_IN_BYTES / 2747 REISERFS_DISK_OFFSET_IN_BYTES /
2668 sb->s_blocksize + 2); 2748 sb->s_blocksize + 2);
2669 2749
2670 /* Sanity check to see is the standard journal fitting within first bitmap 2750 /*
2671 (actual for small blocksizes) */ 2751 * Sanity check to see is the standard journal fitting
2752 * within first bitmap (actual for small blocksizes)
2753 */
2672 if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2754 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2673 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2755 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2674 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { 2756 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
@@ -2754,20 +2836,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2754 journal->j_start = 0; 2836 journal->j_start = 0;
2755 journal->j_len = 0; 2837 journal->j_len = 0;
2756 journal->j_len_alloc = 0; 2838 journal->j_len_alloc = 0;
2757 atomic_set(&(journal->j_wcount), 0); 2839 atomic_set(&journal->j_wcount, 0);
2758 atomic_set(&(journal->j_async_throttle), 0); 2840 atomic_set(&journal->j_async_throttle, 0);
2759 journal->j_bcount = 0; 2841 journal->j_bcount = 0;
2760 journal->j_trans_start_time = 0; 2842 journal->j_trans_start_time = 0;
2761 journal->j_last = NULL; 2843 journal->j_last = NULL;
2762 journal->j_first = NULL; 2844 journal->j_first = NULL;
2763 init_waitqueue_head(&(journal->j_join_wait)); 2845 init_waitqueue_head(&journal->j_join_wait);
2764 mutex_init(&journal->j_mutex); 2846 mutex_init(&journal->j_mutex);
2765 mutex_init(&journal->j_flush_mutex); 2847 mutex_init(&journal->j_flush_mutex);
2766 2848
2767 journal->j_trans_id = 10; 2849 journal->j_trans_id = 10;
2768 journal->j_mount_id = 10; 2850 journal->j_mount_id = 10;
2769 journal->j_state = 0; 2851 journal->j_state = 0;
2770 atomic_set(&(journal->j_jlock), 0); 2852 atomic_set(&journal->j_jlock, 0);
2771 journal->j_cnode_free_list = allocate_cnodes(num_cnodes); 2853 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2772 journal->j_cnode_free_orig = journal->j_cnode_free_list; 2854 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2773 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0; 2855 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
@@ -2807,23 +2889,19 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2807 goto free_and_return; 2889 goto free_and_return;
2808 } 2890 }
2809 2891
2810 reiserfs_mounted_fs_count++;
2811 if (reiserfs_mounted_fs_count <= 1)
2812 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2813
2814 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits); 2892 INIT_DELAYED_WORK(&journal->j_work, flush_async_commits);
2815 journal->j_work_sb = sb; 2893 journal->j_work_sb = sb;
2816 return 0; 2894 return 0;
2817 free_and_return: 2895free_and_return:
2818 free_journal_ram(sb); 2896 free_journal_ram(sb);
2819 return 1; 2897 return 1;
2820} 2898}
2821 2899
2822/* 2900/*
2823** test for a polite end of the current transaction. Used by file_write, and should 2901 * test for a polite end of the current transaction. Used by file_write,
2824** be used by delete to make sure they don't write more than can fit inside a single 2902 * and should be used by delete to make sure they don't write more than
2825** transaction 2903 * can fit inside a single transaction
2826*/ 2904 */
2827int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2905int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2828 int new_alloc) 2906 int new_alloc)
2829{ 2907{
@@ -2835,7 +2913,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2835 return 0; 2913 return 0;
2836 if (journal->j_must_wait > 0 || 2914 if (journal->j_must_wait > 0 ||
2837 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 2915 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
2838 atomic_read(&(journal->j_jlock)) || 2916 atomic_read(&journal->j_jlock) ||
2839 (now - journal->j_trans_start_time) > journal->j_max_trans_age || 2917 (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
2840 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2918 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2841 return 1; 2919 return 1;
@@ -2846,8 +2924,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2846 return 0; 2924 return 0;
2847} 2925}
2848 2926
2849/* this must be called inside a transaction 2927/* this must be called inside a transaction */
2850*/
2851void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2928void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2852{ 2929{
2853 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 2930 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
@@ -2857,8 +2934,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2857 return; 2934 return;
2858} 2935}
2859 2936
2860/* this must be called without a transaction started 2937/* this must be called without a transaction started */
2861*/
2862void reiserfs_allow_writes(struct super_block *s) 2938void reiserfs_allow_writes(struct super_block *s)
2863{ 2939{
2864 struct reiserfs_journal *journal = SB_JOURNAL(s); 2940 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2866,8 +2942,7 @@ void reiserfs_allow_writes(struct super_block *s)
2866 wake_up(&journal->j_join_wait); 2942 wake_up(&journal->j_join_wait);
2867} 2943}
2868 2944
2869/* this must be called without a transaction started 2945/* this must be called without a transaction started */
2870*/
2871void reiserfs_wait_on_write_block(struct super_block *s) 2946void reiserfs_wait_on_write_block(struct super_block *s)
2872{ 2947{
2873 struct reiserfs_journal *journal = SB_JOURNAL(s); 2948 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2929,11 +3004,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2929 } 3004 }
2930} 3005}
2931 3006
2932/* join == true if you must join an existing transaction. 3007/*
2933** join == false if you can deal with waiting for others to finish 3008 * join == true if you must join an existing transaction.
2934** 3009 * join == false if you can deal with waiting for others to finish
2935** this will block until the transaction is joinable. send the number of blocks you 3010 *
2936** expect to use in nblocks. 3011 * this will block until the transaction is joinable. send the number of
3012 * blocks you expect to use in nblocks.
2937*/ 3013*/
2938static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3014static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2939 struct super_block *sb, unsigned long nblocks, 3015 struct super_block *sb, unsigned long nblocks,
@@ -2955,7 +3031,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2955 th->t_refcount = 1; 3031 th->t_refcount = 1;
2956 th->t_super = sb; 3032 th->t_super = sb;
2957 3033
2958 relock: 3034relock:
2959 lock_journal(sb); 3035 lock_journal(sb);
2960 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) { 3036 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
2961 unlock_journal(sb); 3037 unlock_journal(sb);
@@ -2974,9 +3050,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2974 } 3050 }
2975 now = get_seconds(); 3051 now = get_seconds();
2976 3052
2977 /* if there is no room in the journal OR 3053 /*
2978 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 3054 * if there is no room in the journal OR
2979 ** we don't sleep if there aren't other writers 3055 * if this transaction is too old, and we weren't called joinable,
3056 * wait for it to finish before beginning we don't sleep if there
3057 * aren't other writers
2980 */ 3058 */
2981 3059
2982 if ((!join && journal->j_must_wait > 0) || 3060 if ((!join && journal->j_must_wait > 0) ||
@@ -2990,7 +3068,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2990 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 3068 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
2991 3069
2992 old_trans_id = journal->j_trans_id; 3070 old_trans_id = journal->j_trans_id;
2993 unlock_journal(sb); /* allow others to finish this transaction */ 3071 /* allow others to finish this transaction */
3072 unlock_journal(sb);
2994 3073
2995 if (!join && (journal->j_len_alloc + nblocks + 2) >= 3074 if (!join && (journal->j_len_alloc + nblocks + 2) >=
2996 journal->j_max_batch && 3075 journal->j_max_batch &&
@@ -3002,8 +3081,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3002 goto relock; 3081 goto relock;
3003 } 3082 }
3004 } 3083 }
3005 /* don't mess with joining the transaction if all we have to do is 3084 /*
3006 * wait for someone else to do a commit 3085 * don't mess with joining the transaction if all we
3086 * have to do is wait for someone else to do a commit
3007 */ 3087 */
3008 if (atomic_read(&journal->j_jlock)) { 3088 if (atomic_read(&journal->j_jlock)) {
3009 while (journal->j_trans_id == old_trans_id && 3089 while (journal->j_trans_id == old_trans_id &&
@@ -3012,15 +3092,15 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3012 } 3092 }
3013 goto relock; 3093 goto relock;
3014 } 3094 }
3015 retval = journal_join(&myth, sb, 1); 3095 retval = journal_join(&myth, sb);
3016 if (retval) 3096 if (retval)
3017 goto out_fail; 3097 goto out_fail;
3018 3098
3019 /* someone might have ended the transaction while we joined */ 3099 /* someone might have ended the transaction while we joined */
3020 if (old_trans_id != journal->j_trans_id) { 3100 if (old_trans_id != journal->j_trans_id) {
3021 retval = do_journal_end(&myth, sb, 1, 0); 3101 retval = do_journal_end(&myth, 0);
3022 } else { 3102 } else {
3023 retval = do_journal_end(&myth, sb, 1, COMMIT_NOW); 3103 retval = do_journal_end(&myth, COMMIT_NOW);
3024 } 3104 }
3025 3105
3026 if (retval) 3106 if (retval)
@@ -3033,7 +3113,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3033 if (journal->j_trans_start_time == 0) { 3113 if (journal->j_trans_start_time == 0) {
3034 journal->j_trans_start_time = get_seconds(); 3114 journal->j_trans_start_time = get_seconds();
3035 } 3115 }
3036 atomic_inc(&(journal->j_wcount)); 3116 atomic_inc(&journal->j_wcount);
3037 journal->j_len_alloc += nblocks; 3117 journal->j_len_alloc += nblocks;
3038 th->t_blocks_logged = 0; 3118 th->t_blocks_logged = 0;
3039 th->t_blocks_allocated = nblocks; 3119 th->t_blocks_allocated = nblocks;
@@ -3042,11 +3122,13 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3042 INIT_LIST_HEAD(&th->t_list); 3122 INIT_LIST_HEAD(&th->t_list);
3043 return 0; 3123 return 0;
3044 3124
3045 out_fail: 3125out_fail:
3046 memset(th, 0, sizeof(*th)); 3126 memset(th, 0, sizeof(*th));
3047 /* Re-set th->t_super, so we can properly keep track of how many 3127 /*
3128 * Re-set th->t_super, so we can properly keep track of how many
3048 * persistent transactions there are. We need to do this so if this 3129 * persistent transactions there are. We need to do this so if this
3049 * call is part of a failed restart_transaction, we can free it later */ 3130 * call is part of a failed restart_transaction, we can free it later
3131 */
3050 th->t_super = sb; 3132 th->t_super = sb;
3051 return retval; 3133 return retval;
3052} 3134}
@@ -3059,14 +3141,15 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3059 int ret; 3141 int ret;
3060 struct reiserfs_transaction_handle *th; 3142 struct reiserfs_transaction_handle *th;
3061 3143
3062 /* if we're nesting into an existing transaction. It will be 3144 /*
3063 ** persistent on its own 3145 * if we're nesting into an existing transaction. It will be
3146 * persistent on its own
3064 */ 3147 */
3065 if (reiserfs_transaction_running(s)) { 3148 if (reiserfs_transaction_running(s)) {
3066 th = current->journal_info; 3149 th = current->journal_info;
3067 th->t_refcount++; 3150 th->t_refcount++;
3068 BUG_ON(th->t_refcount < 2); 3151 BUG_ON(th->t_refcount < 2);
3069 3152
3070 return th; 3153 return th;
3071 } 3154 }
3072 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 3155 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
@@ -3087,7 +3170,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3087 struct super_block *s = th->t_super; 3170 struct super_block *s = th->t_super;
3088 int ret = 0; 3171 int ret = 0;
3089 if (th->t_trans_id) 3172 if (th->t_trans_id)
3090 ret = journal_end(th, th->t_super, th->t_blocks_allocated); 3173 ret = journal_end(th);
3091 else 3174 else
3092 ret = -EIO; 3175 ret = -EIO;
3093 if (th->t_refcount == 0) { 3176 if (th->t_refcount == 0) {
@@ -3098,29 +3181,31 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
3098} 3181}
3099 3182
3100static int journal_join(struct reiserfs_transaction_handle *th, 3183static int journal_join(struct reiserfs_transaction_handle *th,
3101 struct super_block *sb, unsigned long nblocks) 3184 struct super_block *sb)
3102{ 3185{
3103 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3186 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3104 3187
3105 /* this keeps do_journal_end from NULLing out the current->journal_info 3188 /*
3106 ** pointer 3189 * this keeps do_journal_end from NULLing out the
3190 * current->journal_info pointer
3107 */ 3191 */
3108 th->t_handle_save = cur_th; 3192 th->t_handle_save = cur_th;
3109 BUG_ON(cur_th && cur_th->t_refcount > 1); 3193 BUG_ON(cur_th && cur_th->t_refcount > 1);
3110 return do_journal_begin_r(th, sb, nblocks, JBEGIN_JOIN); 3194 return do_journal_begin_r(th, sb, 1, JBEGIN_JOIN);
3111} 3195}
3112 3196
3113int journal_join_abort(struct reiserfs_transaction_handle *th, 3197int journal_join_abort(struct reiserfs_transaction_handle *th,
3114 struct super_block *sb, unsigned long nblocks) 3198 struct super_block *sb)
3115{ 3199{
3116 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3200 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3117 3201
3118 /* this keeps do_journal_end from NULLing out the current->journal_info 3202 /*
3119 ** pointer 3203 * this keeps do_journal_end from NULLing out the
3204 * current->journal_info pointer
3120 */ 3205 */
3121 th->t_handle_save = cur_th; 3206 th->t_handle_save = cur_th;
3122 BUG_ON(cur_th && cur_th->t_refcount > 1); 3207 BUG_ON(cur_th && cur_th->t_refcount > 1);
3123 return do_journal_begin_r(th, sb, nblocks, JBEGIN_ABORT); 3208 return do_journal_begin_r(th, sb, 1, JBEGIN_ABORT);
3124} 3209}
3125 3210
3126int journal_begin(struct reiserfs_transaction_handle *th, 3211int journal_begin(struct reiserfs_transaction_handle *th,
@@ -3142,9 +3227,10 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3142 "journal_info != 0"); 3227 "journal_info != 0");
3143 return 0; 3228 return 0;
3144 } else { 3229 } else {
3145 /* we've ended up with a handle from a different filesystem. 3230 /*
3146 ** save it and restore on journal_end. This should never 3231 * we've ended up with a handle from a different
3147 ** really happen... 3232 * filesystem. save it and restore on journal_end.
3233 * This should never really happen...
3148 */ 3234 */
3149 reiserfs_warning(sb, "clm-2100", 3235 reiserfs_warning(sb, "clm-2100",
3150 "nesting info a different FS"); 3236 "nesting info a different FS");
@@ -3157,9 +3243,10 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3157 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); 3243 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
3158 BUG_ON(current->journal_info != th); 3244 BUG_ON(current->journal_info != th);
3159 3245
3160 /* I guess this boils down to being the reciprocal of clm-2100 above. 3246 /*
3161 * If do_journal_begin_r fails, we need to put it back, since journal_end 3247 * I guess this boils down to being the reciprocal of clm-2100 above.
3162 * won't be called to do it. */ 3248 * If do_journal_begin_r fails, we need to put it back, since
3249 * journal_end won't be called to do it. */
3163 if (ret) 3250 if (ret)
3164 current->journal_info = th->t_handle_save; 3251 current->journal_info = th->t_handle_save;
3165 else 3252 else
@@ -3169,17 +3256,19 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3169} 3256}
3170 3257
3171/* 3258/*
3172** puts bh into the current transaction. If it was already there, reorders removes the 3259 * puts bh into the current transaction. If it was already there, reorders
3173** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 3260 * removes the old pointers from the hash, and puts new ones in (to make
3174** 3261 * sure replay happen in the right order).
3175** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 3262 *
3176** transaction is committed. 3263 * if it was dirty, cleans and files onto the clean list. I can't let it
3177** 3264 * be dirty again until the transaction is committed.
3178** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3265 *
3179*/ 3266 * if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
3267 */
3180int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3268int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3181 struct super_block *sb, struct buffer_head *bh) 3269 struct buffer_head *bh)
3182{ 3270{
3271 struct super_block *sb = th->t_super;
3183 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3272 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3184 struct reiserfs_journal_cnode *cn = NULL; 3273 struct reiserfs_journal_cnode *cn = NULL;
3185 int count_already_incd = 0; 3274 int count_already_incd = 0;
@@ -3201,9 +3290,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3201 return 0; 3290 return 0;
3202 } 3291 }
3203 3292
3204 /* this must be turned into a panic instead of a warning. We can't allow 3293 /*
3205 ** a dirty or journal_dirty or locked buffer to be logged, as some changes 3294 * this must be turned into a panic instead of a warning. We can't
3206 ** could get to disk too early. NOT GOOD. 3295 * allow a dirty or journal_dirty or locked buffer to be logged, as
3296 * some changes could get to disk too early. NOT GOOD.
3207 */ 3297 */
3208 if (!prepared || buffer_dirty(bh)) { 3298 if (!prepared || buffer_dirty(bh)) {
3209 reiserfs_warning(sb, "journal-1777", 3299 reiserfs_warning(sb, "journal-1777",
@@ -3216,14 +3306,16 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3216 buffer_journal_dirty(bh) ? ' ' : '!'); 3306 buffer_journal_dirty(bh) ? ' ' : '!');
3217 } 3307 }
3218 3308
3219 if (atomic_read(&(journal->j_wcount)) <= 0) { 3309 if (atomic_read(&journal->j_wcount) <= 0) {
3220 reiserfs_warning(sb, "journal-1409", 3310 reiserfs_warning(sb, "journal-1409",
3221 "returning because j_wcount was %d", 3311 "returning because j_wcount was %d",
3222 atomic_read(&(journal->j_wcount))); 3312 atomic_read(&journal->j_wcount));
3223 return 1; 3313 return 1;
3224 } 3314 }
3225 /* this error means I've screwed up, and we've overflowed the transaction. 3315 /*
3226 ** Nothing can be done here, except make the FS readonly or panic. 3316 * this error means I've screwed up, and we've overflowed
3317 * the transaction. Nothing can be done here, except make the
3318 * FS readonly or panic.
3227 */ 3319 */
3228 if (journal->j_len >= journal->j_trans_max) { 3320 if (journal->j_len >= journal->j_trans_max) {
3229 reiserfs_panic(th->t_super, "journal-1413", 3321 reiserfs_panic(th->t_super, "journal-1413",
@@ -3280,9 +3372,9 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3280 return 0; 3372 return 0;
3281} 3373}
3282 3374
3283int journal_end(struct reiserfs_transaction_handle *th, 3375int journal_end(struct reiserfs_transaction_handle *th)
3284 struct super_block *sb, unsigned long nblocks)
3285{ 3376{
3377 struct super_block *sb = th->t_super;
3286 if (!current->journal_info && th->t_refcount > 1) 3378 if (!current->journal_info && th->t_refcount > 1)
3287 reiserfs_warning(sb, "REISER-NESTING", 3379 reiserfs_warning(sb, "REISER-NESTING",
3288 "th NULL, refcount %d", th->t_refcount); 3380 "th NULL, refcount %d", th->t_refcount);
@@ -3297,8 +3389,9 @@ int journal_end(struct reiserfs_transaction_handle *th,
3297 struct reiserfs_transaction_handle *cur_th = 3389 struct reiserfs_transaction_handle *cur_th =
3298 current->journal_info; 3390 current->journal_info;
3299 3391
3300 /* we aren't allowed to close a nested transaction on a different 3392 /*
3301 ** filesystem from the one in the task struct 3393 * we aren't allowed to close a nested transaction on a
3394 * different filesystem from the one in the task struct
3302 */ 3395 */
3303 BUG_ON(cur_th->t_super != th->t_super); 3396 BUG_ON(cur_th->t_super != th->t_super);
3304 3397
@@ -3308,17 +3401,18 @@ int journal_end(struct reiserfs_transaction_handle *th,
3308 } 3401 }
3309 return 0; 3402 return 0;
3310 } else { 3403 } else {
3311 return do_journal_end(th, sb, nblocks, 0); 3404 return do_journal_end(th, 0);
3312 } 3405 }
3313} 3406}
3314 3407
3315/* removes from the current transaction, relsing and descrementing any counters. 3408/*
3316** also files the removed buffer directly onto the clean list 3409 * removes from the current transaction, relsing and descrementing any counters.
3317** 3410 * also files the removed buffer directly onto the clean list
3318** called by journal_mark_freed when a block has been deleted 3411 *
3319** 3412 * called by journal_mark_freed when a block has been deleted
3320** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3413 *
3321*/ 3414 * returns 1 if it cleaned and relsed the buffer. 0 otherwise
3415 */
3322static int remove_from_transaction(struct super_block *sb, 3416static int remove_from_transaction(struct super_block *sb,
3323 b_blocknr_t blocknr, int already_cleaned) 3417 b_blocknr_t blocknr, int already_cleaned)
3324{ 3418{
@@ -3354,7 +3448,7 @@ static int remove_from_transaction(struct super_block *sb,
3354 clear_buffer_dirty(bh); 3448 clear_buffer_dirty(bh);
3355 clear_buffer_journal_test(bh); 3449 clear_buffer_journal_test(bh);
3356 put_bh(bh); 3450 put_bh(bh);
3357 if (atomic_read(&(bh->b_count)) < 0) { 3451 if (atomic_read(&bh->b_count) < 0) {
3358 reiserfs_warning(sb, "journal-1752", 3452 reiserfs_warning(sb, "journal-1752",
3359 "b_count < 0"); 3453 "b_count < 0");
3360 } 3454 }
@@ -3367,15 +3461,16 @@ static int remove_from_transaction(struct super_block *sb,
3367} 3461}
3368 3462
3369/* 3463/*
3370** for any cnode in a journal list, it can only be dirtied of all the 3464 * for any cnode in a journal list, it can only be dirtied of all the
3371** transactions that include it are committed to disk. 3465 * transactions that include it are committed to disk.
3372** this checks through each transaction, and returns 1 if you are allowed to dirty, 3466 * this checks through each transaction, and returns 1 if you are allowed
3373** and 0 if you aren't 3467 * to dirty, and 0 if you aren't
3374** 3468 *
3375** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 3469 * it is called by dirty_journal_list, which is called after
3376** blocks for a given transaction on disk 3470 * flush_commit_list has gotten all the log blocks for a given
3377** 3471 * transaction on disk
3378*/ 3472 *
3473 */
3379static int can_dirty(struct reiserfs_journal_cnode *cn) 3474static int can_dirty(struct reiserfs_journal_cnode *cn)
3380{ 3475{
3381 struct super_block *sb = cn->sb; 3476 struct super_block *sb = cn->sb;
@@ -3383,9 +3478,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3383 struct reiserfs_journal_cnode *cur = cn->hprev; 3478 struct reiserfs_journal_cnode *cur = cn->hprev;
3384 int can_dirty = 1; 3479 int can_dirty = 1;
3385 3480
3386 /* first test hprev. These are all newer than cn, so any node here 3481 /*
3387 ** with the same block number and dev means this node can't be sent 3482 * first test hprev. These are all newer than cn, so any node here
3388 ** to disk right now. 3483 * with the same block number and dev means this node can't be sent
3484 * to disk right now.
3389 */ 3485 */
3390 while (cur && can_dirty) { 3486 while (cur && can_dirty) {
3391 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 3487 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
@@ -3394,13 +3490,14 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3394 } 3490 }
3395 cur = cur->hprev; 3491 cur = cur->hprev;
3396 } 3492 }
3397 /* then test hnext. These are all older than cn. As long as they 3493 /*
3398 ** are committed to the log, it is safe to write cn to disk 3494 * then test hnext. These are all older than cn. As long as they
3495 * are committed to the log, it is safe to write cn to disk
3399 */ 3496 */
3400 cur = cn->hnext; 3497 cur = cn->hnext;
3401 while (cur && can_dirty) { 3498 while (cur && can_dirty) {
3402 if (cur->jlist && cur->jlist->j_len > 0 && 3499 if (cur->jlist && cur->jlist->j_len > 0 &&
3403 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 3500 atomic_read(&cur->jlist->j_commit_left) > 0 && cur->bh &&
3404 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 3501 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
3405 can_dirty = 0; 3502 can_dirty = 0;
3406 } 3503 }
@@ -3409,12 +3506,13 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3409 return can_dirty; 3506 return can_dirty;
3410} 3507}
3411 3508
3412/* syncs the commit blocks, but does not force the real buffers to disk 3509/*
3413** will wait until the current transaction is done/committed before returning 3510 * syncs the commit blocks, but does not force the real buffers to disk
3414*/ 3511 * will wait until the current transaction is done/committed before returning
3415int journal_end_sync(struct reiserfs_transaction_handle *th, 3512 */
3416 struct super_block *sb, unsigned long nblocks) 3513int journal_end_sync(struct reiserfs_transaction_handle *th)
3417{ 3514{
3515 struct super_block *sb = th->t_super;
3418 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3516 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3419 3517
3420 BUG_ON(!th->t_trans_id); 3518 BUG_ON(!th->t_trans_id);
@@ -3423,14 +3521,12 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
3423 if (journal->j_len == 0) { 3521 if (journal->j_len == 0) {
3424 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3522 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3425 1); 3523 1);
3426 journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 3524 journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb));
3427 } 3525 }
3428 return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); 3526 return do_journal_end(th, COMMIT_NOW | WAIT);
3429} 3527}
3430 3528
3431/* 3529/* writeback the pending async commits to disk */
3432** writeback the pending async commits to disk
3433*/
3434static void flush_async_commits(struct work_struct *work) 3530static void flush_async_commits(struct work_struct *work)
3435{ 3531{
3436 struct reiserfs_journal *journal = 3532 struct reiserfs_journal *journal =
@@ -3450,9 +3546,9 @@ static void flush_async_commits(struct work_struct *work)
3450} 3546}
3451 3547
3452/* 3548/*
3453** flushes any old transactions to disk 3549 * flushes any old transactions to disk
3454** ends the current transaction if it is too old 3550 * ends the current transaction if it is too old
3455*/ 3551 */
3456void reiserfs_flush_old_commits(struct super_block *sb) 3552void reiserfs_flush_old_commits(struct super_block *sb)
3457{ 3553{
3458 time_t now; 3554 time_t now;
@@ -3460,48 +3556,53 @@ void reiserfs_flush_old_commits(struct super_block *sb)
3460 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3556 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3461 3557
3462 now = get_seconds(); 3558 now = get_seconds();
3463 /* safety check so we don't flush while we are replaying the log during 3559 /*
3560 * safety check so we don't flush while we are replaying the log during
3464 * mount 3561 * mount
3465 */ 3562 */
3466 if (list_empty(&journal->j_journal_list)) 3563 if (list_empty(&journal->j_journal_list))
3467 return; 3564 return;
3468 3565
3469 /* check the current transaction. If there are no writers, and it is 3566 /*
3567 * check the current transaction. If there are no writers, and it is
3470 * too old, finish it, and force the commit blocks to disk 3568 * too old, finish it, and force the commit blocks to disk
3471 */ 3569 */
3472 if (atomic_read(&journal->j_wcount) <= 0 && 3570 if (atomic_read(&journal->j_wcount) <= 0 &&
3473 journal->j_trans_start_time > 0 && 3571 journal->j_trans_start_time > 0 &&
3474 journal->j_len > 0 && 3572 journal->j_len > 0 &&
3475 (now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3573 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3476 if (!journal_join(&th, sb, 1)) { 3574 if (!journal_join(&th, sb)) {
3477 reiserfs_prepare_for_journal(sb, 3575 reiserfs_prepare_for_journal(sb,
3478 SB_BUFFER_WITH_SB(sb), 3576 SB_BUFFER_WITH_SB(sb),
3479 1); 3577 1);
3480 journal_mark_dirty(&th, sb, 3578 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb));
3481 SB_BUFFER_WITH_SB(sb));
3482 3579
3483 /* we're only being called from kreiserfsd, it makes no sense to do 3580 /*
3484 ** an async commit so that kreiserfsd can do it later 3581 * we're only being called from kreiserfsd, it makes
3582 * no sense to do an async commit so that kreiserfsd
3583 * can do it later
3485 */ 3584 */
3486 do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); 3585 do_journal_end(&th, COMMIT_NOW | WAIT);
3487 } 3586 }
3488 } 3587 }
3489} 3588}
3490 3589
3491/* 3590/*
3492** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 3591 * returns 0 if do_journal_end should return right away, returns 1 if
3493** 3592 * do_journal_end should finish the commit
3494** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 3593 *
3495** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just 3594 * if the current transaction is too old, but still has writers, this will
3496** flushes the commit list and returns 0. 3595 * wait on j_join_wait until all the writers are done. By the time it
3497** 3596 * wakes up, the transaction it was called has already ended, so it just
3498** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 3597 * flushes the commit list and returns 0.
3499** 3598 *
3500** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3599 * Won't batch when flush or commit_now is set. Also won't batch when
3501*/ 3600 * others are waiting on j_join_wait.
3502static int check_journal_end(struct reiserfs_transaction_handle *th, 3601 *
3503 struct super_block *sb, unsigned long nblocks, 3602 * Note, we can't allow the journal_end to proceed while there are still
3504 int flags) 3603 * writers in the log.
3604 */
3605static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
3505{ 3606{
3506 3607
3507 time_t now; 3608 time_t now;
@@ -3509,6 +3610,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3509 int commit_now = flags & COMMIT_NOW; 3610 int commit_now = flags & COMMIT_NOW;
3510 int wait_on_commit = flags & WAIT; 3611 int wait_on_commit = flags & WAIT;
3511 struct reiserfs_journal_list *jl; 3612 struct reiserfs_journal_list *jl;
3613 struct super_block *sb = th->t_super;
3512 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3614 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3513 3615
3514 BUG_ON(!th->t_trans_id); 3616 BUG_ON(!th->t_trans_id);
@@ -3520,23 +3622,27 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3520 } 3622 }
3521 3623
3522 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); 3624 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
3523 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ 3625 /* <= 0 is allowed. unmounting might not call begin */
3524 atomic_dec(&(journal->j_wcount)); 3626 if (atomic_read(&journal->j_wcount) > 0)
3525 } 3627 atomic_dec(&journal->j_wcount);
3526 3628
3527 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 3629 /*
3528 ** will be dealt with by next transaction that actually writes something, but should be taken 3630 * BUG, deal with case where j_len is 0, but people previously
3529 ** care of in this trans 3631 * freed blocks need to be released will be dealt with by next
3632 * transaction that actually writes something, but should be taken
3633 * care of in this trans
3530 */ 3634 */
3531 BUG_ON(journal->j_len == 0); 3635 BUG_ON(journal->j_len == 0);
3532 3636
3533 /* if wcount > 0, and we are called to with flush or commit_now, 3637 /*
3534 ** we wait on j_join_wait. We will wake up when the last writer has 3638 * if wcount > 0, and we are called to with flush or commit_now,
3535 ** finished the transaction, and started it on its way to the disk. 3639 * we wait on j_join_wait. We will wake up when the last writer has
3536 ** Then, we flush the commit or journal list, and just return 0 3640 * finished the transaction, and started it on its way to the disk.
3537 ** because the rest of journal end was already done for this transaction. 3641 * Then, we flush the commit or journal list, and just return 0
3642 * because the rest of journal end was already done for this
3643 * transaction.
3538 */ 3644 */
3539 if (atomic_read(&(journal->j_wcount)) > 0) { 3645 if (atomic_read(&journal->j_wcount) > 0) {
3540 if (flush || commit_now) { 3646 if (flush || commit_now) {
3541 unsigned trans_id; 3647 unsigned trans_id;
3542 3648
@@ -3544,27 +3650,30 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3544 trans_id = jl->j_trans_id; 3650 trans_id = jl->j_trans_id;
3545 if (wait_on_commit) 3651 if (wait_on_commit)
3546 jl->j_state |= LIST_COMMIT_PENDING; 3652 jl->j_state |= LIST_COMMIT_PENDING;
3547 atomic_set(&(journal->j_jlock), 1); 3653 atomic_set(&journal->j_jlock, 1);
3548 if (flush) { 3654 if (flush) {
3549 journal->j_next_full_flush = 1; 3655 journal->j_next_full_flush = 1;
3550 } 3656 }
3551 unlock_journal(sb); 3657 unlock_journal(sb);
3552 3658
3553 /* sleep while the current transaction is still j_jlocked */ 3659 /*
3660 * sleep while the current transaction is
3661 * still j_jlocked
3662 */
3554 while (journal->j_trans_id == trans_id) { 3663 while (journal->j_trans_id == trans_id) {
3555 if (atomic_read(&journal->j_jlock)) { 3664 if (atomic_read(&journal->j_jlock)) {
3556 queue_log_writer(sb); 3665 queue_log_writer(sb);
3557 } else { 3666 } else {
3558 lock_journal(sb); 3667 lock_journal(sb);
3559 if (journal->j_trans_id == trans_id) { 3668 if (journal->j_trans_id == trans_id) {
3560 atomic_set(&(journal->j_jlock), 3669 atomic_set(&journal->j_jlock,
3561 1); 3670 1);
3562 } 3671 }
3563 unlock_journal(sb); 3672 unlock_journal(sb);
3564 } 3673 }
3565 } 3674 }
3566 BUG_ON(journal->j_trans_id == trans_id); 3675 BUG_ON(journal->j_trans_id == trans_id);
3567 3676
3568 if (commit_now 3677 if (commit_now
3569 && journal_list_still_alive(sb, trans_id) 3678 && journal_list_still_alive(sb, trans_id)
3570 && wait_on_commit) { 3679 && wait_on_commit) {
@@ -3584,7 +3693,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3584 } 3693 }
3585 /* don't batch when someone is waiting on j_join_wait */ 3694 /* don't batch when someone is waiting on j_join_wait */
3586 /* don't batch when syncing the commit or flushing the whole trans */ 3695 /* don't batch when syncing the commit or flushing the whole trans */
3587 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) 3696 if (!(journal->j_must_wait > 0) && !(atomic_read(&journal->j_jlock))
3588 && !flush && !commit_now && (journal->j_len < journal->j_max_batch) 3697 && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3589 && journal->j_len_alloc < journal->j_max_batch 3698 && journal->j_len_alloc < journal->j_max_batch
3590 && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3699 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
@@ -3602,19 +3711,22 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3602} 3711}
3603 3712
3604/* 3713/*
3605** Does all the work that makes deleting blocks safe. 3714 * Does all the work that makes deleting blocks safe.
3606** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. 3715 * when deleting a block mark BH_JNew, just remove it from the current
3607** 3716 * transaction, clean it's buffer_head and move on.
3608** otherwise: 3717 *
3609** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 3718 * otherwise:
3610** before this transaction has finished. 3719 * set a bit for the block in the journal bitmap. That will prevent it from
3611** 3720 * being allocated for unformatted nodes before this transaction has finished.
3612** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 3721 *
3613** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 3722 * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.
3614** the block can't be reallocated yet. 3723 * That will prevent any old transactions with this block from trying to flush
3615** 3724 * to the real location. Since we aren't removing the cnode from the
3616** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3725 * journal_list_hash, *the block can't be reallocated yet.
3617*/ 3726 *
3727 * Then remove it from the current transaction, decrementing any counters and
3728 * filing it on the clean list.
3729 */
3618int journal_mark_freed(struct reiserfs_transaction_handle *th, 3730int journal_mark_freed(struct reiserfs_transaction_handle *th,
3619 struct super_block *sb, b_blocknr_t blocknr) 3731 struct super_block *sb, b_blocknr_t blocknr)
3620{ 3732{
@@ -3637,7 +3749,10 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3637 reiserfs_clean_and_file_buffer(bh); 3749 reiserfs_clean_and_file_buffer(bh);
3638 cleaned = remove_from_transaction(sb, blocknr, cleaned); 3750 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3639 } else { 3751 } else {
3640 /* set the bit for this block in the journal bitmap for this transaction */ 3752 /*
3753 * set the bit for this block in the journal bitmap
3754 * for this transaction
3755 */
3641 jb = journal->j_current_jl->j_list_bitmap; 3756 jb = journal->j_current_jl->j_list_bitmap;
3642 if (!jb) { 3757 if (!jb) {
3643 reiserfs_panic(sb, "journal-1702", 3758 reiserfs_panic(sb, "journal-1702",
@@ -3653,17 +3768,22 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3653 } 3768 }
3654 cleaned = remove_from_transaction(sb, blocknr, cleaned); 3769 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3655 3770
3656 /* find all older transactions with this block, make sure they don't try to write it out */ 3771 /*
3772 * find all older transactions with this block,
3773 * make sure they don't try to write it out
3774 */
3657 cn = get_journal_hash_dev(sb, journal->j_list_hash_table, 3775 cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3658 blocknr); 3776 blocknr);
3659 while (cn) { 3777 while (cn) {
3660 if (sb == cn->sb && blocknr == cn->blocknr) { 3778 if (sb == cn->sb && blocknr == cn->blocknr) {
3661 set_bit(BLOCK_FREED, &cn->state); 3779 set_bit(BLOCK_FREED, &cn->state);
3662 if (cn->bh) { 3780 if (cn->bh) {
3781 /*
3782 * remove_from_transaction will brelse
3783 * the buffer if it was in the current
3784 * trans
3785 */
3663 if (!cleaned) { 3786 if (!cleaned) {
3664 /* remove_from_transaction will brelse the buffer if it was
3665 ** in the current trans
3666 */
3667 clear_buffer_journal_dirty(cn-> 3787 clear_buffer_journal_dirty(cn->
3668 bh); 3788 bh);
3669 clear_buffer_dirty(cn->bh); 3789 clear_buffer_dirty(cn->bh);
@@ -3672,16 +3792,19 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3672 cleaned = 1; 3792 cleaned = 1;
3673 put_bh(cn->bh); 3793 put_bh(cn->bh);
3674 if (atomic_read 3794 if (atomic_read
3675 (&(cn->bh->b_count)) < 0) { 3795 (&cn->bh->b_count) < 0) {
3676 reiserfs_warning(sb, 3796 reiserfs_warning(sb,
3677 "journal-2138", 3797 "journal-2138",
3678 "cn->bh->b_count < 0"); 3798 "cn->bh->b_count < 0");
3679 } 3799 }
3680 } 3800 }
3681 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3801 /*
3682 atomic_dec(& 3802 * since we are clearing the bh,
3683 (cn->jlist-> 3803 * we MUST dec nonzerolen
3684 j_nonzerolen)); 3804 */
3805 if (cn->jlist) {
3806 atomic_dec(&cn->jlist->
3807 j_nonzerolen);
3685 } 3808 }
3686 cn->bh = NULL; 3809 cn->bh = NULL;
3687 } 3810 }
@@ -3714,10 +3837,16 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
3714 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3837 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3715 int ret = 0; 3838 int ret = 0;
3716 3839
3717 /* is it from the current transaction, or from an unknown transaction? */ 3840 /*
3841 * is it from the current transaction,
3842 * or from an unknown transaction?
3843 */
3718 if (id == journal->j_trans_id) { 3844 if (id == journal->j_trans_id) {
3719 jl = journal->j_current_jl; 3845 jl = journal->j_current_jl;
3720 /* try to let other writers come in and grow this transaction */ 3846 /*
3847 * try to let other writers come in and
3848 * grow this transaction
3849 */
3721 let_transaction_grow(sb, id); 3850 let_transaction_grow(sb, id);
3722 if (journal->j_trans_id != id) { 3851 if (journal->j_trans_id != id) {
3723 goto flush_commit_only; 3852 goto flush_commit_only;
@@ -3731,21 +3860,22 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
3731 if (journal->j_trans_id != id) { 3860 if (journal->j_trans_id != id) {
3732 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 3861 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3733 1); 3862 1);
3734 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)); 3863 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(sb));
3735 ret = journal_end(&th, sb, 1); 3864 ret = journal_end(&th);
3736 goto flush_commit_only; 3865 goto flush_commit_only;
3737 } 3866 }
3738 3867
3739 ret = journal_end_sync(&th, sb, 1); 3868 ret = journal_end_sync(&th);
3740 if (!ret) 3869 if (!ret)
3741 ret = 1; 3870 ret = 1;
3742 3871
3743 } else { 3872 } else {
3744 /* this gets tricky, we have to make sure the journal list in 3873 /*
3874 * this gets tricky, we have to make sure the journal list in
3745 * the inode still exists. We know the list is still around 3875 * the inode still exists. We know the list is still around
3746 * if we've got a larger transaction id than the oldest list 3876 * if we've got a larger transaction id than the oldest list
3747 */ 3877 */
3748 flush_commit_only: 3878flush_commit_only:
3749 if (journal_list_still_alive(inode->i_sb, id)) { 3879 if (journal_list_still_alive(inode->i_sb, id)) {
3750 /* 3880 /*
3751 * we only set ret to 1 when we know for sure 3881 * we only set ret to 1 when we know for sure
@@ -3768,7 +3898,8 @@ int reiserfs_commit_for_inode(struct inode *inode)
3768 unsigned int id = REISERFS_I(inode)->i_trans_id; 3898 unsigned int id = REISERFS_I(inode)->i_trans_id;
3769 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 3899 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
3770 3900
3771 /* for the whole inode, assume unset id means it was 3901 /*
3902 * for the whole inode, assume unset id means it was
3772 * changed in the current transaction. More conservative 3903 * changed in the current transaction. More conservative
3773 */ 3904 */
3774 if (!id || !jl) { 3905 if (!id || !jl) {
@@ -3806,12 +3937,11 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb,
3806 3937
3807extern struct tree_balance *cur_tb; 3938extern struct tree_balance *cur_tb;
3808/* 3939/*
3809** before we can change a metadata block, we have to make sure it won't 3940 * before we can change a metadata block, we have to make sure it won't
3810** be written to disk while we are altering it. So, we must: 3941 * be written to disk while we are altering it. So, we must:
3811** clean it 3942 * clean it
3812** wait on it. 3943 * wait on it.
3813** 3944 */
3814*/
3815int reiserfs_prepare_for_journal(struct super_block *sb, 3945int reiserfs_prepare_for_journal(struct super_block *sb,
3816 struct buffer_head *bh, int wait) 3946 struct buffer_head *bh, int wait)
3817{ 3947{
@@ -3832,19 +3962,18 @@ int reiserfs_prepare_for_journal(struct super_block *sb,
3832} 3962}
3833 3963
3834/* 3964/*
3835** long and ugly. If flush, will not return until all commit 3965 * long and ugly. If flush, will not return until all commit
3836** blocks and all real buffers in the trans are on disk. 3966 * blocks and all real buffers in the trans are on disk.
3837** If no_async, won't return until all commit blocks are on disk. 3967 * If no_async, won't return until all commit blocks are on disk.
3838** 3968 *
3839** keep reading, there are comments as you go along 3969 * keep reading, there are comments as you go along
3840** 3970 *
3841** If the journal is aborted, we just clean up. Things like flushing 3971 * If the journal is aborted, we just clean up. Things like flushing
3842** journal lists, etc just won't happen. 3972 * journal lists, etc just won't happen.
3843*/ 3973 */
3844static int do_journal_end(struct reiserfs_transaction_handle *th, 3974static int do_journal_end(struct reiserfs_transaction_handle *th, int flags)
3845 struct super_block *sb, unsigned long nblocks,
3846 int flags)
3847{ 3975{
3976 struct super_block *sb = th->t_super;
3848 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3977 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3849 struct reiserfs_journal_cnode *cn, *next, *jl_cn; 3978 struct reiserfs_journal_cnode *cn, *next, *jl_cn;
3850 struct reiserfs_journal_cnode *last_cn = NULL; 3979 struct reiserfs_journal_cnode *last_cn = NULL;
@@ -3866,9 +3995,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3866 3995
3867 BUG_ON(th->t_refcount > 1); 3996 BUG_ON(th->t_refcount > 1);
3868 BUG_ON(!th->t_trans_id); 3997 BUG_ON(!th->t_trans_id);
3998 BUG_ON(!th->t_super);
3869 3999
3870 /* protect flush_older_commits from doing mistakes if the 4000 /*
3871 transaction ID counter gets overflowed. */ 4001 * protect flush_older_commits from doing mistakes if the
4002 * transaction ID counter gets overflowed.
4003 */
3872 if (th->t_trans_id == ~0U) 4004 if (th->t_trans_id == ~0U)
3873 flags |= FLUSH_ALL | COMMIT_NOW | WAIT; 4005 flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
3874 flush = flags & FLUSH_ALL; 4006 flush = flags & FLUSH_ALL;
@@ -3879,7 +4011,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3879 if (journal->j_len == 0) { 4011 if (journal->j_len == 0) {
3880 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 4012 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3881 1); 4013 1);
3882 journal_mark_dirty(th, sb, SB_BUFFER_WITH_SB(sb)); 4014 journal_mark_dirty(th, SB_BUFFER_WITH_SB(sb));
3883 } 4015 }
3884 4016
3885 lock_journal(sb); 4017 lock_journal(sb);
@@ -3892,10 +4024,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3892 wait_on_commit = 1; 4024 wait_on_commit = 1;
3893 } 4025 }
3894 4026
3895 /* check_journal_end locks the journal, and unlocks if it does not return 1 4027 /*
3896 ** it tells us if we should continue with the journal_end, or just return 4028 * check_journal_end locks the journal, and unlocks if it does
4029 * not return 1 it tells us if we should continue with the
4030 * journal_end, or just return
3897 */ 4031 */
3898 if (!check_journal_end(th, sb, nblocks, flags)) { 4032 if (!check_journal_end(th, flags)) {
3899 reiserfs_schedule_old_flush(sb); 4033 reiserfs_schedule_old_flush(sb);
3900 wake_queued_writers(sb); 4034 wake_queued_writers(sb);
3901 reiserfs_async_progress_wait(sb); 4035 reiserfs_async_progress_wait(sb);
@@ -3908,19 +4042,23 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3908 } 4042 }
3909 4043
3910 /* 4044 /*
3911 ** j must wait means we have to flush the log blocks, and the real blocks for 4045 * j must wait means we have to flush the log blocks, and the
3912 ** this transaction 4046 * real blocks for this transaction
3913 */ 4047 */
3914 if (journal->j_must_wait > 0) { 4048 if (journal->j_must_wait > 0) {
3915 flush = 1; 4049 flush = 1;
3916 } 4050 }
3917#ifdef REISERFS_PREALLOCATE 4051#ifdef REISERFS_PREALLOCATE
3918 /* quota ops might need to nest, setup the journal_info pointer for them 4052 /*
3919 * and raise the refcount so that it is > 0. */ 4053 * quota ops might need to nest, setup the journal_info pointer
4054 * for them and raise the refcount so that it is > 0.
4055 */
3920 current->journal_info = th; 4056 current->journal_info = th;
3921 th->t_refcount++; 4057 th->t_refcount++;
3922 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 4058
3923 * the transaction */ 4059 /* it should not involve new blocks into the transaction */
4060 reiserfs_discard_all_prealloc(th);
4061
3924 th->t_refcount--; 4062 th->t_refcount--;
3925 current->journal_info = th->t_handle_save; 4063 current->journal_info = th->t_handle_save;
3926#endif 4064#endif
@@ -3936,7 +4074,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3936 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 4074 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
3937 set_desc_trans_id(desc, journal->j_trans_id); 4075 set_desc_trans_id(desc, journal->j_trans_id);
3938 4076
3939 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ 4077 /*
4078 * setup commit block. Don't write (keep it clean too) this one
4079 * until after everyone else is written
4080 */
3940 c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4081 c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
3941 ((journal->j_start + journal->j_len + 4082 ((journal->j_start + journal->j_len +
3942 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 4083 1) % SB_ONDISK_JOURNAL_SIZE(sb)));
@@ -3948,7 +4089,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3948 /* init this journal list */ 4089 /* init this journal list */
3949 jl = journal->j_current_jl; 4090 jl = journal->j_current_jl;
3950 4091
3951 /* we lock the commit before doing anything because 4092 /*
4093 * we lock the commit before doing anything because
3952 * we want to make sure nobody tries to run flush_commit_list until 4094 * we want to make sure nobody tries to run flush_commit_list until
3953 * the new transaction is fully setup, and we've already flushed the 4095 * the new transaction is fully setup, and we've already flushed the
3954 * ordered bh list 4096 * ordered bh list
@@ -3968,9 +4110,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3968 atomic_set(&jl->j_commit_left, journal->j_len + 2); 4110 atomic_set(&jl->j_commit_left, journal->j_len + 2);
3969 jl->j_realblock = NULL; 4111 jl->j_realblock = NULL;
3970 4112
3971 /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 4113 /*
3972 ** for each real block, add it to the journal list hash, 4114 * The ENTIRE FOR LOOP MUST not cause schedule to occur.
3973 ** copy into real block index array in the commit or desc block 4115 * for each real block, add it to the journal list hash,
4116 * copy into real block index array in the commit or desc block
3974 */ 4117 */
3975 trans_half = journal_trans_half(sb->s_blocksize); 4118 trans_half = journal_trans_half(sb->s_blocksize);
3976 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 4119 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
@@ -3989,9 +4132,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3989 last_cn->next = jl_cn; 4132 last_cn->next = jl_cn;
3990 } 4133 }
3991 last_cn = jl_cn; 4134 last_cn = jl_cn;
3992 /* make sure the block we are trying to log is not a block 4135 /*
3993 of journal or reserved area */ 4136 * make sure the block we are trying to log
3994 4137 * is not a block of journal or reserved area
4138 */
3995 if (is_block_in_log_or_reserved_area 4139 if (is_block_in_log_or_reserved_area
3996 (sb, cn->bh->b_blocknr)) { 4140 (sb, cn->bh->b_blocknr)) {
3997 reiserfs_panic(sb, "journal-2332", 4141 reiserfs_panic(sb, "journal-2332",
@@ -4021,19 +4165,26 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4021 set_desc_trans_id(desc, journal->j_trans_id); 4165 set_desc_trans_id(desc, journal->j_trans_id);
4022 set_commit_trans_len(commit, journal->j_len); 4166 set_commit_trans_len(commit, journal->j_len);
4023 4167
4024 /* special check in case all buffers in the journal were marked for not logging */ 4168 /*
4169 * special check in case all buffers in the journal
4170 * were marked for not logging
4171 */
4025 BUG_ON(journal->j_len == 0); 4172 BUG_ON(journal->j_len == 0);
4026 4173
4027 /* we're about to dirty all the log blocks, mark the description block 4174 /*
4175 * we're about to dirty all the log blocks, mark the description block
4028 * dirty now too. Don't mark the commit block dirty until all the 4176 * dirty now too. Don't mark the commit block dirty until all the
4029 * others are on disk 4177 * others are on disk
4030 */ 4178 */
4031 mark_buffer_dirty(d_bh); 4179 mark_buffer_dirty(d_bh);
4032 4180
4033 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 4181 /*
4182 * first data block is j_start + 1, so add one to
4183 * cur_write_start wherever you use it
4184 */
4034 cur_write_start = journal->j_start; 4185 cur_write_start = journal->j_start;
4035 cn = journal->j_first; 4186 cn = journal->j_first;
4036 jindex = 1; /* start at one so we don't get the desc again */ 4187 jindex = 1; /* start at one so we don't get the desc again */
4037 while (cn) { 4188 while (cn) {
4038 clear_buffer_journal_new(cn->bh); 4189 clear_buffer_journal_new(cn->bh);
4039 /* copy all the real blocks into log area. dirty log blocks */ 4190 /* copy all the real blocks into log area. dirty log blocks */
@@ -4059,7 +4210,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4059 set_buffer_journal_dirty(cn->bh); 4210 set_buffer_journal_dirty(cn->bh);
4060 clear_buffer_journaled(cn->bh); 4211 clear_buffer_journaled(cn->bh);
4061 } else { 4212 } else {
4062 /* JDirty cleared sometime during transaction. don't log this one */ 4213 /*
4214 * JDirty cleared sometime during transaction.
4215 * don't log this one
4216 */
4063 reiserfs_warning(sb, "journal-2048", 4217 reiserfs_warning(sb, "journal-2048",
4064 "BAD, buffer in journal hash, " 4218 "BAD, buffer in journal hash, "
4065 "but not JDirty!"); 4219 "but not JDirty!");
@@ -4071,9 +4225,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4071 reiserfs_cond_resched(sb); 4225 reiserfs_cond_resched(sb);
4072 } 4226 }
4073 4227
4074 /* we are done with both the c_bh and d_bh, but 4228 /*
4075 ** c_bh must be written after all other commit blocks, 4229 * we are done with both the c_bh and d_bh, but
4076 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 4230 * c_bh must be written after all other commit blocks,
4231 * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
4077 */ 4232 */
4078 4233
4079 journal->j_current_jl = alloc_journal_list(sb); 4234 journal->j_current_jl = alloc_journal_list(sb);
@@ -4088,7 +4243,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4088 journal->j_start = 4243 journal->j_start =
4089 (journal->j_start + journal->j_len + 4244 (journal->j_start + journal->j_len +
4090 2) % SB_ONDISK_JOURNAL_SIZE(sb); 4245 2) % SB_ONDISK_JOURNAL_SIZE(sb);
4091 atomic_set(&(journal->j_wcount), 0); 4246 atomic_set(&journal->j_wcount, 0);
4092 journal->j_bcount = 0; 4247 journal->j_bcount = 0;
4093 journal->j_last = NULL; 4248 journal->j_last = NULL;
4094 journal->j_first = NULL; 4249 journal->j_first = NULL;
@@ -4104,15 +4259,18 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4104 journal->j_next_async_flush = 0; 4259 journal->j_next_async_flush = 0;
4105 init_journal_hash(sb); 4260 init_journal_hash(sb);
4106 4261
4107 // make sure reiserfs_add_jh sees the new current_jl before we 4262 /*
4108 // write out the tails 4263 * make sure reiserfs_add_jh sees the new current_jl before we
4264 * write out the tails
4265 */
4109 smp_mb(); 4266 smp_mb();
4110 4267
4111 /* tail conversion targets have to hit the disk before we end the 4268 /*
4269 * tail conversion targets have to hit the disk before we end the
4112 * transaction. Otherwise a later transaction might repack the tail 4270 * transaction. Otherwise a later transaction might repack the tail
4113 * before this transaction commits, leaving the data block unflushed and 4271 * before this transaction commits, leaving the data block unflushed
4114 * clean, if we crash before the later transaction commits, the data block 4272 * and clean, if we crash before the later transaction commits, the
4115 * is lost. 4273 * data block is lost.
4116 */ 4274 */
4117 if (!list_empty(&jl->j_tail_bh_list)) { 4275 if (!list_empty(&jl->j_tail_bh_list)) {
4118 depth = reiserfs_write_unlock_nested(sb); 4276 depth = reiserfs_write_unlock_nested(sb);
@@ -4123,24 +4281,27 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4123 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4281 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4124 mutex_unlock(&jl->j_commit_mutex); 4282 mutex_unlock(&jl->j_commit_mutex);
4125 4283
4126 /* honor the flush wishes from the caller, simple commits can 4284 /*
4127 ** be done outside the journal lock, they are done below 4285 * honor the flush wishes from the caller, simple commits can
4128 ** 4286 * be done outside the journal lock, they are done below
4129 ** if we don't flush the commit list right now, we put it into 4287 *
4130 ** the work queue so the people waiting on the async progress work 4288 * if we don't flush the commit list right now, we put it into
4131 ** queue don't wait for this proc to flush journal lists and such. 4289 * the work queue so the people waiting on the async progress work
4290 * queue don't wait for this proc to flush journal lists and such.
4132 */ 4291 */
4133 if (flush) { 4292 if (flush) {
4134 flush_commit_list(sb, jl, 1); 4293 flush_commit_list(sb, jl, 1);
4135 flush_journal_list(sb, jl, 1); 4294 flush_journal_list(sb, jl, 1);
4136 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 4295 } else if (!(jl->j_state & LIST_COMMIT_PENDING))
4137 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10); 4296 queue_delayed_work(REISERFS_SB(sb)->commit_wq,
4297 &journal->j_work, HZ / 10);
4138 4298
4139 /* if the next transaction has any chance of wrapping, flush 4299 /*
4140 ** transactions that might get overwritten. If any journal lists are very 4300 * if the next transaction has any chance of wrapping, flush
4141 ** old flush them as well. 4301 * transactions that might get overwritten. If any journal lists
4302 * are very old flush them as well.
4142 */ 4303 */
4143 first_jl: 4304first_jl:
4144 list_for_each_safe(entry, safe, &journal->j_journal_list) { 4305 list_for_each_safe(entry, safe, &journal->j_journal_list) {
4145 temp_jl = JOURNAL_LIST_ENTRY(entry); 4306 temp_jl = JOURNAL_LIST_ENTRY(entry);
4146 if (journal->j_start <= temp_jl->j_start) { 4307 if (journal->j_start <= temp_jl->j_start) {
@@ -4151,8 +4312,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4151 } else if ((journal->j_start + 4312 } else if ((journal->j_start +
4152 journal->j_trans_max + 1) < 4313 journal->j_trans_max + 1) <
4153 SB_ONDISK_JOURNAL_SIZE(sb)) { 4314 SB_ONDISK_JOURNAL_SIZE(sb)) {
4154 /* if we don't cross into the next transaction and we don't 4315 /*
4155 * wrap, there is no way we can overlap any later transactions 4316 * if we don't cross into the next
4317 * transaction and we don't wrap, there is
4318 * no way we can overlap any later transactions
4156 * break now 4319 * break now
4157 */ 4320 */
4158 break; 4321 break;
@@ -4166,10 +4329,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4166 flush_used_journal_lists(sb, temp_jl); 4329 flush_used_journal_lists(sb, temp_jl);
4167 goto first_jl; 4330 goto first_jl;
4168 } else { 4331 } else {
4169 /* we don't overlap anything from out start to the end of the 4332 /*
4170 * log, and our wrapped portion doesn't overlap anything at 4333 * we don't overlap anything from out start
4171 * the start of the log. We can break 4334 * to the end of the log, and our wrapped
4172 */ 4335 * portion doesn't overlap anything at
4336 * the start of the log. We can break
4337 */
4173 break; 4338 break;
4174 } 4339 }
4175 } 4340 }
@@ -4183,23 +4348,25 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4183 "could not get a list bitmap"); 4348 "could not get a list bitmap");
4184 } 4349 }
4185 4350
4186 atomic_set(&(journal->j_jlock), 0); 4351 atomic_set(&journal->j_jlock, 0);
4187 unlock_journal(sb); 4352 unlock_journal(sb);
4188 /* wake up any body waiting to join. */ 4353 /* wake up any body waiting to join. */
4189 clear_bit(J_WRITERS_QUEUED, &journal->j_state); 4354 clear_bit(J_WRITERS_QUEUED, &journal->j_state);
4190 wake_up(&(journal->j_join_wait)); 4355 wake_up(&journal->j_join_wait);
4191 4356
4192 if (!flush && wait_on_commit && 4357 if (!flush && wait_on_commit &&
4193 journal_list_still_alive(sb, commit_trans_id)) { 4358 journal_list_still_alive(sb, commit_trans_id)) {
4194 flush_commit_list(sb, jl, 1); 4359 flush_commit_list(sb, jl, 1);
4195 } 4360 }
4196 out: 4361out:
4197 reiserfs_check_lock_depth(sb, "journal end2"); 4362 reiserfs_check_lock_depth(sb, "journal end2");
4198 4363
4199 memset(th, 0, sizeof(*th)); 4364 memset(th, 0, sizeof(*th));
4200 /* Re-set th->t_super, so we can properly keep track of how many 4365 /*
4366 * Re-set th->t_super, so we can properly keep track of how many
4201 * persistent transactions there are. We need to do this so if this 4367 * persistent transactions there are. We need to do this so if this
4202 * call is part of a failed restart_transaction, we can free it later */ 4368 * call is part of a failed restart_transaction, we can free it later
4369 */
4203 th->t_super = sb; 4370 th->t_super = sb;
4204 4371
4205 return journal->j_errno; 4372 return journal->j_errno;
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 79e5a8b4c226..d6744c8b24e1 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -8,46 +8,42 @@
8#include "reiserfs.h" 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* these are used in do_balance.c */ 11/*
12 12 * copy copy_count entries from source directory item to dest buffer
13/* leaf_move_items 13 * (creating new item if needed)
14 leaf_shift_left 14 */
15 leaf_shift_right
16 leaf_delete_items
17 leaf_insert_into_buf
18 leaf_paste_in_buffer
19 leaf_cut_from_buffer
20 leaf_paste_entries
21 */
22
23/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
24static void leaf_copy_dir_entries(struct buffer_info *dest_bi, 15static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
25 struct buffer_head *source, int last_first, 16 struct buffer_head *source, int last_first,
26 int item_num, int from, int copy_count) 17 int item_num, int from, int copy_count)
27{ 18{
28 struct buffer_head *dest = dest_bi->bi_bh; 19 struct buffer_head *dest = dest_bi->bi_bh;
29 int item_num_in_dest; /* either the number of target item, 20 /*
30 or if we must create a new item, 21 * either the number of target item, or if we must create a
31 the number of the item we will 22 * new item, the number of the item we will create it next to
32 create it next to */ 23 */
24 int item_num_in_dest;
25
33 struct item_head *ih; 26 struct item_head *ih;
34 struct reiserfs_de_head *deh; 27 struct reiserfs_de_head *deh;
35 int copy_records_len; /* length of all records in item to be copied */ 28 int copy_records_len; /* length of all records in item to be copied */
36 char *records; 29 char *records;
37 30
38 ih = B_N_PITEM_HEAD(source, item_num); 31 ih = item_head(source, item_num);
39 32
40 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); 33 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
41 34
42 /* length of all record to be copied and first byte of the last of them */ 35 /*
36 * length of all record to be copied and first byte of
37 * the last of them
38 */
43 deh = B_I_DEH(source, ih); 39 deh = B_I_DEH(source, ih);
44 if (copy_count) { 40 if (copy_count) {
45 copy_records_len = (from ? deh_location(&(deh[from - 1])) : 41 copy_records_len = (from ? deh_location(&deh[from - 1]) :
46 ih_item_len(ih)) - 42 ih_item_len(ih)) -
47 deh_location(&(deh[from + copy_count - 1])); 43 deh_location(&deh[from + copy_count - 1]);
48 records = 44 records =
49 source->b_data + ih_location(ih) + 45 source->b_data + ih_location(ih) +
50 deh_location(&(deh[from + copy_count - 1])); 46 deh_location(&deh[from + copy_count - 1]);
51 } else { 47 } else {
52 copy_records_len = 0; 48 copy_records_len = 0;
53 records = NULL; 49 records = NULL;
@@ -59,12 +55,15 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
59 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) 55 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest)
60 - 1); 56 - 1);
61 57
62 /* if there are no items in dest or the first/last item in dest is not item of the same directory */ 58 /*
59 * if there are no items in dest or the first/last item in
60 * dest is not item of the same directory
61 */
63 if ((item_num_in_dest == -1) || 62 if ((item_num_in_dest == -1) ||
64 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || 63 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
65 (last_first == LAST_TO_FIRST 64 (last_first == LAST_TO_FIRST
66 && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key, 65 && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key,
67 B_N_PKEY(dest, 66 leaf_key(dest,
68 item_num_in_dest)))) 67 item_num_in_dest))))
69 { 68 {
70 /* create new item in dest */ 69 /* create new item in dest */
@@ -80,16 +79,22 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
80 79
81 if (last_first == LAST_TO_FIRST) { 80 if (last_first == LAST_TO_FIRST) {
82 /* form key by the following way */ 81 /* form key by the following way */
83 if (from < I_ENTRY_COUNT(ih)) { 82 if (from < ih_entry_count(ih)) {
84 set_le_ih_k_offset(&new_ih, 83 set_le_ih_k_offset(&new_ih,
85 deh_offset(&(deh[from]))); 84 deh_offset(&deh[from]));
86 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
87 } else { 85 } else {
88 /* no entries will be copied to this item in this function */ 86 /*
87 * no entries will be copied to this
88 * item in this function
89 */
89 set_le_ih_k_offset(&new_ih, U32_MAX); 90 set_le_ih_k_offset(&new_ih, U32_MAX);
90 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ 91 /*
92 * this item is not yet valid, but we
93 * want I_IS_DIRECTORY_ITEM to return 1
94 * for it, so we -1
95 */
91 } 96 }
92 set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), 97 set_le_key_k_type(KEY_FORMAT_3_5, &new_ih.ih_key,
93 TYPE_DIRENTRY); 98 TYPE_DIRENTRY);
94 } 99 }
95 100
@@ -113,36 +118,44 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
113 118
114 leaf_paste_entries(dest_bi, item_num_in_dest, 119 leaf_paste_entries(dest_bi, item_num_in_dest,
115 (last_first == 120 (last_first ==
116 FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest, 121 FIRST_TO_LAST) ? ih_entry_count(item_head(dest,
117 item_num_in_dest)) 122 item_num_in_dest))
118 : 0, copy_count, deh + from, records, 123 : 0, copy_count, deh + from, records,
119 DEH_SIZE * copy_count + copy_records_len); 124 DEH_SIZE * copy_count + copy_records_len);
120} 125}
121 126
122/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or 127/*
123 part of it or nothing (see the return 0 below) from SOURCE to the end 128 * Copy the first (if last_first == FIRST_TO_LAST) or last
124 (if last_first) or beginning (!last_first) of the DEST */ 129 * (last_first == LAST_TO_FIRST) item or part of it or nothing
130 * (see the return 0 below) from SOURCE to the end (if last_first)
131 * or beginning (!last_first) of the DEST
132 */
125/* returns 1 if anything was copied, else 0 */ 133/* returns 1 if anything was copied, else 0 */
126static int leaf_copy_boundary_item(struct buffer_info *dest_bi, 134static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
127 struct buffer_head *src, int last_first, 135 struct buffer_head *src, int last_first,
128 int bytes_or_entries) 136 int bytes_or_entries)
129{ 137{
130 struct buffer_head *dest = dest_bi->bi_bh; 138 struct buffer_head *dest = dest_bi->bi_bh;
131 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ 139 /* number of items in the source and destination buffers */
140 int dest_nr_item, src_nr_item;
132 struct item_head *ih; 141 struct item_head *ih;
133 struct item_head *dih; 142 struct item_head *dih;
134 143
135 dest_nr_item = B_NR_ITEMS(dest); 144 dest_nr_item = B_NR_ITEMS(dest);
136 145
146 /*
147 * if ( DEST is empty or first item of SOURCE and last item of
148 * DEST are the items of different objects or of different types )
149 * then there is no need to treat this item differently from the
150 * other items that we copy, so we return
151 */
137 if (last_first == FIRST_TO_LAST) { 152 if (last_first == FIRST_TO_LAST) {
138 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects 153 ih = item_head(src, 0);
139 or of different types ) then there is no need to treat this item differently from the other items 154 dih = item_head(dest, dest_nr_item - 1);
140 that we copy, so we return */ 155
141 ih = B_N_PITEM_HEAD(src, 0); 156 /* there is nothing to merge */
142 dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1);
143 if (!dest_nr_item 157 if (!dest_nr_item
144 || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) 158 || (!op_is_left_mergeable(&ih->ih_key, src->b_size)))
145 /* there is nothing to merge */
146 return 0; 159 return 0;
147 160
148 RFALSE(!ih_item_len(ih), 161 RFALSE(!ih_item_len(ih),
@@ -157,8 +170,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
157 return 1; 170 return 1;
158 } 171 }
159 172
160 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST 173 /*
161 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header 174 * copy part of the body of the first item of SOURCE
175 * to the end of the body of the last item of the DEST
176 * part defined by 'bytes_or_entries'; if bytes_or_entries
177 * == -1 copy whole body; don't create new item header
162 */ 178 */
163 if (bytes_or_entries == -1) 179 if (bytes_or_entries == -1)
164 bytes_or_entries = ih_item_len(ih); 180 bytes_or_entries = ih_item_len(ih);
@@ -176,11 +192,13 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
176 } 192 }
177#endif 193#endif
178 194
179 /* merge first item (or its part) of src buffer with the last 195 /*
180 item of dest buffer. Both are of the same file */ 196 * merge first item (or its part) of src buffer with the last
197 * item of dest buffer. Both are of the same file
198 */
181 leaf_paste_in_buffer(dest_bi, 199 leaf_paste_in_buffer(dest_bi,
182 dest_nr_item - 1, ih_item_len(dih), 200 dest_nr_item - 1, ih_item_len(dih),
183 bytes_or_entries, B_I_PITEM(src, ih), 0); 201 bytes_or_entries, ih_item_body(src, ih), 0);
184 202
185 if (is_indirect_le_ih(dih)) { 203 if (is_indirect_le_ih(dih)) {
186 RFALSE(get_ih_free_space(dih), 204 RFALSE(get_ih_free_space(dih),
@@ -195,19 +213,23 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
195 213
196 /* copy boundary item to right (last_first == LAST_TO_FIRST) */ 214 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
197 215
198 /* ( DEST is empty or last item of SOURCE and first item of DEST 216 /*
199 are the items of different object or of different types ) 217 * (DEST is empty or last item of SOURCE and first item of DEST
218 * are the items of different object or of different types)
200 */ 219 */
201 src_nr_item = B_NR_ITEMS(src); 220 src_nr_item = B_NR_ITEMS(src);
202 ih = B_N_PITEM_HEAD(src, src_nr_item - 1); 221 ih = item_head(src, src_nr_item - 1);
203 dih = B_N_PITEM_HEAD(dest, 0); 222 dih = item_head(dest, 0);
204 223
205 if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size)) 224 if (!dest_nr_item || !op_is_left_mergeable(&dih->ih_key, src->b_size))
206 return 0; 225 return 0;
207 226
208 if (is_direntry_le_ih(ih)) { 227 if (is_direntry_le_ih(ih)) {
228 /*
229 * bytes_or_entries = entries number in last
230 * item body of SOURCE
231 */
209 if (bytes_or_entries == -1) 232 if (bytes_or_entries == -1)
210 /* bytes_or_entries = entries number in last item body of SOURCE */
211 bytes_or_entries = ih_entry_count(ih); 233 bytes_or_entries = ih_entry_count(ih);
212 234
213 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 235 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
@@ -217,9 +239,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
217 return 1; 239 return 1;
218 } 240 }
219 241
220 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; 242 /*
221 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; 243 * copy part of the body of the last item of SOURCE to the
222 don't create new item header 244 * begin of the body of the first item of the DEST; part defined
245 * by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body;
246 * change first item key of the DEST; don't create new item header
223 */ 247 */
224 248
225 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), 249 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
@@ -270,15 +294,18 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
270 } 294 }
271 295
272 leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries, 296 leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries,
273 B_I_PITEM(src, 297 ih_item_body(src,
274 ih) + ih_item_len(ih) - bytes_or_entries, 298 ih) + ih_item_len(ih) - bytes_or_entries,
275 0); 299 0);
276 return 1; 300 return 1;
277} 301}
278 302
279/* copy cpy_mun items from buffer src to buffer dest 303/*
280 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest 304 * copy cpy_mun items from buffer src to buffer dest
281 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest 305 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning
306 * from first-th item in src to tail of dest
307 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning
308 * from first-th item in src to head of dest
282 */ 309 */
283static void leaf_copy_items_entirely(struct buffer_info *dest_bi, 310static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
284 struct buffer_head *src, int last_first, 311 struct buffer_head *src, int last_first,
@@ -311,11 +338,14 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
311 nr = blkh_nr_item(blkh); 338 nr = blkh_nr_item(blkh);
312 free_space = blkh_free_space(blkh); 339 free_space = blkh_free_space(blkh);
313 340
314 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ 341 /*
342 * we will insert items before 0-th or nr-th item in dest buffer.
343 * It depends of last_first parameter
344 */
315 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; 345 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
316 346
317 /* location of head of first new item */ 347 /* location of head of first new item */
318 ih = B_N_PITEM_HEAD(dest, dest_before); 348 ih = item_head(dest, dest_before);
319 349
320 RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE, 350 RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
321 "vs-10140: not enough free space for headers %d (needed %d)", 351 "vs-10140: not enough free space for headers %d (needed %d)",
@@ -325,7 +355,7 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
325 memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE); 355 memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);
326 356
327 /* copy item headers */ 357 /* copy item headers */
328 memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE); 358 memcpy(ih, item_head(src, first), cpy_num * IH_SIZE);
329 359
330 free_space -= (IH_SIZE * cpy_num); 360 free_space -= (IH_SIZE * cpy_num);
331 set_blkh_free_space(blkh, free_space); 361 set_blkh_free_space(blkh, free_space);
@@ -338,8 +368,8 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
338 } 368 }
339 369
340 /* prepare space for items */ 370 /* prepare space for items */
341 last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before])); 371 last_loc = ih_location(&ih[nr + cpy_num - 1 - dest_before]);
342 last_inserted_loc = ih_location(&(ih[cpy_num - 1])); 372 last_inserted_loc = ih_location(&ih[cpy_num - 1]);
343 373
344 /* check free space */ 374 /* check free space */
345 RFALSE(free_space < j - last_inserted_loc, 375 RFALSE(free_space < j - last_inserted_loc,
@@ -352,7 +382,8 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
352 382
353 /* copy items */ 383 /* copy items */
354 memcpy(dest->b_data + last_inserted_loc, 384 memcpy(dest->b_data + last_inserted_loc,
355 B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc); 385 item_body(src, (first + cpy_num - 1)),
386 j - last_inserted_loc);
356 387
357 /* sizes, item number */ 388 /* sizes, item number */
358 set_blkh_nr_item(blkh, nr + cpy_num); 389 set_blkh_nr_item(blkh, nr + cpy_num);
@@ -376,8 +407,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
376 } 407 }
377} 408}
378 409
379/* This function splits the (liquid) item into two items (useful when 410/*
380 shifting part of an item into another node.) */ 411 * This function splits the (liquid) item into two items (useful when
412 * shifting part of an item into another node.)
413 */
381static void leaf_item_bottle(struct buffer_info *dest_bi, 414static void leaf_item_bottle(struct buffer_info *dest_bi,
382 struct buffer_head *src, int last_first, 415 struct buffer_head *src, int last_first,
383 int item_num, int cpy_bytes) 416 int item_num, int cpy_bytes)
@@ -389,17 +422,22 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
389 "vs-10170: bytes == - 1 means: do not split item"); 422 "vs-10170: bytes == - 1 means: do not split item");
390 423
391 if (last_first == FIRST_TO_LAST) { 424 if (last_first == FIRST_TO_LAST) {
392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 425 /*
393 ih = B_N_PITEM_HEAD(src, item_num); 426 * if ( if item in position item_num in buffer SOURCE
427 * is directory item )
428 */
429 ih = item_head(src, item_num);
394 if (is_direntry_le_ih(ih)) 430 if (is_direntry_le_ih(ih))
395 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 431 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
396 item_num, 0, cpy_bytes); 432 item_num, 0, cpy_bytes);
397 else { 433 else {
398 struct item_head n_ih; 434 struct item_head n_ih;
399 435
400 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST 436 /*
401 part defined by 'cpy_bytes'; create new item header; change old item_header (????); 437 * copy part of the body of the item number 'item_num'
402 n_ih = new item_header; 438 * of SOURCE to the end of the DEST part defined by
439 * 'cpy_bytes'; create new item header; change old
440 * item_header (????); n_ih = new item_header;
403 */ 441 */
404 memcpy(&n_ih, ih, IH_SIZE); 442 memcpy(&n_ih, ih, IH_SIZE);
405 put_ih_item_len(&n_ih, cpy_bytes); 443 put_ih_item_len(&n_ih, cpy_bytes);
@@ -411,30 +449,36 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
411 set_ih_free_space(&n_ih, 0); 449 set_ih_free_space(&n_ih, 0);
412 } 450 }
413 451
414 RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size), 452 RFALSE(op_is_left_mergeable(&ih->ih_key, src->b_size),
415 "vs-10190: bad mergeability of item %h", ih); 453 "vs-10190: bad mergeability of item %h", ih);
416 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 454 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
417 leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih, 455 leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih,
418 B_N_PITEM(src, item_num), 0); 456 item_body(src, item_num), 0);
419 } 457 }
420 } else { 458 } else {
421 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 459 /*
422 ih = B_N_PITEM_HEAD(src, item_num); 460 * if ( if item in position item_num in buffer
461 * SOURCE is directory item )
462 */
463 ih = item_head(src, item_num);
423 if (is_direntry_le_ih(ih)) 464 if (is_direntry_le_ih(ih))
424 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 465 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
425 item_num, 466 item_num,
426 I_ENTRY_COUNT(ih) - cpy_bytes, 467 ih_entry_count(ih) - cpy_bytes,
427 cpy_bytes); 468 cpy_bytes);
428 else { 469 else {
429 struct item_head n_ih; 470 struct item_head n_ih;
430 471
431 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST 472 /*
432 part defined by 'cpy_bytes'; create new item header; 473 * copy part of the body of the item number 'item_num'
433 n_ih = new item_header; 474 * of SOURCE to the begin of the DEST part defined by
475 * 'cpy_bytes'; create new item header;
476 * n_ih = new item_header;
434 */ 477 */
435 memcpy(&n_ih, ih, SHORT_KEY_SIZE); 478 memcpy(&n_ih, ih, SHORT_KEY_SIZE);
436 479
437 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 480 /* Endian safe, both le */
481 n_ih.ih_version = ih->ih_version;
438 482
439 if (is_direct_le_ih(ih)) { 483 if (is_direct_le_ih(ih)) {
440 set_le_ih_k_offset(&n_ih, 484 set_le_ih_k_offset(&n_ih,
@@ -458,20 +502,22 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
458 /* set item length */ 502 /* set item length */
459 put_ih_item_len(&n_ih, cpy_bytes); 503 put_ih_item_len(&n_ih, cpy_bytes);
460 504
461 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 505 /* Endian safe, both le */
506 n_ih.ih_version = ih->ih_version;
462 507
463 leaf_insert_into_buf(dest_bi, 0, &n_ih, 508 leaf_insert_into_buf(dest_bi, 0, &n_ih,
464 B_N_PITEM(src, 509 item_body(src, item_num) +
465 item_num) + 510 ih_item_len(ih) - cpy_bytes, 0);
466 ih_item_len(ih) - cpy_bytes, 0);
467 } 511 }
468 } 512 }
469} 513}
470 514
471/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. 515/*
472 If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. 516 * If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE
473 From last item copy cpy_num bytes for regular item and cpy_num directory entries for 517 * to DEST. If cpy_bytes not equal to minus one than copy cpy_num-1 whole
474 directory item. */ 518 * items from SOURCE to DEST. From last item copy cpy_num bytes for regular
519 * item and cpy_num directory entries for directory item.
520 */
475static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, 521static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
476 int last_first, int cpy_num, int cpy_bytes) 522 int last_first, int cpy_num, int cpy_bytes)
477{ 523{
@@ -498,22 +544,34 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
498 else 544 else
499 bytes = -1; 545 bytes = -1;
500 546
501 /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ 547 /*
548 * copy the first item or it part or nothing to the end of
549 * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes))
550 */
502 i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); 551 i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
503 cpy_num -= i; 552 cpy_num -= i;
504 if (cpy_num == 0) 553 if (cpy_num == 0)
505 return i; 554 return i;
506 pos += i; 555 pos += i;
507 if (cpy_bytes == -1) 556 if (cpy_bytes == -1)
508 /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ 557 /*
558 * copy first cpy_num items starting from position
559 * 'pos' of SOURCE to end of DEST
560 */
509 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, 561 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
510 pos, cpy_num); 562 pos, cpy_num);
511 else { 563 else {
512 /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ 564 /*
565 * copy first cpy_num-1 items starting from position
566 * 'pos-1' of the SOURCE to the end of the DEST
567 */
513 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, 568 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
514 pos, cpy_num - 1); 569 pos, cpy_num - 1);
515 570
516 /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ 571 /*
572 * copy part of the item which number is
573 * cpy_num+pos-1 to the end of the DEST
574 */
517 leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, 575 leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
518 cpy_num + pos - 1, cpy_bytes); 576 cpy_num + pos - 1, cpy_bytes);
519 } 577 }
@@ -525,7 +583,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
525 else 583 else
526 bytes = -1; 584 bytes = -1;
527 585
528 /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ 586 /*
587 * copy the last item or it part or nothing to the
588 * begin of the DEST
589 * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes));
590 */
529 i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); 591 i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);
530 592
531 cpy_num -= i; 593 cpy_num -= i;
@@ -534,15 +596,24 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
534 596
535 pos = src_nr_item - cpy_num - i; 597 pos = src_nr_item - cpy_num - i;
536 if (cpy_bytes == -1) { 598 if (cpy_bytes == -1) {
537 /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ 599 /*
600 * starting from position 'pos' copy last cpy_num
601 * items of SOURCE to begin of DEST
602 */
538 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, 603 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
539 pos, cpy_num); 604 pos, cpy_num);
540 } else { 605 } else {
541 /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ 606 /*
607 * copy last cpy_num-1 items starting from position
608 * 'pos+1' of the SOURCE to the begin of the DEST;
609 */
542 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, 610 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
543 pos + 1, cpy_num - 1); 611 pos + 1, cpy_num - 1);
544 612
545 /* copy part of the item which number is pos to the begin of the DEST */ 613 /*
614 * copy part of the item which number is pos to
615 * the begin of the DEST
616 */
546 leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, 617 leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
547 cpy_bytes); 618 cpy_bytes);
548 } 619 }
@@ -550,9 +621,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
550 return i; 621 return i;
551} 622}
552 623
553/* there are types of coping: from S[0] to L[0], from S[0] to R[0], 624/*
554 from R[0] to L[0]. for each of these we have to define parent and 625 * there are types of coping: from S[0] to L[0], from S[0] to R[0],
555 positions of destination and source buffers */ 626 * from R[0] to L[0]. for each of these we have to define parent and
627 * positions of destination and source buffers
628 */
556static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, 629static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
557 struct buffer_info *dest_bi, 630 struct buffer_info *dest_bi,
558 struct buffer_info *src_bi, 631 struct buffer_info *src_bi,
@@ -568,7 +641,9 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
568 src_bi->tb = tb; 641 src_bi->tb = tb;
569 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 642 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
570 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 643 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
571 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ 644
645 /* src->b_item_order */
646 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
572 dest_bi->tb = tb; 647 dest_bi->tb = tb;
573 dest_bi->bi_bh = tb->L[0]; 648 dest_bi->bi_bh = tb->L[0];
574 dest_bi->bi_parent = tb->FL[0]; 649 dest_bi->bi_parent = tb->FL[0];
@@ -633,8 +708,10 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
633 shift_mode, src_bi->bi_bh, dest_bi->bi_bh); 708 shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
634} 709}
635 710
636/* copy mov_num items and mov_bytes of the (mov_num-1)th item to 711/*
637 neighbor. Delete them from source */ 712 * copy mov_num items and mov_bytes of the (mov_num-1)th item to
713 * neighbor. Delete them from source
714 */
638int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, 715int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
639 int mov_bytes, struct buffer_head *Snew) 716 int mov_bytes, struct buffer_head *Snew)
640{ 717{
@@ -657,18 +734,24 @@ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
657 return ret_value; 734 return ret_value;
658} 735}
659 736
660/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) 737/*
661 from S[0] to L[0] and replace the delimiting key */ 738 * Shift shift_num items (and shift_bytes of last shifted item if
739 * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key
740 */
662int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) 741int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
663{ 742{
664 struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); 743 struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
665 int i; 744 int i;
666 745
667 /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ 746 /*
747 * move shift_num (and shift_bytes bytes) items from S[0]
748 * to left neighbor L[0]
749 */
668 i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); 750 i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
669 751
670 if (shift_num) { 752 if (shift_num) {
671 if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */ 753 /* number of items in S[0] == 0 */
754 if (B_NR_ITEMS(S0) == 0) {
672 755
673 RFALSE(shift_bytes != -1, 756 RFALSE(shift_bytes != -1,
674 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", 757 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
@@ -691,10 +774,10 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
691 replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0); 774 replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0);
692 775
693 RFALSE((shift_bytes != -1 && 776 RFALSE((shift_bytes != -1 &&
694 !(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0)) 777 !(is_direntry_le_ih(item_head(S0, 0))
695 && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) && 778 && !ih_entry_count(item_head(S0, 0)))) &&
696 (!op_is_left_mergeable 779 (!op_is_left_mergeable
697 (B_N_PKEY(S0, 0), S0->b_size)), 780 (leaf_key(S0, 0), S0->b_size)),
698 "vs-10280: item must be mergeable"); 781 "vs-10280: item must be mergeable");
699 } 782 }
700 } 783 }
@@ -704,13 +787,18 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
704 787
705/* CLEANING STOPPED HERE */ 788/* CLEANING STOPPED HERE */
706 789
707/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ 790/*
791 * Shift shift_num (shift_bytes) items from S[0] to the right neighbor,
792 * and replace the delimiting key
793 */
708int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) 794int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
709{ 795{
710 // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
711 int ret_value; 796 int ret_value;
712 797
713 /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ 798 /*
799 * move shift_num (and shift_bytes) items from S[0] to
800 * right neighbor R[0]
801 */
714 ret_value = 802 ret_value =
715 leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); 803 leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);
716 804
@@ -725,12 +813,16 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
725 813
726static void leaf_delete_items_entirely(struct buffer_info *bi, 814static void leaf_delete_items_entirely(struct buffer_info *bi,
727 int first, int del_num); 815 int first, int del_num);
728/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. 816/*
729 If not. 817 * If del_bytes == -1, starting from position 'first' delete del_num
730 If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of 818 * items in whole in buffer CUR.
731 the first item. Part defined by del_bytes. Don't delete first item header 819 * If not.
732 If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of 820 * If last_first == 0. Starting from position 'first' delete del_num-1
733 the last item . Part defined by del_bytes. Don't delete last item header. 821 * items in whole. Delete part of body of the first item. Part defined by
822 * del_bytes. Don't delete first item header
823 * If last_first == 1. Starting from position 'first+1' delete del_num-1
824 * items in whole. Delete part of body of the last item . Part defined by
825 * del_bytes. Don't delete last item header.
734*/ 826*/
735void leaf_delete_items(struct buffer_info *cur_bi, int last_first, 827void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
736 int first, int del_num, int del_bytes) 828 int first, int del_num, int del_bytes)
@@ -761,32 +853,43 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
761 leaf_delete_items_entirely(cur_bi, first, del_num); 853 leaf_delete_items_entirely(cur_bi, first, del_num);
762 else { 854 else {
763 if (last_first == FIRST_TO_LAST) { 855 if (last_first == FIRST_TO_LAST) {
764 /* delete del_num-1 items beginning from item in position first */ 856 /*
857 * delete del_num-1 items beginning from
858 * item in position first
859 */
765 leaf_delete_items_entirely(cur_bi, first, del_num - 1); 860 leaf_delete_items_entirely(cur_bi, first, del_num - 1);
766 861
767 /* delete the part of the first item of the bh 862 /*
768 do not delete item header 863 * delete the part of the first item of the bh
864 * do not delete item header
769 */ 865 */
770 leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); 866 leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
771 } else { 867 } else {
772 struct item_head *ih; 868 struct item_head *ih;
773 int len; 869 int len;
774 870
775 /* delete del_num-1 items beginning from item in position first+1 */ 871 /*
872 * delete del_num-1 items beginning from
873 * item in position first+1
874 */
776 leaf_delete_items_entirely(cur_bi, first + 1, 875 leaf_delete_items_entirely(cur_bi, first + 1,
777 del_num - 1); 876 del_num - 1);
778 877
779 ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1); 878 ih = item_head(bh, B_NR_ITEMS(bh) - 1);
780 if (is_direntry_le_ih(ih)) 879 if (is_direntry_le_ih(ih))
781 /* the last item is directory */ 880 /* the last item is directory */
782 /* len = numbers of directory entries in this item */ 881 /*
882 * len = numbers of directory entries
883 * in this item
884 */
783 len = ih_entry_count(ih); 885 len = ih_entry_count(ih);
784 else 886 else
785 /* len = body len of item */ 887 /* len = body len of item */
786 len = ih_item_len(ih); 888 len = ih_item_len(ih);
787 889
788 /* delete the part of the last item of the bh 890 /*
789 do not delete item header 891 * delete the part of the last item of the bh
892 * do not delete item header
790 */ 893 */
791 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, 894 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
792 len - del_bytes, del_bytes); 895 len - del_bytes, del_bytes);
@@ -820,10 +923,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
820 zeros_number, ih_item_len(inserted_item_ih)); 923 zeros_number, ih_item_len(inserted_item_ih));
821 924
822 /* get item new item must be inserted before */ 925 /* get item new item must be inserted before */
823 ih = B_N_PITEM_HEAD(bh, before); 926 ih = item_head(bh, before);
824 927
825 /* prepare space for the body of new item */ 928 /* prepare space for the body of new item */
826 last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size; 929 last_loc = nr ? ih_location(&ih[nr - before - 1]) : bh->b_size;
827 unmoved_loc = before ? ih_location(ih - 1) : bh->b_size; 930 unmoved_loc = before ? ih_location(ih - 1) : bh->b_size;
828 931
829 memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih), 932 memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih),
@@ -846,8 +949,8 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
846 949
847 /* change locations */ 950 /* change locations */
848 for (i = before; i < nr + 1; i++) { 951 for (i = before; i < nr + 1; i++) {
849 unmoved_loc -= ih_item_len(&(ih[i - before])); 952 unmoved_loc -= ih_item_len(&ih[i - before]);
850 put_ih_location(&(ih[i - before]), unmoved_loc); 953 put_ih_location(&ih[i - before], unmoved_loc);
851 } 954 }
852 955
853 /* sizes, free space, item number */ 956 /* sizes, free space, item number */
@@ -867,8 +970,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
867 } 970 }
868} 971}
869 972
870/* paste paste_size bytes to affected_item_num-th item. 973/*
871 When item is a directory, this only prepare space for new entries */ 974 * paste paste_size bytes to affected_item_num-th item.
975 * When item is a directory, this only prepare space for new entries
976 */
872void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, 977void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
873 int pos_in_item, int paste_size, 978 int pos_in_item, int paste_size,
874 const char *body, int zeros_number) 979 const char *body, int zeros_number)
@@ -902,9 +1007,9 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
902#endif /* CONFIG_REISERFS_CHECK */ 1007#endif /* CONFIG_REISERFS_CHECK */
903 1008
904 /* item to be appended */ 1009 /* item to be appended */
905 ih = B_N_PITEM_HEAD(bh, affected_item_num); 1010 ih = item_head(bh, affected_item_num);
906 1011
907 last_loc = ih_location(&(ih[nr - affected_item_num - 1])); 1012 last_loc = ih_location(&ih[nr - affected_item_num - 1]);
908 unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size; 1013 unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size;
909 1014
910 /* prepare space */ 1015 /* prepare space */
@@ -913,8 +1018,8 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
913 1018
914 /* change locations */ 1019 /* change locations */
915 for (i = affected_item_num; i < nr; i++) 1020 for (i = affected_item_num; i < nr; i++)
916 put_ih_location(&(ih[i - affected_item_num]), 1021 put_ih_location(&ih[i - affected_item_num],
917 ih_location(&(ih[i - affected_item_num])) - 1022 ih_location(&ih[i - affected_item_num]) -
918 paste_size); 1023 paste_size);
919 1024
920 if (body) { 1025 if (body) {
@@ -957,10 +1062,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
957 } 1062 }
958} 1063}
959 1064
960/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item 1065/*
961 does not have free space, so it moves DEHs and remaining records as 1066 * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
962 necessary. Return value is size of removed part of directory item 1067 * does not have free space, so it moves DEHs and remaining records as
963 in bytes. */ 1068 * necessary. Return value is size of removed part of directory item
1069 * in bytes.
1070 */
964static int leaf_cut_entries(struct buffer_head *bh, 1071static int leaf_cut_entries(struct buffer_head *bh,
965 struct item_head *ih, int from, int del_count) 1072 struct item_head *ih, int from, int del_count)
966{ 1073{
@@ -971,12 +1078,14 @@ static int leaf_cut_entries(struct buffer_head *bh,
971 int cut_records_len; /* length of all removed records */ 1078 int cut_records_len; /* length of all removed records */
972 int i; 1079 int i;
973 1080
974 /* make sure, that item is directory and there are enough entries to 1081 /*
975 remove */ 1082 * make sure that item is directory and there are enough entries to
1083 * remove
1084 */
976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); 1085 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
977 RFALSE(I_ENTRY_COUNT(ih) < from + del_count, 1086 RFALSE(ih_entry_count(ih) < from + del_count,
978 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", 1087 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
979 I_ENTRY_COUNT(ih), from, del_count); 1088 ih_entry_count(ih), from, del_count);
980 1089
981 if (del_count == 0) 1090 if (del_count == 0)
982 return 0; 1091 return 0;
@@ -987,22 +1096,24 @@ static int leaf_cut_entries(struct buffer_head *bh,
987 /* entry head array */ 1096 /* entry head array */
988 deh = B_I_DEH(bh, ih); 1097 deh = B_I_DEH(bh, ih);
989 1098
990 /* first byte of remaining entries, those are BEFORE cut entries 1099 /*
991 (prev_record) and length of all removed records (cut_records_len) */ 1100 * first byte of remaining entries, those are BEFORE cut entries
1101 * (prev_record) and length of all removed records (cut_records_len)
1102 */
992 prev_record_offset = 1103 prev_record_offset =
993 (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); 1104 (from ? deh_location(&deh[from - 1]) : ih_item_len(ih));
994 cut_records_len = prev_record_offset /*from_record */ - 1105 cut_records_len = prev_record_offset /*from_record */ -
995 deh_location(&(deh[from + del_count - 1])); 1106 deh_location(&deh[from + del_count - 1]);
996 prev_record = item + prev_record_offset; 1107 prev_record = item + prev_record_offset;
997 1108
998 /* adjust locations of remaining entries */ 1109 /* adjust locations of remaining entries */
999 for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--) 1110 for (i = ih_entry_count(ih) - 1; i > from + del_count - 1; i--)
1000 put_deh_location(&(deh[i]), 1111 put_deh_location(&deh[i],
1001 deh_location(&deh[i]) - 1112 deh_location(&deh[i]) -
1002 (DEH_SIZE * del_count)); 1113 (DEH_SIZE * del_count));
1003 1114
1004 for (i = 0; i < from; i++) 1115 for (i = 0; i < from; i++)
1005 put_deh_location(&(deh[i]), 1116 put_deh_location(&deh[i],
1006 deh_location(&deh[i]) - (DEH_SIZE * del_count + 1117 deh_location(&deh[i]) - (DEH_SIZE * del_count +
1007 cut_records_len)); 1118 cut_records_len));
1008 1119
@@ -1021,14 +1132,15 @@ static int leaf_cut_entries(struct buffer_head *bh,
1021 return DEH_SIZE * del_count + cut_records_len; 1132 return DEH_SIZE * del_count + cut_records_len;
1022} 1133}
1023 1134
1024/* when cut item is part of regular file 1135/*
1025 pos_in_item - first byte that must be cut 1136 * when cut item is part of regular file
1026 cut_size - number of bytes to be cut beginning from pos_in_item 1137 * pos_in_item - first byte that must be cut
1027 1138 * cut_size - number of bytes to be cut beginning from pos_in_item
1028 when cut item is part of directory 1139 *
1029 pos_in_item - number of first deleted entry 1140 * when cut item is part of directory
1030 cut_size - count of deleted entries 1141 * pos_in_item - number of first deleted entry
1031 */ 1142 * cut_size - count of deleted entries
1143 */
1032void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, 1144void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1033 int pos_in_item, int cut_size) 1145 int pos_in_item, int cut_size)
1034{ 1146{
@@ -1043,7 +1155,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1043 nr = blkh_nr_item(blkh); 1155 nr = blkh_nr_item(blkh);
1044 1156
1045 /* item head of truncated item */ 1157 /* item head of truncated item */
1046 ih = B_N_PITEM_HEAD(bh, cut_item_num); 1158 ih = item_head(bh, cut_item_num);
1047 1159
1048 if (is_direntry_le_ih(ih)) { 1160 if (is_direntry_le_ih(ih)) {
1049 /* first cut entry () */ 1161 /* first cut entry () */
@@ -1055,7 +1167,6 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1055 cut_item_num); 1167 cut_item_num);
1056 /* change item key by key of first entry in the item */ 1168 /* change item key by key of first entry in the item */
1057 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); 1169 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
1058 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
1059 } 1170 }
1060 } else { 1171 } else {
1061 /* item is direct or indirect */ 1172 /* item is direct or indirect */
@@ -1089,7 +1200,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1089 } 1200 }
1090 1201
1091 /* location of the last item */ 1202 /* location of the last item */
1092 last_loc = ih_location(&(ih[nr - cut_item_num - 1])); 1203 last_loc = ih_location(&ih[nr - cut_item_num - 1]);
1093 1204
1094 /* location of the item, which is remaining at the same place */ 1205 /* location of the item, which is remaining at the same place */
1095 unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size; 1206 unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size;
@@ -1108,7 +1219,7 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1108 1219
1109 /* change locations */ 1220 /* change locations */
1110 for (i = cut_item_num; i < nr; i++) 1221 for (i = cut_item_num; i < nr; i++)
1111 put_ih_location(&(ih[i - cut_item_num]), 1222 put_ih_location(&ih[i - cut_item_num],
1112 ih_location(&ih[i - cut_item_num]) + cut_size); 1223 ih_location(&ih[i - cut_item_num]) + cut_size);
1113 1224
1114 /* size, free space */ 1225 /* size, free space */
@@ -1156,14 +1267,14 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
1156 return; 1267 return;
1157 } 1268 }
1158 1269
1159 ih = B_N_PITEM_HEAD(bh, first); 1270 ih = item_head(bh, first);
1160 1271
1161 /* location of unmovable item */ 1272 /* location of unmovable item */
1162 j = (first == 0) ? bh->b_size : ih_location(ih - 1); 1273 j = (first == 0) ? bh->b_size : ih_location(ih - 1);
1163 1274
1164 /* delete items */ 1275 /* delete items */
1165 last_loc = ih_location(&(ih[nr - 1 - first])); 1276 last_loc = ih_location(&ih[nr - 1 - first]);
1166 last_removed_loc = ih_location(&(ih[del_num - 1])); 1277 last_removed_loc = ih_location(&ih[del_num - 1]);
1167 1278
1168 memmove(bh->b_data + last_loc + j - last_removed_loc, 1279 memmove(bh->b_data + last_loc + j - last_removed_loc,
1169 bh->b_data + last_loc, last_removed_loc - last_loc); 1280 bh->b_data + last_loc, last_removed_loc - last_loc);
@@ -1173,8 +1284,8 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
1173 1284
1174 /* change item location */ 1285 /* change item location */
1175 for (i = first; i < nr - del_num; i++) 1286 for (i = first; i < nr - del_num; i++)
1176 put_ih_location(&(ih[i - first]), 1287 put_ih_location(&ih[i - first],
1177 ih_location(&(ih[i - first])) + (j - 1288 ih_location(&ih[i - first]) + (j -
1178 last_removed_loc)); 1289 last_removed_loc));
1179 1290
1180 /* sizes, item number */ 1291 /* sizes, item number */
@@ -1195,7 +1306,10 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
1195 } 1306 }
1196} 1307}
1197 1308
1198/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ 1309/*
1310 * paste new_entry_count entries (new_dehs, records) into position
1311 * before to item_num-th item
1312 */
1199void leaf_paste_entries(struct buffer_info *bi, 1313void leaf_paste_entries(struct buffer_info *bi,
1200 int item_num, 1314 int item_num,
1201 int before, 1315 int before,
@@ -1213,13 +1327,16 @@ void leaf_paste_entries(struct buffer_info *bi,
1213 if (new_entry_count == 0) 1327 if (new_entry_count == 0)
1214 return; 1328 return;
1215 1329
1216 ih = B_N_PITEM_HEAD(bh, item_num); 1330 ih = item_head(bh, item_num);
1217 1331
1218 /* make sure, that item is directory, and there are enough records in it */ 1332 /*
1333 * make sure, that item is directory, and there are enough
1334 * records in it
1335 */
1219 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); 1336 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
1220 RFALSE(I_ENTRY_COUNT(ih) < before, 1337 RFALSE(ih_entry_count(ih) < before,
1221 "10230: there are no entry we paste entries before. entry_count = %d, before = %d", 1338 "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
1222 I_ENTRY_COUNT(ih), before); 1339 ih_entry_count(ih), before);
1223 1340
1224 /* first byte of dest item */ 1341 /* first byte of dest item */
1225 item = bh->b_data + ih_location(ih); 1342 item = bh->b_data + ih_location(ih);
@@ -1230,21 +1347,21 @@ void leaf_paste_entries(struct buffer_info *bi,
1230 /* new records will be pasted at this point */ 1347 /* new records will be pasted at this point */
1231 insert_point = 1348 insert_point =
1232 item + 1349 item +
1233 (before ? deh_location(&(deh[before - 1])) 1350 (before ? deh_location(&deh[before - 1])
1234 : (ih_item_len(ih) - paste_size)); 1351 : (ih_item_len(ih) - paste_size));
1235 1352
1236 /* adjust locations of records that will be AFTER new records */ 1353 /* adjust locations of records that will be AFTER new records */
1237 for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--) 1354 for (i = ih_entry_count(ih) - 1; i >= before; i--)
1238 put_deh_location(&(deh[i]), 1355 put_deh_location(&deh[i],
1239 deh_location(&(deh[i])) + 1356 deh_location(&deh[i]) +
1240 (DEH_SIZE * new_entry_count)); 1357 (DEH_SIZE * new_entry_count));
1241 1358
1242 /* adjust locations of records that will be BEFORE new records */ 1359 /* adjust locations of records that will be BEFORE new records */
1243 for (i = 0; i < before; i++) 1360 for (i = 0; i < before; i++)
1244 put_deh_location(&(deh[i]), 1361 put_deh_location(&deh[i],
1245 deh_location(&(deh[i])) + paste_size); 1362 deh_location(&deh[i]) + paste_size);
1246 1363
1247 old_entry_num = I_ENTRY_COUNT(ih); 1364 old_entry_num = ih_entry_count(ih);
1248 put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count); 1365 put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
1249 1366
1250 /* prepare space for pasted records */ 1367 /* prepare space for pasted records */
@@ -1266,10 +1383,10 @@ void leaf_paste_entries(struct buffer_info *bi,
1266 1383
1267 /* set locations of new records */ 1384 /* set locations of new records */
1268 for (i = 0; i < new_entry_count; i++) { 1385 for (i = 0; i < new_entry_count; i++) {
1269 put_deh_location(&(deh[i]), 1386 put_deh_location(&deh[i],
1270 deh_location(&(deh[i])) + 1387 deh_location(&deh[i]) +
1271 (-deh_location 1388 (-deh_location
1272 (&(new_dehs[new_entry_count - 1])) + 1389 (&new_dehs[new_entry_count - 1]) +
1273 insert_point + DEH_SIZE * new_entry_count - 1390 insert_point + DEH_SIZE * new_entry_count -
1274 item)); 1391 item));
1275 } 1392 }
@@ -1277,28 +1394,26 @@ void leaf_paste_entries(struct buffer_info *bi,
1277 /* change item key if necessary (when we paste before 0-th entry */ 1394 /* change item key if necessary (when we paste before 0-th entry */
1278 if (!before) { 1395 if (!before) {
1279 set_le_ih_k_offset(ih, deh_offset(new_dehs)); 1396 set_le_ih_k_offset(ih, deh_offset(new_dehs));
1280/* memcpy (&ih->ih_key.k_offset,
1281 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
1282 } 1397 }
1283#ifdef CONFIG_REISERFS_CHECK 1398#ifdef CONFIG_REISERFS_CHECK
1284 { 1399 {
1285 int prev, next; 1400 int prev, next;
1286 /* check record locations */ 1401 /* check record locations */
1287 deh = B_I_DEH(bh, ih); 1402 deh = B_I_DEH(bh, ih);
1288 for (i = 0; i < I_ENTRY_COUNT(ih); i++) { 1403 for (i = 0; i < ih_entry_count(ih); i++) {
1289 next = 1404 next =
1290 (i < 1405 (i <
1291 I_ENTRY_COUNT(ih) - 1406 ih_entry_count(ih) -
1292 1) ? deh_location(&(deh[i + 1])) : 0; 1407 1) ? deh_location(&deh[i + 1]) : 0;
1293 prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0; 1408 prev = (i != 0) ? deh_location(&deh[i - 1]) : 0;
1294 1409
1295 if (prev && prev <= deh_location(&(deh[i]))) 1410 if (prev && prev <= deh_location(&deh[i]))
1296 reiserfs_error(sb_from_bi(bi), "vs-10240", 1411 reiserfs_error(sb_from_bi(bi), "vs-10240",
1297 "directory item (%h) " 1412 "directory item (%h) "
1298 "corrupted (prev %a, " 1413 "corrupted (prev %a, "
1299 "cur(%d) %a)", 1414 "cur(%d) %a)",
1300 ih, deh + i - 1, i, deh + i); 1415 ih, deh + i - 1, i, deh + i);
1301 if (next && next >= deh_location(&(deh[i]))) 1416 if (next && next >= deh_location(&deh[i]))
1302 reiserfs_error(sb_from_bi(bi), "vs-10250", 1417 reiserfs_error(sb_from_bi(bi), "vs-10250",
1303 "directory item (%h) " 1418 "directory item (%h) "
1304 "corrupted (cur(%d) %a, " 1419 "corrupted (cur(%d) %a, "
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e825f8b63e6b..cd11358b10c7 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -22,8 +22,10 @@
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); 23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
24 24
25// directory item contains array of entry headers. This performs 25/*
26// binary search through that array 26 * directory item contains array of entry headers. This performs
27 * binary search through that array
28 */
27static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) 29static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
28{ 30{
29 struct item_head *ih = de->de_ih; 31 struct item_head *ih = de->de_ih;
@@ -31,7 +33,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
31 int rbound, lbound, j; 33 int rbound, lbound, j;
32 34
33 lbound = 0; 35 lbound = 0;
34 rbound = I_ENTRY_COUNT(ih) - 1; 36 rbound = ih_entry_count(ih) - 1;
35 37
36 for (j = (rbound + lbound) / 2; lbound <= rbound; 38 for (j = (rbound + lbound) / 2; lbound <= rbound;
37 j = (rbound + lbound) / 2) { 39 j = (rbound + lbound) / 2) {
@@ -43,7 +45,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
43 lbound = j + 1; 45 lbound = j + 1;
44 continue; 46 continue;
45 } 47 }
46 // this is not name found, but matched third key component 48 /* this is not name found, but matched third key component */
47 de->de_entry_num = j; 49 de->de_entry_num = j;
48 return NAME_FOUND; 50 return NAME_FOUND;
49 } 51 }
@@ -52,17 +54,21 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
52 return NAME_NOT_FOUND; 54 return NAME_NOT_FOUND;
53} 55}
54 56
55// comment? maybe something like set de to point to what the path points to? 57/*
58 * comment? maybe something like set de to point to what the path points to?
59 */
56static inline void set_de_item_location(struct reiserfs_dir_entry *de, 60static inline void set_de_item_location(struct reiserfs_dir_entry *de,
57 struct treepath *path) 61 struct treepath *path)
58{ 62{
59 de->de_bh = get_last_bh(path); 63 de->de_bh = get_last_bh(path);
60 de->de_ih = get_ih(path); 64 de->de_ih = tp_item_head(path);
61 de->de_deh = B_I_DEH(de->de_bh, de->de_ih); 65 de->de_deh = B_I_DEH(de->de_bh, de->de_ih);
62 de->de_item_num = PATH_LAST_POSITION(path); 66 de->de_item_num = PATH_LAST_POSITION(path);
63} 67}
64 68
65// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set 69/*
70 * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
71 */
66inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) 72inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
67{ 73{
68 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; 74 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
@@ -71,17 +77,17 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
71 77
72 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num); 78 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
73 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0); 79 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
74 de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh); 80 de->de_name = ih_item_body(de->de_bh, de->de_ih) + deh_location(deh);
75 if (de->de_name[de->de_namelen - 1] == 0) 81 if (de->de_name[de->de_namelen - 1] == 0)
76 de->de_namelen = strlen(de->de_name); 82 de->de_namelen = strlen(de->de_name);
77} 83}
78 84
79// what entry points to 85/* what entry points to */
80static inline void set_de_object_key(struct reiserfs_dir_entry *de) 86static inline void set_de_object_key(struct reiserfs_dir_entry *de)
81{ 87{
82 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); 88 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
83 de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num])); 89 de->de_dir_id = deh_dir_id(&de->de_deh[de->de_entry_num]);
84 de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num])); 90 de->de_objectid = deh_objectid(&de->de_deh[de->de_entry_num]);
85} 91}
86 92
87static inline void store_de_entry_key(struct reiserfs_dir_entry *de) 93static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
@@ -96,21 +102,20 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
96 le32_to_cpu(de->de_ih->ih_key.k_dir_id); 102 le32_to_cpu(de->de_ih->ih_key.k_dir_id);
97 de->de_entry_key.on_disk_key.k_objectid = 103 de->de_entry_key.on_disk_key.k_objectid =
98 le32_to_cpu(de->de_ih->ih_key.k_objectid); 104 le32_to_cpu(de->de_ih->ih_key.k_objectid);
99 set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh)); 105 set_cpu_key_k_offset(&de->de_entry_key, deh_offset(deh));
100 set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); 106 set_cpu_key_k_type(&de->de_entry_key, TYPE_DIRENTRY);
101} 107}
102 108
103/* We assign a key to each directory item, and place multiple entries 109/*
104in a single directory item. A directory item has a key equal to the 110 * We assign a key to each directory item, and place multiple entries in a
105key of the first directory entry in it. 111 * single directory item. A directory item has a key equal to the key of
106 112 * the first directory entry in it.
107This function first calls search_by_key, then, if item whose first 113
108entry matches is not found it looks for the entry inside directory 114 * This function first calls search_by_key, then, if item whose first entry
109item found by search_by_key. Fills the path to the entry, and to the 115 * matches is not found it looks for the entry inside directory item found
110entry position in the item 116 * by search_by_key. Fills the path to the entry, and to the entry position
111 117 * in the item
112*/ 118 */
113
114/* The function is NOT SCHEDULE-SAFE! */ 119/* The function is NOT SCHEDULE-SAFE! */
115int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, 120int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
116 struct treepath *path, struct reiserfs_dir_entry *de) 121 struct treepath *path, struct reiserfs_dir_entry *de)
@@ -144,7 +149,7 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
144 149
145#ifdef CONFIG_REISERFS_CHECK 150#ifdef CONFIG_REISERFS_CHECK
146 if (!is_direntry_le_ih(de->de_ih) || 151 if (!is_direntry_le_ih(de->de_ih) ||
147 COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) { 152 COMP_SHORT_KEYS(&de->de_ih->ih_key, key)) {
148 print_block(de->de_bh, 0, -1, -1); 153 print_block(de->de_bh, 0, -1, -1);
149 reiserfs_panic(sb, "vs-7005", "found item %h is not directory " 154 reiserfs_panic(sb, "vs-7005", "found item %h is not directory "
150 "item or does not belong to the same directory " 155 "item or does not belong to the same directory "
@@ -152,12 +157,17 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
152 } 157 }
153#endif /* CONFIG_REISERFS_CHECK */ 158#endif /* CONFIG_REISERFS_CHECK */
154 159
155 /* binary search in directory item by third componen t of the 160 /*
156 key. sets de->de_entry_num of de */ 161 * binary search in directory item by third component of the
162 * key. sets de->de_entry_num of de
163 */
157 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); 164 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
158 path->pos_in_item = de->de_entry_num; 165 path->pos_in_item = de->de_entry_num;
159 if (retval != NAME_NOT_FOUND) { 166 if (retval != NAME_NOT_FOUND) {
160 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set 167 /*
168 * ugly, but rename needs de_bh, de_deh, de_name,
169 * de_namelen, de_objectid set
170 */
161 set_de_name_and_namelen(de); 171 set_de_name_and_namelen(de);
162 set_de_object_key(de); 172 set_de_object_key(de);
163 } 173 }
@@ -166,11 +176,12 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
166 176
167/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ 177/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
168 178
169/* The third component is hashed, and you can choose from more than 179/*
170 one hash function. Per directory hashes are not yet implemented 180 * The third component is hashed, and you can choose from more than
171 but are thought about. This function should be moved to hashes.c 181 * one hash function. Per directory hashes are not yet implemented
172 Jedi, please do so. -Hans */ 182 * but are thought about. This function should be moved to hashes.c
173 183 * Jedi, please do so. -Hans
184 */
174static __u32 get_third_component(struct super_block *s, 185static __u32 get_third_component(struct super_block *s,
175 const char *name, int len) 186 const char *name, int len)
176{ 187{
@@ -183,11 +194,13 @@ static __u32 get_third_component(struct super_block *s,
183 194
184 res = REISERFS_SB(s)->s_hash_function(name, len); 195 res = REISERFS_SB(s)->s_hash_function(name, len);
185 196
186 // take bits from 7-th to 30-th including both bounds 197 /* take bits from 7-th to 30-th including both bounds */
187 res = GET_HASH_VALUE(res); 198 res = GET_HASH_VALUE(res);
188 if (res == 0) 199 if (res == 0)
189 // needed to have no names before "." and ".." those have hash 200 /*
190 // value == 0 and generation conters 1 and 2 accordingly 201 * needed to have no names before "." and ".." those have hash
202 * value == 0 and generation conters 1 and 2 accordingly
203 */
191 res = 128; 204 res = 128;
192 return res + MAX_GENERATION_NUMBER; 205 return res + MAX_GENERATION_NUMBER;
193} 206}
@@ -208,7 +221,7 @@ static int reiserfs_match(struct reiserfs_dir_entry *de,
208 221
209/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ 222/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
210 223
211 /* used when hash collisions exist */ 224/* used when hash collisions exist */
212 225
213static int linear_search_in_dir_item(struct cpu_key *key, 226static int linear_search_in_dir_item(struct cpu_key *key,
214 struct reiserfs_dir_entry *de, 227 struct reiserfs_dir_entry *de,
@@ -220,7 +233,7 @@ static int linear_search_in_dir_item(struct cpu_key *key,
220 233
221 i = de->de_entry_num; 234 i = de->de_entry_num;
222 235
223 if (i == I_ENTRY_COUNT(de->de_ih) || 236 if (i == ih_entry_count(de->de_ih) ||
224 GET_HASH_VALUE(deh_offset(deh + i)) != 237 GET_HASH_VALUE(deh_offset(deh + i)) !=
225 GET_HASH_VALUE(cpu_key_k_offset(key))) { 238 GET_HASH_VALUE(cpu_key_k_offset(key))) {
226 i--; 239 i--;
@@ -232,43 +245,50 @@ static int linear_search_in_dir_item(struct cpu_key *key,
232 deh += i; 245 deh += i;
233 246
234 for (; i >= 0; i--, deh--) { 247 for (; i >= 0; i--, deh--) {
248 /* hash value does not match, no need to check whole name */
235 if (GET_HASH_VALUE(deh_offset(deh)) != 249 if (GET_HASH_VALUE(deh_offset(deh)) !=
236 GET_HASH_VALUE(cpu_key_k_offset(key))) { 250 GET_HASH_VALUE(cpu_key_k_offset(key))) {
237 // hash value does not match, no need to check whole name
238 return NAME_NOT_FOUND; 251 return NAME_NOT_FOUND;
239 } 252 }
240 253
241 /* mark, that this generation number is used */ 254 /* mark that this generation number is used */
242 if (de->de_gen_number_bit_string) 255 if (de->de_gen_number_bit_string)
243 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), 256 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
244 de->de_gen_number_bit_string); 257 de->de_gen_number_bit_string);
245 258
246 // calculate pointer to name and namelen 259 /* calculate pointer to name and namelen */
247 de->de_entry_num = i; 260 de->de_entry_num = i;
248 set_de_name_and_namelen(de); 261 set_de_name_and_namelen(de);
249 262
263 /*
264 * de's de_name, de_namelen, de_recordlen are set.
265 * Fill the rest.
266 */
250 if ((retval = 267 if ((retval =
251 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { 268 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
252 // de's de_name, de_namelen, de_recordlen are set. Fill the rest:
253 269
254 // key of pointed object 270 /* key of pointed object */
255 set_de_object_key(de); 271 set_de_object_key(de);
256 272
257 store_de_entry_key(de); 273 store_de_entry_key(de);
258 274
259 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE 275 /* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */
260 return retval; 276 return retval;
261 } 277 }
262 } 278 }
263 279
264 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) 280 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
265 /* we have reached left most entry in the node. In common we 281 /*
266 have to go to the left neighbor, but if generation counter 282 * we have reached left most entry in the node. In common we
267 is 0 already, we know for sure, that there is no name with 283 * have to go to the left neighbor, but if generation counter
268 the same hash value */ 284 * is 0 already, we know for sure, that there is no name with
269 // FIXME: this work correctly only because hash value can not 285 * the same hash value
270 // be 0. Btw, in case of Yura's hash it is probably possible, 286 */
271 // so, this is a bug 287 /*
288 * FIXME: this work correctly only because hash value can not
289 * be 0. Btw, in case of Yura's hash it is probably possible,
290 * so, this is a bug
291 */
272 return NAME_NOT_FOUND; 292 return NAME_NOT_FOUND;
273 293
274 RFALSE(de->de_item_num, 294 RFALSE(de->de_item_num,
@@ -277,8 +297,10 @@ static int linear_search_in_dir_item(struct cpu_key *key,
277 return GOTO_PREVIOUS_ITEM; 297 return GOTO_PREVIOUS_ITEM;
278} 298}
279 299
280// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND 300/*
281// FIXME: should add something like IOERROR 301 * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
302 * FIXME: should add something like IOERROR
303 */
282static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, 304static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
283 struct treepath *path_to_entry, 305 struct treepath *path_to_entry,
284 struct reiserfs_dir_entry *de) 306 struct reiserfs_dir_entry *de)
@@ -307,13 +329,19 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
307 retval = 329 retval =
308 linear_search_in_dir_item(&key_to_search, de, name, 330 linear_search_in_dir_item(&key_to_search, de, name,
309 namelen); 331 namelen);
332 /*
333 * there is no need to scan directory anymore.
334 * Given entry found or does not exist
335 */
310 if (retval != GOTO_PREVIOUS_ITEM) { 336 if (retval != GOTO_PREVIOUS_ITEM) {
311 /* there is no need to scan directory anymore. Given entry found or does not exist */
312 path_to_entry->pos_in_item = de->de_entry_num; 337 path_to_entry->pos_in_item = de->de_entry_num;
313 return retval; 338 return retval;
314 } 339 }
315 340
316 /* there is left neighboring item of this directory and given entry can be there */ 341 /*
342 * there is left neighboring item of this directory
343 * and given entry can be there
344 */
317 set_cpu_key_k_offset(&key_to_search, 345 set_cpu_key_k_offset(&key_to_search,
318 le_ih_k_offset(de->de_ih) - 1); 346 le_ih_k_offset(de->de_ih) - 1);
319 pathrelse(path_to_entry); 347 pathrelse(path_to_entry);
@@ -341,14 +369,16 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
341 pathrelse(&path_to_entry); 369 pathrelse(&path_to_entry);
342 if (retval == NAME_FOUND) { 370 if (retval == NAME_FOUND) {
343 inode = reiserfs_iget(dir->i_sb, 371 inode = reiserfs_iget(dir->i_sb,
344 (struct cpu_key *)&(de.de_dir_id)); 372 (struct cpu_key *)&de.de_dir_id);
345 if (!inode || IS_ERR(inode)) { 373 if (!inode || IS_ERR(inode)) {
346 reiserfs_write_unlock(dir->i_sb); 374 reiserfs_write_unlock(dir->i_sb);
347 return ERR_PTR(-EACCES); 375 return ERR_PTR(-EACCES);
348 } 376 }
349 377
350 /* Propagate the private flag so we know we're 378 /*
351 * in the priv tree */ 379 * Propagate the private flag so we know we're
380 * in the priv tree
381 */
352 if (IS_PRIVATE(dir)) 382 if (IS_PRIVATE(dir))
353 inode->i_flags |= S_PRIVATE; 383 inode->i_flags |= S_PRIVATE;
354 } 384 }
@@ -361,9 +391,9 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
361} 391}
362 392
363/* 393/*
364** looks up the dentry of the parent directory for child. 394 * looks up the dentry of the parent directory for child.
365** taken from ext2_get_parent 395 * taken from ext2_get_parent
366*/ 396 */
367struct dentry *reiserfs_get_parent(struct dentry *child) 397struct dentry *reiserfs_get_parent(struct dentry *child)
368{ 398{
369 int retval; 399 int retval;
@@ -384,7 +414,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
384 reiserfs_write_unlock(dir->i_sb); 414 reiserfs_write_unlock(dir->i_sb);
385 return ERR_PTR(-ENOENT); 415 return ERR_PTR(-ENOENT);
386 } 416 }
387 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); 417 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&de.de_dir_id);
388 reiserfs_write_unlock(dir->i_sb); 418 reiserfs_write_unlock(dir->i_sb);
389 419
390 return d_obtain_alias(inode); 420 return d_obtain_alias(inode);
@@ -406,8 +436,13 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
406 struct reiserfs_dir_entry de; 436 struct reiserfs_dir_entry de;
407 DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); 437 DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1);
408 int gen_number; 438 int gen_number;
409 char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc 439
410 if we create file with short name */ 440 /*
441 * 48 bytes now and we avoid kmalloc if we
442 * create file with short name
443 */
444 char small_buf[32 + DEH_SIZE];
445
411 char *buffer; 446 char *buffer;
412 int buflen, paste_size; 447 int buflen, paste_size;
413 int retval; 448 int retval;
@@ -439,21 +474,30 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
439 (get_inode_sd_version(dir) == 474 (get_inode_sd_version(dir) ==
440 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; 475 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
441 476
442 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ 477 /*
478 * fill buffer : directory entry head, name[, dir objectid | ,
479 * stat data | ,stat data, dir objectid ]
480 */
443 deh = (struct reiserfs_de_head *)buffer; 481 deh = (struct reiserfs_de_head *)buffer;
444 deh->deh_location = 0; /* JDM Endian safe if 0 */ 482 deh->deh_location = 0; /* JDM Endian safe if 0 */
445 put_deh_offset(deh, cpu_key_k_offset(&entry_key)); 483 put_deh_offset(deh, cpu_key_k_offset(&entry_key));
446 deh->deh_state = 0; /* JDM Endian safe if 0 */ 484 deh->deh_state = 0; /* JDM Endian safe if 0 */
447 /* put key (ino analog) to de */ 485 /* put key (ino analog) to de */
448 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ 486
449 deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ 487 /* safe: k_dir_id is le */
488 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id;
489 /* safe: k_objectid is le */
490 deh->deh_objectid = INODE_PKEY(inode)->k_objectid;
450 491
451 /* copy name */ 492 /* copy name */
452 memcpy((char *)(deh + 1), name, namelen); 493 memcpy((char *)(deh + 1), name, namelen);
453 /* padd by 0s to the 4 byte boundary */ 494 /* padd by 0s to the 4 byte boundary */
454 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); 495 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
455 496
456 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ 497 /*
498 * entry is ready to be pasted into tree, set 'visibility'
499 * and 'stat data in entry' attributes
500 */
457 mark_de_without_sd(deh); 501 mark_de_without_sd(deh);
458 visible ? mark_de_visible(deh) : mark_de_hidden(deh); 502 visible ? mark_de_visible(deh) : mark_de_hidden(deh);
459 503
@@ -499,7 +543,8 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
499 /* update max-hash-collisions counter in reiserfs_sb_info */ 543 /* update max-hash-collisions counter in reiserfs_sb_info */
500 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); 544 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
501 545
502 if (gen_number != 0) { /* we need to re-search for the insertion point */ 546 /* we need to re-search for the insertion point */
547 if (gen_number != 0) {
503 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != 548 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
504 NAME_NOT_FOUND) { 549 NAME_NOT_FOUND) {
505 reiserfs_warning(dir->i_sb, "vs-7032", 550 reiserfs_warning(dir->i_sb, "vs-7032",
@@ -527,18 +572,19 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
527 dir->i_size += paste_size; 572 dir->i_size += paste_size;
528 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 573 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
529 if (!S_ISDIR(inode->i_mode) && visible) 574 if (!S_ISDIR(inode->i_mode) && visible)
530 // reiserfs_mkdir or reiserfs_rename will do that by itself 575 /* reiserfs_mkdir or reiserfs_rename will do that by itself */
531 reiserfs_update_sd(th, dir); 576 reiserfs_update_sd(th, dir);
532 577
533 reiserfs_check_path(&path); 578 reiserfs_check_path(&path);
534 return 0; 579 return 0;
535} 580}
536 581
537/* quota utility function, call if you've had to abort after calling 582/*
538** new_inode_init, and have not called reiserfs_new_inode yet. 583 * quota utility function, call if you've had to abort after calling
539** This should only be called on inodes that do not have stat data 584 * new_inode_init, and have not called reiserfs_new_inode yet.
540** inserted into the tree yet. 585 * This should only be called on inodes that do not have stat data
541*/ 586 * inserted into the tree yet.
587 */
542static int drop_new_inode(struct inode *inode) 588static int drop_new_inode(struct inode *inode)
543{ 589{
544 dquot_drop(inode); 590 dquot_drop(inode);
@@ -548,18 +594,23 @@ static int drop_new_inode(struct inode *inode)
548 return 0; 594 return 0;
549} 595}
550 596
551/* utility function that does setup for reiserfs_new_inode. 597/*
552** dquot_initialize needs lots of credits so it's better to have it 598 * utility function that does setup for reiserfs_new_inode.
553** outside of a transaction, so we had to pull some bits of 599 * dquot_initialize needs lots of credits so it's better to have it
554** reiserfs_new_inode out into this func. 600 * outside of a transaction, so we had to pull some bits of
555*/ 601 * reiserfs_new_inode out into this func.
602 */
556static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) 603static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
557{ 604{
558 /* Make inode invalid - just in case we are going to drop it before 605 /*
559 * the initialization happens */ 606 * Make inode invalid - just in case we are going to drop it before
607 * the initialization happens
608 */
560 INODE_PKEY(inode)->k_objectid = 0; 609 INODE_PKEY(inode)->k_objectid = 0;
561 /* the quota init calls have to know who to charge the quota to, so 610
562 ** we have to set uid and gid here 611 /*
612 * the quota init calls have to know who to charge the quota to, so
613 * we have to set uid and gid here
563 */ 614 */
564 inode_init_owner(inode, dir, mode); 615 inode_init_owner(inode, dir, mode);
565 dquot_initialize(inode); 616 dquot_initialize(inode);
@@ -571,7 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
571{ 622{
572 int retval; 623 int retval;
573 struct inode *inode; 624 struct inode *inode;
574 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 625 /*
626 * We need blocks for transaction + (user+group)*(quotas
627 * for new inode + update of quota for directory owner)
628 */
575 int jbegin_count = 629 int jbegin_count =
576 JOURNAL_PER_BALANCE_CNT * 2 + 630 JOURNAL_PER_BALANCE_CNT * 2 +
577 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 631 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -618,7 +672,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
618 int err; 672 int err;
619 drop_nlink(inode); 673 drop_nlink(inode);
620 reiserfs_update_sd(&th, inode); 674 reiserfs_update_sd(&th, inode);
621 err = journal_end(&th, dir->i_sb, jbegin_count); 675 err = journal_end(&th);
622 if (err) 676 if (err)
623 retval = err; 677 retval = err;
624 unlock_new_inode(inode); 678 unlock_new_inode(inode);
@@ -630,9 +684,9 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
630 684
631 unlock_new_inode(inode); 685 unlock_new_inode(inode);
632 d_instantiate(dentry, inode); 686 d_instantiate(dentry, inode);
633 retval = journal_end(&th, dir->i_sb, jbegin_count); 687 retval = journal_end(&th);
634 688
635 out_failed: 689out_failed:
636 reiserfs_write_unlock(dir->i_sb); 690 reiserfs_write_unlock(dir->i_sb);
637 return retval; 691 return retval;
638} 692}
@@ -644,7 +698,10 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
644 struct inode *inode; 698 struct inode *inode;
645 struct reiserfs_transaction_handle th; 699 struct reiserfs_transaction_handle th;
646 struct reiserfs_security_handle security; 700 struct reiserfs_security_handle security;
647 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 701 /*
702 * We need blocks for transaction + (user+group)*(quotas
703 * for new inode + update of quota for directory owner)
704 */
648 int jbegin_count = 705 int jbegin_count =
649 JOURNAL_PER_BALANCE_CNT * 3 + 706 JOURNAL_PER_BALANCE_CNT * 3 +
650 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 707 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -685,7 +742,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
685 inode->i_op = &reiserfs_special_inode_operations; 742 inode->i_op = &reiserfs_special_inode_operations;
686 init_special_inode(inode, inode->i_mode, rdev); 743 init_special_inode(inode, inode->i_mode, rdev);
687 744
688 //FIXME: needed for block and char devices only 745 /* FIXME: needed for block and char devices only */
689 reiserfs_update_sd(&th, inode); 746 reiserfs_update_sd(&th, inode);
690 747
691 reiserfs_update_inode_transaction(inode); 748 reiserfs_update_inode_transaction(inode);
@@ -698,7 +755,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
698 int err; 755 int err;
699 drop_nlink(inode); 756 drop_nlink(inode);
700 reiserfs_update_sd(&th, inode); 757 reiserfs_update_sd(&th, inode);
701 err = journal_end(&th, dir->i_sb, jbegin_count); 758 err = journal_end(&th);
702 if (err) 759 if (err)
703 retval = err; 760 retval = err;
704 unlock_new_inode(inode); 761 unlock_new_inode(inode);
@@ -708,9 +765,9 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
708 765
709 unlock_new_inode(inode); 766 unlock_new_inode(inode);
710 d_instantiate(dentry, inode); 767 d_instantiate(dentry, inode);
711 retval = journal_end(&th, dir->i_sb, jbegin_count); 768 retval = journal_end(&th);
712 769
713 out_failed: 770out_failed:
714 reiserfs_write_unlock(dir->i_sb); 771 reiserfs_write_unlock(dir->i_sb);
715 return retval; 772 return retval;
716} 773}
@@ -721,7 +778,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
721 struct inode *inode; 778 struct inode *inode;
722 struct reiserfs_transaction_handle th; 779 struct reiserfs_transaction_handle th;
723 struct reiserfs_security_handle security; 780 struct reiserfs_security_handle security;
724 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 781 /*
782 * We need blocks for transaction + (user+group)*(quotas
783 * for new inode + update of quota for directory owner)
784 */
725 int jbegin_count = 785 int jbegin_count =
726 JOURNAL_PER_BALANCE_CNT * 3 + 786 JOURNAL_PER_BALANCE_CNT * 3 +
727 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 787 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -730,7 +790,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
730 dquot_initialize(dir); 790 dquot_initialize(dir);
731 791
732#ifdef DISPLACE_NEW_PACKING_LOCALITIES 792#ifdef DISPLACE_NEW_PACKING_LOCALITIES
733 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ 793 /*
794 * set flag that new packing locality created and new blocks
795 * for the content of that directory are not displaced yet
796 */
734 REISERFS_I(dir)->new_packing_locality = 1; 797 REISERFS_I(dir)->new_packing_locality = 1;
735#endif 798#endif
736 mode = S_IFDIR | mode; 799 mode = S_IFDIR | mode;
@@ -754,8 +817,9 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
754 goto out_failed; 817 goto out_failed;
755 } 818 }
756 819
757 /* inc the link count now, so another writer doesn't overflow it while 820 /*
758 ** we sleep later on. 821 * inc the link count now, so another writer doesn't overflow
822 * it while we sleep later on.
759 */ 823 */
760 INC_DIR_INODE_NLINK(dir) 824 INC_DIR_INODE_NLINK(dir)
761 825
@@ -774,7 +838,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
774 inode->i_op = &reiserfs_dir_inode_operations; 838 inode->i_op = &reiserfs_dir_inode_operations;
775 inode->i_fop = &reiserfs_dir_operations; 839 inode->i_fop = &reiserfs_dir_operations;
776 840
777 // note, _this_ add_entry will not update dir's stat data 841 /* note, _this_ add_entry will not update dir's stat data */
778 retval = 842 retval =
779 reiserfs_add_entry(&th, dir, dentry->d_name.name, 843 reiserfs_add_entry(&th, dir, dentry->d_name.name,
780 dentry->d_name.len, inode, 1 /*visible */ ); 844 dentry->d_name.len, inode, 1 /*visible */ );
@@ -783,19 +847,19 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
783 clear_nlink(inode); 847 clear_nlink(inode);
784 DEC_DIR_INODE_NLINK(dir); 848 DEC_DIR_INODE_NLINK(dir);
785 reiserfs_update_sd(&th, inode); 849 reiserfs_update_sd(&th, inode);
786 err = journal_end(&th, dir->i_sb, jbegin_count); 850 err = journal_end(&th);
787 if (err) 851 if (err)
788 retval = err; 852 retval = err;
789 unlock_new_inode(inode); 853 unlock_new_inode(inode);
790 iput(inode); 854 iput(inode);
791 goto out_failed; 855 goto out_failed;
792 } 856 }
793 // the above add_entry did not update dir's stat data 857 /* the above add_entry did not update dir's stat data */
794 reiserfs_update_sd(&th, dir); 858 reiserfs_update_sd(&th, dir);
795 859
796 unlock_new_inode(inode); 860 unlock_new_inode(inode);
797 d_instantiate(dentry, inode); 861 d_instantiate(dentry, inode);
798 retval = journal_end(&th, dir->i_sb, jbegin_count); 862 retval = journal_end(&th);
799out_failed: 863out_failed:
800 reiserfs_write_unlock(dir->i_sb); 864 reiserfs_write_unlock(dir->i_sb);
801 return retval; 865 return retval;
@@ -803,10 +867,11 @@ out_failed:
803 867
804static inline int reiserfs_empty_dir(struct inode *inode) 868static inline int reiserfs_empty_dir(struct inode *inode)
805{ 869{
806 /* we can cheat because an old format dir cannot have 870 /*
807 ** EMPTY_DIR_SIZE, and a new format dir cannot have 871 * we can cheat because an old format dir cannot have
808 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, 872 * EMPTY_DIR_SIZE, and a new format dir cannot have
809 ** regardless of disk format version, the directory is empty. 873 * EMPTY_DIR_SIZE_V1. So, if the inode is either size,
874 * regardless of disk format version, the directory is empty.
810 */ 875 */
811 if (inode->i_size != EMPTY_DIR_SIZE && 876 if (inode->i_size != EMPTY_DIR_SIZE &&
812 inode->i_size != EMPTY_DIR_SIZE_V1) { 877 inode->i_size != EMPTY_DIR_SIZE_V1) {
@@ -824,10 +889,12 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
824 INITIALIZE_PATH(path); 889 INITIALIZE_PATH(path);
825 struct reiserfs_dir_entry de; 890 struct reiserfs_dir_entry de;
826 891
827 /* we will be doing 2 balancings and update 2 stat data, we change quotas 892 /*
828 * of the owner of the directory and of the owner of the parent directory. 893 * we will be doing 2 balancings and update 2 stat data, we
829 * The quota structure is possibly deleted only on last iput => outside 894 * change quotas of the owner of the directory and of the owner
830 * of this transaction */ 895 * of the parent directory. The quota structure is possibly
896 * deleted only on last iput => outside of this transaction
897 */
831 jbegin_count = 898 jbegin_count =
832 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 899 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
833 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 900 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -856,8 +923,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
856 reiserfs_update_inode_transaction(dir); 923 reiserfs_update_inode_transaction(dir);
857 924
858 if (de.de_objectid != inode->i_ino) { 925 if (de.de_objectid != inode->i_ino) {
859 // FIXME: compare key of an object and a key found in the 926 /*
860 // entry 927 * FIXME: compare key of an object and a key found in the entry
928 */
861 retval = -EIO; 929 retval = -EIO;
862 goto end_rmdir; 930 goto end_rmdir;
863 } 931 }
@@ -867,7 +935,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
867 } 935 }
868 936
869 /* cut entry from dir directory */ 937 /* cut entry from dir directory */
870 retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */ 938 retval = reiserfs_cut_from_item(&th, &path, &de.de_entry_key,
939 dir, NULL, /* page */
871 0 /*new file size - not used here */ ); 940 0 /*new file size - not used here */ );
872 if (retval < 0) 941 if (retval < 0)
873 goto end_rmdir; 942 goto end_rmdir;
@@ -888,18 +957,20 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
888 /* prevent empty directory from getting lost */ 957 /* prevent empty directory from getting lost */
889 add_save_link(&th, inode, 0 /* not truncate */ ); 958 add_save_link(&th, inode, 0 /* not truncate */ );
890 959
891 retval = journal_end(&th, dir->i_sb, jbegin_count); 960 retval = journal_end(&th);
892 reiserfs_check_path(&path); 961 reiserfs_check_path(&path);
893 out_rmdir: 962out_rmdir:
894 reiserfs_write_unlock(dir->i_sb); 963 reiserfs_write_unlock(dir->i_sb);
895 return retval; 964 return retval;
896 965
897 end_rmdir: 966end_rmdir:
898 /* we must release path, because we did not call 967 /*
899 reiserfs_cut_from_item, or reiserfs_cut_from_item does not 968 * we must release path, because we did not call
900 release path if operation was not complete */ 969 * reiserfs_cut_from_item, or reiserfs_cut_from_item does not
970 * release path if operation was not complete
971 */
901 pathrelse(&path); 972 pathrelse(&path);
902 err = journal_end(&th, dir->i_sb, jbegin_count); 973 err = journal_end(&th);
903 reiserfs_write_unlock(dir->i_sb); 974 reiserfs_write_unlock(dir->i_sb);
904 return err ? err : retval; 975 return err ? err : retval;
905} 976}
@@ -918,10 +989,13 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
918 989
919 inode = dentry->d_inode; 990 inode = dentry->d_inode;
920 991
921 /* in this transaction we can be doing at max two balancings and update 992 /*
922 * two stat datas, we change quotas of the owner of the directory and of 993 * in this transaction we can be doing at max two balancings and
923 * the owner of the parent directory. The quota structure is possibly 994 * update two stat datas, we change quotas of the owner of the
924 * deleted only on iput => outside of this transaction */ 995 * directory and of the owner of the parent directory. The quota
996 * structure is possibly deleted only on iput => outside of
997 * this transaction
998 */
925 jbegin_count = 999 jbegin_count =
926 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 1000 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
927 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1001 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -946,8 +1020,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
946 reiserfs_update_inode_transaction(dir); 1020 reiserfs_update_inode_transaction(dir);
947 1021
948 if (de.de_objectid != inode->i_ino) { 1022 if (de.de_objectid != inode->i_ino) {
949 // FIXME: compare key of an object and a key found in the 1023 /*
950 // entry 1024 * FIXME: compare key of an object and a key found in the entry
1025 */
951 retval = -EIO; 1026 retval = -EIO;
952 goto end_unlink; 1027 goto end_unlink;
953 } 1028 }
@@ -968,7 +1043,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
968 savelink = inode->i_nlink; 1043 savelink = inode->i_nlink;
969 1044
970 retval = 1045 retval =
971 reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, 1046 reiserfs_cut_from_item(&th, &path, &de.de_entry_key, dir, NULL,
972 0); 1047 0);
973 if (retval < 0) { 1048 if (retval < 0) {
974 inc_nlink(inode); 1049 inc_nlink(inode);
@@ -985,18 +1060,18 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
985 /* prevent file from getting lost */ 1060 /* prevent file from getting lost */
986 add_save_link(&th, inode, 0 /* not truncate */ ); 1061 add_save_link(&th, inode, 0 /* not truncate */ );
987 1062
988 retval = journal_end(&th, dir->i_sb, jbegin_count); 1063 retval = journal_end(&th);
989 reiserfs_check_path(&path); 1064 reiserfs_check_path(&path);
990 reiserfs_write_unlock(dir->i_sb); 1065 reiserfs_write_unlock(dir->i_sb);
991 return retval; 1066 return retval;
992 1067
993 end_unlink: 1068end_unlink:
994 pathrelse(&path); 1069 pathrelse(&path);
995 err = journal_end(&th, dir->i_sb, jbegin_count); 1070 err = journal_end(&th);
996 reiserfs_check_path(&path); 1071 reiserfs_check_path(&path);
997 if (err) 1072 if (err)
998 retval = err; 1073 retval = err;
999 out_unlink: 1074out_unlink:
1000 reiserfs_write_unlock(dir->i_sb); 1075 reiserfs_write_unlock(dir->i_sb);
1001 return retval; 1076 return retval;
1002} 1077}
@@ -1011,7 +1086,10 @@ static int reiserfs_symlink(struct inode *parent_dir,
1011 struct reiserfs_transaction_handle th; 1086 struct reiserfs_transaction_handle th;
1012 struct reiserfs_security_handle security; 1087 struct reiserfs_security_handle security;
1013 int mode = S_IFLNK | S_IRWXUGO; 1088 int mode = S_IFLNK | S_IRWXUGO;
1014 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 1089 /*
1090 * We need blocks for transaction + (user+group)*(quotas for
1091 * new inode + update of quota for directory owner)
1092 */
1015 int jbegin_count = 1093 int jbegin_count =
1016 JOURNAL_PER_BALANCE_CNT * 3 + 1094 JOURNAL_PER_BALANCE_CNT * 3 +
1017 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + 1095 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
@@ -1070,17 +1148,13 @@ static int reiserfs_symlink(struct inode *parent_dir,
1070 inode->i_op = &reiserfs_symlink_inode_operations; 1148 inode->i_op = &reiserfs_symlink_inode_operations;
1071 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1149 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1072 1150
1073 // must be sure this inode is written with this transaction
1074 //
1075 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1076
1077 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, 1151 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
1078 dentry->d_name.len, inode, 1 /*visible */ ); 1152 dentry->d_name.len, inode, 1 /*visible */ );
1079 if (retval) { 1153 if (retval) {
1080 int err; 1154 int err;
1081 drop_nlink(inode); 1155 drop_nlink(inode);
1082 reiserfs_update_sd(&th, inode); 1156 reiserfs_update_sd(&th, inode);
1083 err = journal_end(&th, parent_dir->i_sb, jbegin_count); 1157 err = journal_end(&th);
1084 if (err) 1158 if (err)
1085 retval = err; 1159 retval = err;
1086 unlock_new_inode(inode); 1160 unlock_new_inode(inode);
@@ -1090,8 +1164,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1090 1164
1091 unlock_new_inode(inode); 1165 unlock_new_inode(inode);
1092 d_instantiate(dentry, inode); 1166 d_instantiate(dentry, inode);
1093 retval = journal_end(&th, parent_dir->i_sb, jbegin_count); 1167 retval = journal_end(&th);
1094 out_failed: 1168out_failed:
1095 reiserfs_write_unlock(parent_dir->i_sb); 1169 reiserfs_write_unlock(parent_dir->i_sb);
1096 return retval; 1170 return retval;
1097} 1171}
@@ -1102,7 +1176,10 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1102 int retval; 1176 int retval;
1103 struct inode *inode = old_dentry->d_inode; 1177 struct inode *inode = old_dentry->d_inode;
1104 struct reiserfs_transaction_handle th; 1178 struct reiserfs_transaction_handle th;
1105 /* We need blocks for transaction + update of quotas for the owners of the directory */ 1179 /*
1180 * We need blocks for transaction + update of quotas for
1181 * the owners of the directory
1182 */
1106 int jbegin_count = 1183 int jbegin_count =
1107 JOURNAL_PER_BALANCE_CNT * 3 + 1184 JOURNAL_PER_BALANCE_CNT * 3 +
1108 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1185 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -1111,7 +1188,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1111 1188
1112 reiserfs_write_lock(dir->i_sb); 1189 reiserfs_write_lock(dir->i_sb);
1113 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1190 if (inode->i_nlink >= REISERFS_LINK_MAX) {
1114 //FIXME: sd_nlink is 32 bit for new files 1191 /* FIXME: sd_nlink is 32 bit for new files */
1115 reiserfs_write_unlock(dir->i_sb); 1192 reiserfs_write_unlock(dir->i_sb);
1116 return -EMLINK; 1193 return -EMLINK;
1117 } 1194 }
@@ -1137,7 +1214,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1137 if (retval) { 1214 if (retval) {
1138 int err; 1215 int err;
1139 drop_nlink(inode); 1216 drop_nlink(inode);
1140 err = journal_end(&th, dir->i_sb, jbegin_count); 1217 err = journal_end(&th);
1141 reiserfs_write_unlock(dir->i_sb); 1218 reiserfs_write_unlock(dir->i_sb);
1142 return err ? err : retval; 1219 return err ? err : retval;
1143 } 1220 }
@@ -1147,7 +1224,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1147 1224
1148 ihold(inode); 1225 ihold(inode);
1149 d_instantiate(dentry, inode); 1226 d_instantiate(dentry, inode);
1150 retval = journal_end(&th, dir->i_sb, jbegin_count); 1227 retval = journal_end(&th);
1151 reiserfs_write_unlock(dir->i_sb); 1228 reiserfs_write_unlock(dir->i_sb);
1152 return retval; 1229 return retval;
1153} 1230}
@@ -1158,9 +1235,9 @@ static int de_still_valid(const char *name, int len,
1158{ 1235{
1159 struct reiserfs_dir_entry tmp = *de; 1236 struct reiserfs_dir_entry tmp = *de;
1160 1237
1161 // recalculate pointer to name and name length 1238 /* recalculate pointer to name and name length */
1162 set_de_name_and_namelen(&tmp); 1239 set_de_name_and_namelen(&tmp);
1163 // FIXME: could check more 1240 /* FIXME: could check more */
1164 if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) 1241 if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
1165 return 0; 1242 return 0;
1166 return 1; 1243 return 1;
@@ -1217,14 +1294,16 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1217 unsigned long savelink = 1; 1294 unsigned long savelink = 1;
1218 struct timespec ctime; 1295 struct timespec ctime;
1219 1296
1220 /* three balancings: (1) old name removal, (2) new name insertion 1297 /*
1221 and (3) maybe "save" link insertion 1298 * three balancings: (1) old name removal, (2) new name insertion
1222 stat data updates: (1) old directory, 1299 * and (3) maybe "save" link insertion
1223 (2) new directory and (3) maybe old object stat data (when it is 1300 * stat data updates: (1) old directory,
1224 directory) and (4) maybe stat data of object to which new entry 1301 * (2) new directory and (3) maybe old object stat data (when it is
1225 pointed initially and (5) maybe block containing ".." of 1302 * directory) and (4) maybe stat data of object to which new entry
1226 renamed directory 1303 * pointed initially and (5) maybe block containing ".." of
1227 quota updates: two parent directories */ 1304 * renamed directory
1305 * quota updates: two parent directories
1306 */
1228 jbegin_count = 1307 jbegin_count =
1229 JOURNAL_PER_BALANCE_CNT * 3 + 5 + 1308 JOURNAL_PER_BALANCE_CNT * 3 + 5 +
1230 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); 1309 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
@@ -1235,8 +1314,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1235 old_inode = old_dentry->d_inode; 1314 old_inode = old_dentry->d_inode;
1236 new_dentry_inode = new_dentry->d_inode; 1315 new_dentry_inode = new_dentry->d_inode;
1237 1316
1238 // make sure, that oldname still exists and points to an object we 1317 /*
1239 // are going to rename 1318 * make sure that oldname still exists and points to an object we
1319 * are going to rename
1320 */
1240 old_de.de_gen_number_bit_string = NULL; 1321 old_de.de_gen_number_bit_string = NULL;
1241 reiserfs_write_lock(old_dir->i_sb); 1322 reiserfs_write_lock(old_dir->i_sb);
1242 retval = 1323 retval =
@@ -1256,10 +1337,11 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1256 1337
1257 old_inode_mode = old_inode->i_mode; 1338 old_inode_mode = old_inode->i_mode;
1258 if (S_ISDIR(old_inode_mode)) { 1339 if (S_ISDIR(old_inode_mode)) {
1259 // make sure, that directory being renamed has correct ".." 1340 /*
1260 // and that its new parent directory has not too many links 1341 * make sure that directory being renamed has correct ".."
1261 // already 1342 * and that its new parent directory has not too many links
1262 1343 * already
1344 */
1263 if (new_dentry_inode) { 1345 if (new_dentry_inode) {
1264 if (!reiserfs_empty_dir(new_dentry_inode)) { 1346 if (!reiserfs_empty_dir(new_dentry_inode)) {
1265 reiserfs_write_unlock(old_dir->i_sb); 1347 reiserfs_write_unlock(old_dir->i_sb);
@@ -1267,8 +1349,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1267 } 1349 }
1268 } 1350 }
1269 1351
1270 /* directory is renamed, its parent directory will be changed, 1352 /*
1271 ** so find ".." entry 1353 * directory is renamed, its parent directory will be changed,
1354 * so find ".." entry
1272 */ 1355 */
1273 dot_dot_de.de_gen_number_bit_string = NULL; 1356 dot_dot_de.de_gen_number_bit_string = NULL;
1274 retval = 1357 retval =
@@ -1303,7 +1386,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1303 "new entry is found, new inode == 0"); 1386 "new entry is found, new inode == 0");
1304 } 1387 }
1305 } else if (retval) { 1388 } else if (retval) {
1306 int err = journal_end(&th, old_dir->i_sb, jbegin_count); 1389 int err = journal_end(&th);
1307 reiserfs_write_unlock(old_dir->i_sb); 1390 reiserfs_write_unlock(old_dir->i_sb);
1308 return err ? err : retval; 1391 return err ? err : retval;
1309 } 1392 }
@@ -1311,8 +1394,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1311 reiserfs_update_inode_transaction(old_dir); 1394 reiserfs_update_inode_transaction(old_dir);
1312 reiserfs_update_inode_transaction(new_dir); 1395 reiserfs_update_inode_transaction(new_dir);
1313 1396
1314 /* this makes it so an fsync on an open fd for the old name will 1397 /*
1315 ** commit the rename operation 1398 * this makes it so an fsync on an open fd for the old name will
1399 * commit the rename operation
1316 */ 1400 */
1317 reiserfs_update_inode_transaction(old_inode); 1401 reiserfs_update_inode_transaction(old_inode);
1318 1402
@@ -1320,38 +1404,45 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1320 reiserfs_update_inode_transaction(new_dentry_inode); 1404 reiserfs_update_inode_transaction(new_dentry_inode);
1321 1405
1322 while (1) { 1406 while (1) {
1323 // look for old name using corresponding entry key (found by reiserfs_find_entry) 1407 /*
1408 * look for old name using corresponding entry key
1409 * (found by reiserfs_find_entry)
1410 */
1324 if ((retval = 1411 if ((retval =
1325 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, 1412 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
1326 &old_entry_path, 1413 &old_entry_path,
1327 &old_de)) != NAME_FOUND) { 1414 &old_de)) != NAME_FOUND) {
1328 pathrelse(&old_entry_path); 1415 pathrelse(&old_entry_path);
1329 journal_end(&th, old_dir->i_sb, jbegin_count); 1416 journal_end(&th);
1330 reiserfs_write_unlock(old_dir->i_sb); 1417 reiserfs_write_unlock(old_dir->i_sb);
1331 return -EIO; 1418 return -EIO;
1332 } 1419 }
1333 1420
1334 copy_item_head(&old_entry_ih, get_ih(&old_entry_path)); 1421 copy_item_head(&old_entry_ih, tp_item_head(&old_entry_path));
1335 1422
1336 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); 1423 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
1337 1424
1338 // look for new name by reiserfs_find_entry 1425 /* look for new name by reiserfs_find_entry */
1339 new_de.de_gen_number_bit_string = NULL; 1426 new_de.de_gen_number_bit_string = NULL;
1340 retval = 1427 retval =
1341 reiserfs_find_entry(new_dir, new_dentry->d_name.name, 1428 reiserfs_find_entry(new_dir, new_dentry->d_name.name,
1342 new_dentry->d_name.len, &new_entry_path, 1429 new_dentry->d_name.len, &new_entry_path,
1343 &new_de); 1430 &new_de);
1344 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from 1431 /*
1345 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 1432 * reiserfs_add_entry should not return IO_ERROR,
1433 * because it is called with essentially same parameters from
1434 * reiserfs_add_entry above, and we'll catch any i/o errors
1435 * before we get here.
1436 */
1346 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { 1437 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
1347 pathrelse(&new_entry_path); 1438 pathrelse(&new_entry_path);
1348 pathrelse(&old_entry_path); 1439 pathrelse(&old_entry_path);
1349 journal_end(&th, old_dir->i_sb, jbegin_count); 1440 journal_end(&th);
1350 reiserfs_write_unlock(old_dir->i_sb); 1441 reiserfs_write_unlock(old_dir->i_sb);
1351 return -EIO; 1442 return -EIO;
1352 } 1443 }
1353 1444
1354 copy_item_head(&new_entry_ih, get_ih(&new_entry_path)); 1445 copy_item_head(&new_entry_ih, tp_item_head(&new_entry_path));
1355 1446
1356 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1); 1447 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1);
1357 1448
@@ -1364,28 +1455,32 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1364 pathrelse(&dot_dot_entry_path); 1455 pathrelse(&dot_dot_entry_path);
1365 pathrelse(&new_entry_path); 1456 pathrelse(&new_entry_path);
1366 pathrelse(&old_entry_path); 1457 pathrelse(&old_entry_path);
1367 journal_end(&th, old_dir->i_sb, jbegin_count); 1458 journal_end(&th);
1368 reiserfs_write_unlock(old_dir->i_sb); 1459 reiserfs_write_unlock(old_dir->i_sb);
1369 return -EIO; 1460 return -EIO;
1370 } 1461 }
1371 copy_item_head(&dot_dot_ih, 1462 copy_item_head(&dot_dot_ih,
1372 get_ih(&dot_dot_entry_path)); 1463 tp_item_head(&dot_dot_entry_path));
1373 // node containing ".." gets into transaction 1464 /* node containing ".." gets into transaction */
1374 reiserfs_prepare_for_journal(old_inode->i_sb, 1465 reiserfs_prepare_for_journal(old_inode->i_sb,
1375 dot_dot_de.de_bh, 1); 1466 dot_dot_de.de_bh, 1);
1376 } 1467 }
1377 /* we should check seals here, not do 1468 /*
1378 this stuff, yes? Then, having 1469 * we should check seals here, not do
1379 gathered everything into RAM we 1470 * this stuff, yes? Then, having
1380 should lock the buffers, yes? -Hans */ 1471 * gathered everything into RAM we
1381 /* probably. our rename needs to hold more 1472 * should lock the buffers, yes? -Hans
1382 ** than one path at once. The seals would 1473 */
1383 ** have to be written to deal with multi-path 1474 /*
1384 ** issues -chris 1475 * probably. our rename needs to hold more
1476 * than one path at once. The seals would
1477 * have to be written to deal with multi-path
1478 * issues -chris
1385 */ 1479 */
1386 /* sanity checking before doing the rename - avoid races many 1480 /*
1387 ** of the above checks could have scheduled. We have to be 1481 * sanity checking before doing the rename - avoid races many
1388 ** sure our items haven't been shifted by another process. 1482 * of the above checks could have scheduled. We have to be
1483 * sure our items haven't been shifted by another process.
1389 */ 1484 */
1390 if (item_moved(&new_entry_ih, &new_entry_path) || 1485 if (item_moved(&new_entry_ih, &new_entry_path) ||
1391 !entry_points_to_object(new_dentry->d_name.name, 1486 !entry_points_to_object(new_dentry->d_name.name,
@@ -1430,24 +1525,28 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1430 break; 1525 break;
1431 } 1526 }
1432 1527
1433 /* ok, all the changes can be done in one fell swoop when we 1528 /*
1434 have claimed all the buffers needed. */ 1529 * ok, all the changes can be done in one fell swoop when we
1530 * have claimed all the buffers needed.
1531 */
1435 1532
1436 mark_de_visible(new_de.de_deh + new_de.de_entry_num); 1533 mark_de_visible(new_de.de_deh + new_de.de_entry_num);
1437 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); 1534 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
1438 journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh); 1535 journal_mark_dirty(&th, new_de.de_bh);
1439 1536
1440 mark_de_hidden(old_de.de_deh + old_de.de_entry_num); 1537 mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
1441 journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh); 1538 journal_mark_dirty(&th, old_de.de_bh);
1442 ctime = CURRENT_TIME_SEC; 1539 ctime = CURRENT_TIME_SEC;
1443 old_dir->i_ctime = old_dir->i_mtime = ctime; 1540 old_dir->i_ctime = old_dir->i_mtime = ctime;
1444 new_dir->i_ctime = new_dir->i_mtime = ctime; 1541 new_dir->i_ctime = new_dir->i_mtime = ctime;
1445 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of 1542 /*
1446 renamed object */ 1543 * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
1544 * which adds ctime update of renamed object
1545 */
1447 old_inode->i_ctime = ctime; 1546 old_inode->i_ctime = ctime;
1448 1547
1449 if (new_dentry_inode) { 1548 if (new_dentry_inode) {
1450 // adjust link number of the victim 1549 /* adjust link number of the victim */
1451 if (S_ISDIR(new_dentry_inode->i_mode)) { 1550 if (S_ISDIR(new_dentry_inode->i_mode)) {
1452 clear_nlink(new_dentry_inode); 1551 clear_nlink(new_dentry_inode);
1453 } else { 1552 } else {
@@ -1460,25 +1559,32 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1460 if (S_ISDIR(old_inode_mode)) { 1559 if (S_ISDIR(old_inode_mode)) {
1461 /* adjust ".." of renamed directory */ 1560 /* adjust ".." of renamed directory */
1462 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); 1561 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1463 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); 1562 journal_mark_dirty(&th, dot_dot_de.de_bh);
1464 1563
1564 /*
1565 * there (in new_dir) was no directory, so it got new link
1566 * (".." of renamed directory)
1567 */
1465 if (!new_dentry_inode) 1568 if (!new_dentry_inode)
1466 /* there (in new_dir) was no directory, so it got new link
1467 (".." of renamed directory) */
1468 INC_DIR_INODE_NLINK(new_dir); 1569 INC_DIR_INODE_NLINK(new_dir);
1469 1570
1470 /* old directory lost one link - ".. " of renamed directory */ 1571 /* old directory lost one link - ".. " of renamed directory */
1471 DEC_DIR_INODE_NLINK(old_dir); 1572 DEC_DIR_INODE_NLINK(old_dir);
1472 } 1573 }
1473 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse 1574 /*
1575 * looks like in 2.3.99pre3 brelse is atomic.
1576 * so we can use pathrelse
1577 */
1474 pathrelse(&new_entry_path); 1578 pathrelse(&new_entry_path);
1475 pathrelse(&dot_dot_entry_path); 1579 pathrelse(&dot_dot_entry_path);
1476 1580
1477 // FIXME: this reiserfs_cut_from_item's return value may screw up 1581 /*
1478 // anybody, but it will panic if will not be able to find the 1582 * FIXME: this reiserfs_cut_from_item's return value may screw up
1479 // entry. This needs one more clean up 1583 * anybody, but it will panic if will not be able to find the
1584 * entry. This needs one more clean up
1585 */
1480 if (reiserfs_cut_from_item 1586 if (reiserfs_cut_from_item
1481 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 1587 (&th, &old_entry_path, &old_de.de_entry_key, old_dir, NULL,
1482 0) < 0) 1588 0) < 0)
1483 reiserfs_error(old_dir->i_sb, "vs-7060", 1589 reiserfs_error(old_dir->i_sb, "vs-7060",
1484 "couldn't not cut old name. Fsck later?"); 1590 "couldn't not cut old name. Fsck later?");
@@ -1496,16 +1602,13 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1496 reiserfs_update_sd(&th, new_dentry_inode); 1602 reiserfs_update_sd(&th, new_dentry_inode);
1497 } 1603 }
1498 1604
1499 retval = journal_end(&th, old_dir->i_sb, jbegin_count); 1605 retval = journal_end(&th);
1500 reiserfs_write_unlock(old_dir->i_sb); 1606 reiserfs_write_unlock(old_dir->i_sb);
1501 return retval; 1607 return retval;
1502} 1608}
1503 1609
1504/* 1610/* directories can handle most operations... */
1505 * directories can handle most operations...
1506 */
1507const struct inode_operations reiserfs_dir_inode_operations = { 1611const struct inode_operations reiserfs_dir_inode_operations = {
1508 //&reiserfs_dir_operations, /* default_file_ops */
1509 .create = reiserfs_create, 1612 .create = reiserfs_create,
1510 .lookup = reiserfs_lookup, 1613 .lookup = reiserfs_lookup,
1511 .link = reiserfs_link, 1614 .link = reiserfs_link,
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index f732d6a5251d..99a5d5dae46a 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -7,7 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
9 9
10// find where objectid map starts 10/* find where objectid map starts */
11#define objectid_map(s,rs) (old_format_only (s) ? \ 11#define objectid_map(s,rs) (old_format_only (s) ? \
12 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ 12 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
13 (__le32 *)((rs) + 1)) 13 (__le32 *)((rs) + 1))
@@ -20,7 +20,7 @@ static void check_objectid_map(struct super_block *s, __le32 * map)
20 reiserfs_panic(s, "vs-15010", "map corrupted: %lx", 20 reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
21 (long unsigned int)le32_to_cpu(map[0])); 21 (long unsigned int)le32_to_cpu(map[0]));
22 22
23 // FIXME: add something else here 23 /* FIXME: add something else here */
24} 24}
25 25
26#else 26#else
@@ -29,19 +29,21 @@ static void check_objectid_map(struct super_block *s, __le32 * map)
29} 29}
30#endif 30#endif
31 31
32/* When we allocate objectids we allocate the first unused objectid. 32/*
33 Each sequence of objectids in use (the odd sequences) is followed 33 * When we allocate objectids we allocate the first unused objectid.
34 by a sequence of objectids not in use (the even sequences). We 34 * Each sequence of objectids in use (the odd sequences) is followed
35 only need to record the last objectid in each of these sequences 35 * by a sequence of objectids not in use (the even sequences). We
36 (both the odd and even sequences) in order to fully define the 36 * only need to record the last objectid in each of these sequences
37 boundaries of the sequences. A consequence of allocating the first 37 * (both the odd and even sequences) in order to fully define the
38 objectid not in use is that under most conditions this scheme is 38 * boundaries of the sequences. A consequence of allocating the first
39 extremely compact. The exception is immediately after a sequence 39 * objectid not in use is that under most conditions this scheme is
40 of operations which deletes a large number of objects of 40 * extremely compact. The exception is immediately after a sequence
41 non-sequential objectids, and even then it will become compact 41 * of operations which deletes a large number of objects of
42 again as soon as more objects are created. Note that many 42 * non-sequential objectids, and even then it will become compact
43 interesting optimizations of layout could result from complicating 43 * again as soon as more objects are created. Note that many
44 objectid assignment, but we have deferred making them for now. */ 44 * interesting optimizations of layout could result from complicating
45 * objectid assignment, but we have deferred making them for now.
46 */
45 47
46/* get unique object identifier */ 48/* get unique object identifier */
47__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) 49__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
@@ -64,26 +66,30 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
64 return 0; 66 return 0;
65 } 67 }
66 68
67 /* This incrementation allocates the first unused objectid. That 69 /*
68 is to say, the first entry on the objectid map is the first 70 * This incrementation allocates the first unused objectid. That
69 unused objectid, and by incrementing it we use it. See below 71 * is to say, the first entry on the objectid map is the first
70 where we check to see if we eliminated a sequence of unused 72 * unused objectid, and by incrementing it we use it. See below
71 objectids.... */ 73 * where we check to see if we eliminated a sequence of unused
74 * objectids....
75 */
72 map[1] = cpu_to_le32(unused_objectid + 1); 76 map[1] = cpu_to_le32(unused_objectid + 1);
73 77
74 /* Now we check to see if we eliminated the last remaining member of 78 /*
75 the first even sequence (and can eliminate the sequence by 79 * Now we check to see if we eliminated the last remaining member of
76 eliminating its last objectid from oids), and can collapse the 80 * the first even sequence (and can eliminate the sequence by
77 first two odd sequences into one sequence. If so, then the net 81 * eliminating its last objectid from oids), and can collapse the
78 result is to eliminate a pair of objectids from oids. We do this 82 * first two odd sequences into one sequence. If so, then the net
79 by shifting the entire map to the left. */ 83 * result is to eliminate a pair of objectids from oids. We do this
84 * by shifting the entire map to the left.
85 */
80 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { 86 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
81 memmove(map + 1, map + 3, 87 memmove(map + 1, map + 3,
82 (sb_oid_cursize(rs) - 3) * sizeof(__u32)); 88 (sb_oid_cursize(rs) - 3) * sizeof(__u32));
83 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); 89 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
84 } 90 }
85 91
86 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); 92 journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
87 return unused_objectid; 93 return unused_objectid;
88} 94}
89 95
@@ -97,30 +103,33 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
97 int i = 0; 103 int i = 0;
98 104
99 BUG_ON(!th->t_trans_id); 105 BUG_ON(!th->t_trans_id);
100 //return; 106 /*return; */
101 check_objectid_map(s, map); 107 check_objectid_map(s, map);
102 108
103 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 109 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
104 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); 110 journal_mark_dirty(th, SB_BUFFER_WITH_SB(s));
105 111
106 /* start at the beginning of the objectid map (i = 0) and go to 112 /*
107 the end of it (i = disk_sb->s_oid_cursize). Linear search is 113 * start at the beginning of the objectid map (i = 0) and go to
108 what we use, though it is possible that binary search would be 114 * the end of it (i = disk_sb->s_oid_cursize). Linear search is
109 more efficient after performing lots of deletions (which is 115 * what we use, though it is possible that binary search would be
110 when oids is large.) We only check even i's. */ 116 * more efficient after performing lots of deletions (which is
117 * when oids is large.) We only check even i's.
118 */
111 while (i < sb_oid_cursize(rs)) { 119 while (i < sb_oid_cursize(rs)) {
112 if (objectid_to_release == le32_to_cpu(map[i])) { 120 if (objectid_to_release == le32_to_cpu(map[i])) {
113 /* This incrementation unallocates the objectid. */ 121 /* This incrementation unallocates the objectid. */
114 //map[i]++;
115 le32_add_cpu(&map[i], 1); 122 le32_add_cpu(&map[i], 1);
116 123
117 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 124 /*
125 * Did we unallocate the last member of an
126 * odd sequence, and can shrink oids?
127 */
118 if (map[i] == map[i + 1]) { 128 if (map[i] == map[i + 1]) {
119 /* shrink objectid map */ 129 /* shrink objectid map */
120 memmove(map + i, map + i + 2, 130 memmove(map + i, map + i + 2,
121 (sb_oid_cursize(rs) - i - 131 (sb_oid_cursize(rs) - i -
122 2) * sizeof(__u32)); 132 2) * sizeof(__u32));
123 //disk_sb->s_oid_cursize -= 2;
124 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); 133 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
125 134
126 RFALSE(sb_oid_cursize(rs) < 2 || 135 RFALSE(sb_oid_cursize(rs) < 2 ||
@@ -135,14 +144,19 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
135 objectid_to_release < le32_to_cpu(map[i + 1])) { 144 objectid_to_release < le32_to_cpu(map[i + 1])) {
136 /* size of objectid map is not changed */ 145 /* size of objectid map is not changed */
137 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { 146 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
138 //objectid_map[i+1]--;
139 le32_add_cpu(&map[i + 1], -1); 147 le32_add_cpu(&map[i + 1], -1);
140 return; 148 return;
141 } 149 }
142 150
143 /* JDM comparing two little-endian values for equality -- safe */ 151 /*
152 * JDM comparing two little-endian values for
153 * equality -- safe
154 */
155 /*
156 * objectid map must be expanded, but
157 * there is no space
158 */
144 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { 159 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
145 /* objectid map must be expanded, but there is no space */
146 PROC_INFO_INC(s, leaked_oid); 160 PROC_INFO_INC(s, leaked_oid);
147 return; 161 return;
148 } 162 }
@@ -178,8 +192,9 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
178 new_objectid_map = (__le32 *) (disk_sb + 1); 192 new_objectid_map = (__le32 *) (disk_sb + 1);
179 193
180 if (cur_size > new_size) { 194 if (cur_size > new_size) {
181 /* mark everyone used that was listed as free at the end of the objectid 195 /*
182 ** map 196 * mark everyone used that was listed as free at
197 * the end of the objectid map
183 */ 198 */
184 objectid_map[new_size - 1] = objectid_map[cur_size - 1]; 199 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
185 set_sb_oid_cursize(disk_sb, new_size); 200 set_sb_oid_cursize(disk_sb, new_size);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 54944d5a4a6e..c9b47e91baf8 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -172,18 +172,19 @@ static char *is_there_reiserfs_struct(char *fmt, int *what)
172 return k; 172 return k;
173} 173}
174 174
175/* debugging reiserfs we used to print out a lot of different 175/*
176 variables, like keys, item headers, buffer heads etc. Values of 176 * debugging reiserfs we used to print out a lot of different
177 most fields matter. So it took a long time just to write 177 * variables, like keys, item headers, buffer heads etc. Values of
178 appropriative printk. With this reiserfs_warning you can use format 178 * most fields matter. So it took a long time just to write
179 specification for complex structures like you used to do with 179 * appropriative printk. With this reiserfs_warning you can use format
180 printfs for integers, doubles and pointers. For instance, to print 180 * specification for complex structures like you used to do with
181 out key structure you have to write just: 181 * printfs for integers, doubles and pointers. For instance, to print
182 reiserfs_warning ("bad key %k", key); 182 * out key structure you have to write just:
183 instead of 183 * reiserfs_warning ("bad key %k", key);
184 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, 184 * instead of
185 key->k_offset, key->k_uniqueness); 185 * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
186*/ 186 * key->k_offset, key->k_uniqueness);
187 */
187static DEFINE_SPINLOCK(error_lock); 188static DEFINE_SPINLOCK(error_lock);
188static void prepare_error_buf(const char *fmt, va_list args) 189static void prepare_error_buf(const char *fmt, va_list args)
189{ 190{
@@ -243,15 +244,16 @@ static void prepare_error_buf(const char *fmt, va_list args)
243 244
244} 245}
245 246
246/* in addition to usual conversion specifiers this accepts reiserfs 247/*
247 specific conversion specifiers: 248 * in addition to usual conversion specifiers this accepts reiserfs
248 %k to print little endian key, 249 * specific conversion specifiers:
249 %K to print cpu key, 250 * %k to print little endian key,
250 %h to print item_head, 251 * %K to print cpu key,
251 %t to print directory entry 252 * %h to print item_head,
252 %z to print block head (arg must be struct buffer_head * 253 * %t to print directory entry
253 %b to print buffer_head 254 * %z to print block head (arg must be struct buffer_head *
254*/ 255 * %b to print buffer_head
256 */
255 257
256#define do_reiserfs_warning(fmt)\ 258#define do_reiserfs_warning(fmt)\
257{\ 259{\
@@ -304,50 +306,52 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
304#endif 306#endif
305} 307}
306 308
307/* The format: 309/*
308 310 * The format:
309 maintainer-errorid: [function-name:] message 311 *
310 312 * maintainer-errorid: [function-name:] message
311 where errorid is unique to the maintainer and function-name is 313 *
312 optional, is recommended, so that anyone can easily find the bug 314 * where errorid is unique to the maintainer and function-name is
313 with a simple grep for the short to type string 315 * optional, is recommended, so that anyone can easily find the bug
314 maintainer-errorid. Don't bother with reusing errorids, there are 316 * with a simple grep for the short to type string
315 lots of numbers out there. 317 * maintainer-errorid. Don't bother with reusing errorids, there are
316 318 * lots of numbers out there.
317 Example: 319 *
318 320 * Example:
319 reiserfs_panic( 321 *
320 p_sb, "reiser-29: reiserfs_new_blocknrs: " 322 * reiserfs_panic(
321 "one of search_start or rn(%d) is equal to MAX_B_NUM," 323 * p_sb, "reiser-29: reiserfs_new_blocknrs: "
322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", 324 * "one of search_start or rn(%d) is equal to MAX_B_NUM,"
323 rn, bh 325 * "which means that we are optimizing location based on the "
324 ); 326 * "bogus location of a temp buffer (%p).",
325 327 * rn, bh
326 Regular panic()s sometimes clear the screen before the message can 328 * );
327 be read, thus the need for the while loop. 329 *
328 330 * Regular panic()s sometimes clear the screen before the message can
329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it 331 * be read, thus the need for the while loop.
330 pointless complexity): 332 *
331 333 * Numbering scheme for panic used by Vladimir and Anatoly( Hans completely
332 panics in reiserfs.h have numbers from 1000 to 1999 334 * ignores this scheme, and considers it pointless complexity):
333 super.c 2000 to 2999 335 *
334 preserve.c (unused) 3000 to 3999 336 * panics in reiserfs_fs.h have numbers from 1000 to 1999
335 bitmap.c 4000 to 4999 337 * super.c 2000 to 2999
336 stree.c 5000 to 5999 338 * preserve.c (unused) 3000 to 3999
337 prints.c 6000 to 6999 339 * bitmap.c 4000 to 4999
338 namei.c 7000 to 7999 340 * stree.c 5000 to 5999
339 fix_nodes.c 8000 to 8999 341 * prints.c 6000 to 6999
340 dir.c 9000 to 9999 342 * namei.c 7000 to 7999
341 lbalance.c 10000 to 10999 343 * fix_nodes.c 8000 to 8999
342 ibalance.c 11000 to 11999 not ready 344 * dir.c 9000 to 9999
343 do_balan.c 12000 to 12999 345 * lbalance.c 10000 to 10999
344 inode.c 13000 to 13999 346 * ibalance.c 11000 to 11999 not ready
345 file.c 14000 to 14999 347 * do_balan.c 12000 to 12999
346 objectid.c 15000 - 15999 348 * inode.c 13000 to 13999
347 buffer.c 16000 - 16999 349 * file.c 14000 to 14999
348 symlink.c 17000 - 17999 350 * objectid.c 15000 - 15999
349 351 * buffer.c 16000 - 16999
350 . */ 352 * symlink.c 17000 - 17999
353 *
354 * . */
351 355
352void __reiserfs_panic(struct super_block *sb, const char *id, 356void __reiserfs_panic(struct super_block *sb, const char *id,
353 const char *function, const char *fmt, ...) 357 const char *function, const char *fmt, ...)
@@ -411,9 +415,11 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
411 reiserfs_abort_journal(sb, errno); 415 reiserfs_abort_journal(sb, errno);
412} 416}
413 417
414/* this prints internal nodes (4 keys/items in line) (dc_number, 418/*
415 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, 419 * this prints internal nodes (4 keys/items in line) (dc_number,
416 dc_size)...*/ 420 * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
421 * dc_size)...
422 */
417static int print_internal(struct buffer_head *bh, int first, int last) 423static int print_internal(struct buffer_head *bh, int first, int last)
418{ 424{
419 struct reiserfs_key *key; 425 struct reiserfs_key *key;
@@ -439,7 +445,7 @@ static int print_internal(struct buffer_head *bh, int first, int last)
439 dc = B_N_CHILD(bh, from); 445 dc = B_N_CHILD(bh, from);
440 reiserfs_printk("PTR %d: %y ", from, dc); 446 reiserfs_printk("PTR %d: %y ", from, dc);
441 447
442 for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to; 448 for (i = from, key = internal_key(bh, from), dc++; i < to;
443 i++, key++, dc++) { 449 i++, key++, dc++) {
444 reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); 450 reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
445 if (i && i % 4 == 0) 451 if (i && i % 4 == 0)
@@ -463,7 +469,7 @@ static int print_leaf(struct buffer_head *bh, int print_mode, int first,
463 check_leaf(bh); 469 check_leaf(bh);
464 470
465 blkh = B_BLK_HEAD(bh); 471 blkh = B_BLK_HEAD(bh);
466 ih = B_N_PITEM_HEAD(bh, 0); 472 ih = item_head(bh, 0);
467 nr = blkh_nr_item(blkh); 473 nr = blkh_nr_item(blkh);
468 474
469 printk 475 printk
@@ -496,7 +502,7 @@ static int print_leaf(struct buffer_head *bh, int print_mode, int first,
496 ("-------------------------------------------------------------------------------\n"); 502 ("-------------------------------------------------------------------------------\n");
497 reiserfs_printk("|%2d| %h |\n", i, ih); 503 reiserfs_printk("|%2d| %h |\n", i, ih);
498 if (print_mode & PRINT_LEAF_ITEMS) 504 if (print_mode & PRINT_LEAF_ITEMS)
499 op_print_item(ih, B_I_PITEM(bh, ih)); 505 op_print_item(ih, ih_item_body(bh, ih));
500 } 506 }
501 507
502 printk 508 printk
@@ -543,9 +549,11 @@ static int print_super_block(struct buffer_head *bh)
543 printk("Block count %u\n", sb_block_count(rs)); 549 printk("Block count %u\n", sb_block_count(rs));
544 printk("Blocksize %d\n", sb_blocksize(rs)); 550 printk("Blocksize %d\n", sb_blocksize(rs));
545 printk("Free blocks %u\n", sb_free_blocks(rs)); 551 printk("Free blocks %u\n", sb_free_blocks(rs));
546 // FIXME: this would be confusing if 552 /*
547 // someone stores reiserfs super block in some data block ;) 553 * FIXME: this would be confusing if
554 * someone stores reiserfs super block in some data block ;)
548// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); 555// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
556 */
549 skipped = bh->b_blocknr; 557 skipped = bh->b_blocknr;
550 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - 558 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
551 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 559 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
@@ -581,8 +589,8 @@ static int print_desc_block(struct buffer_head *bh)
581 589
582 return 0; 590 return 0;
583} 591}
584 592/* ..., int print_mode, int first, int last) */
585void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last) 593void print_block(struct buffer_head *bh, ...)
586{ 594{
587 va_list args; 595 va_list args;
588 int mode, first, last; 596 int mode, first, last;
@@ -644,11 +652,11 @@ void store_print_tb(struct tree_balance *tb)
644 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n", 652 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
645 h, 653 h,
646 (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL), 654 (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL),
647 (tbSh) ? atomic_read(&(tbSh->b_count)) : -1, 655 (tbSh) ? atomic_read(&tbSh->b_count) : -1,
648 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL), 656 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL),
649 (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1, 657 (tb->L[h]) ? atomic_read(&tb->L[h]->b_count) : -1,
650 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL), 658 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL),
651 (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1, 659 (tb->R[h]) ? atomic_read(&tb->R[h]->b_count) : -1,
652 (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL), 660 (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL),
653 (tb->FL[h]) ? (long long)(tb->FL[h]-> 661 (tb->FL[h]) ? (long long)(tb->FL[h]->
654 b_blocknr) : (-1LL), 662 b_blocknr) : (-1LL),
@@ -665,9 +673,9 @@ void store_print_tb(struct tree_balance *tb)
665 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" 673 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
666 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", 674 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
667 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0], 675 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],
668 tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes, 676 tb->rbytes, tb->blknum[0], tb->s0num, tb->snum[0],
669 tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], 677 tb->sbytes[0], tb->snum[1], tb->sbytes[1],
670 tb->rkey[0]); 678 tb->cur_blknum, tb->lkey[0], tb->rkey[0]);
671 679
672 /* this prints balance parameters for non-leaf levels */ 680 /* this prints balance parameters for non-leaf levels */
673 h = 0; 681 h = 0;
@@ -690,7 +698,7 @@ void store_print_tb(struct tree_balance *tb)
690 "%p (%llu %d)%s", tb->FEB[i], 698 "%p (%llu %d)%s", tb->FEB[i],
691 tb->FEB[i] ? (unsigned long long)tb->FEB[i]-> 699 tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
692 b_blocknr : 0ULL, 700 b_blocknr : 0ULL,
693 tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0, 701 tb->FEB[i] ? atomic_read(&tb->FEB[i]->b_count) : 0,
694 (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", "); 702 (i == ARRAY_SIZE(tb->FEB) - 1) ? "\n" : ", ");
695 703
696 sprintf(print_tb_buf + strlen(print_tb_buf), 704 sprintf(print_tb_buf + strlen(print_tb_buf),
@@ -744,8 +752,8 @@ void check_leaf(struct buffer_head *bh)
744 if (!bh) 752 if (!bh)
745 return; 753 return;
746 check_leaf_block_head(bh); 754 check_leaf_block_head(bh);
747 for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++) 755 for (i = 0, ih = item_head(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
748 op_check_item(ih, B_I_PITEM(bh, ih)); 756 op_check_item(ih, ih_item_body(bh, ih));
749} 757}
750 758
751void check_internal(struct buffer_head *bh) 759void check_internal(struct buffer_head *bh)
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 83d4eac8059a..bf53888c7f59 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details 2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for
3 * licensing and copyright details
3 */ 4 */
4 5
5#include <linux/reiserfs_fs.h> 6#include <linux/reiserfs_fs.h>
@@ -23,52 +24,73 @@
23 24
24struct reiserfs_journal_list; 25struct reiserfs_journal_list;
25 26
26/** bitmasks for i_flags field in reiserfs-specific part of inode */ 27/* bitmasks for i_flags field in reiserfs-specific part of inode */
27typedef enum { 28typedef enum {
28 /** this says what format of key do all items (but stat data) of 29 /*
29 an object have. If this is set, that format is 3.6 otherwise 30 * this says what format of key do all items (but stat data) of
30 - 3.5 */ 31 * an object have. If this is set, that format is 3.6 otherwise - 3.5
32 */
31 i_item_key_version_mask = 0x0001, 33 i_item_key_version_mask = 0x0001,
32 /** If this is unset, object has 3.5 stat data, otherwise, it has 34
33 3.6 stat data with 64bit size, 32bit nlink etc. */ 35 /*
36 * If this is unset, object has 3.5 stat data, otherwise,
37 * it has 3.6 stat data with 64bit size, 32bit nlink etc.
38 */
34 i_stat_data_version_mask = 0x0002, 39 i_stat_data_version_mask = 0x0002,
35 /** file might need tail packing on close */ 40
41 /* file might need tail packing on close */
36 i_pack_on_close_mask = 0x0004, 42 i_pack_on_close_mask = 0x0004,
37 /** don't pack tail of file */ 43
44 /* don't pack tail of file */
38 i_nopack_mask = 0x0008, 45 i_nopack_mask = 0x0008,
39 /** If those is set, "safe link" was created for this file during 46
40 truncate or unlink. Safe link is used to avoid leakage of disk 47 /*
41 space on crash with some files open, but unlinked. */ 48 * If either of these are set, "safe link" was created for this
49 * file during truncate or unlink. Safe link is used to avoid
50 * leakage of disk space on crash with some files open, but unlinked.
51 */
42 i_link_saved_unlink_mask = 0x0010, 52 i_link_saved_unlink_mask = 0x0010,
43 i_link_saved_truncate_mask = 0x0020, 53 i_link_saved_truncate_mask = 0x0020,
54
44 i_has_xattr_dir = 0x0040, 55 i_has_xattr_dir = 0x0040,
45 i_data_log = 0x0080, 56 i_data_log = 0x0080,
46} reiserfs_inode_flags; 57} reiserfs_inode_flags;
47 58
48struct reiserfs_inode_info { 59struct reiserfs_inode_info {
49 __u32 i_key[4]; /* key is still 4 32 bit integers */ 60 __u32 i_key[4]; /* key is still 4 32 bit integers */
50 /** transient inode flags that are never stored on disk. Bitmasks 61
51 for this field are defined above. */ 62 /*
63 * transient inode flags that are never stored on disk. Bitmasks
64 * for this field are defined above.
65 */
52 __u32 i_flags; 66 __u32 i_flags;
53 67
54 __u32 i_first_direct_byte; // offset of first byte stored in direct item. 68 /* offset of first byte stored in direct item. */
69 __u32 i_first_direct_byte;
55 70
56 /* copy of persistent inode flags read from sd_attrs. */ 71 /* copy of persistent inode flags read from sd_attrs. */
57 __u32 i_attrs; 72 __u32 i_attrs;
58 73
59 int i_prealloc_block; /* first unused block of a sequence of unused blocks */ 74 /* first unused block of a sequence of unused blocks */
75 int i_prealloc_block;
60 int i_prealloc_count; /* length of that sequence */ 76 int i_prealloc_count; /* length of that sequence */
61 struct list_head i_prealloc_list; /* per-transaction list of inodes which
62 * have preallocated blocks */
63 77
64 unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks 78 /* per-transaction list of inodes which have preallocated blocks */
65 * for the contents of this directory should be 79 struct list_head i_prealloc_list;
66 * displaced */
67 80
68 /* we use these for fsync or O_SYNC to decide which transaction 81 /*
69 ** needs to be committed in order for this inode to be properly 82 * new_packing_locality is created; new blocks for the contents
70 ** flushed */ 83 * of this directory should be displaced
84 */
85 unsigned new_packing_locality:1;
86
87 /*
88 * we use these for fsync or O_SYNC to decide which transaction
89 * needs to be committed in order for this inode to be properly
90 * flushed
91 */
71 unsigned int i_trans_id; 92 unsigned int i_trans_id;
93
72 struct reiserfs_journal_list *i_jl; 94 struct reiserfs_journal_list *i_jl;
73 atomic_t openers; 95 atomic_t openers;
74 struct mutex tailpack; 96 struct mutex tailpack;
@@ -82,9 +104,10 @@ typedef enum {
82 reiserfs_attrs_cleared = 0x00000001, 104 reiserfs_attrs_cleared = 0x00000001,
83} reiserfs_super_block_flags; 105} reiserfs_super_block_flags;
84 106
85/* struct reiserfs_super_block accessors/mutators 107/*
86 * since this is a disk structure, it will always be in 108 * struct reiserfs_super_block accessors/mutators since this is a disk
87 * little endian format. */ 109 * structure, it will always be in little endian format.
110 */
88#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) 111#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
89#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) 112#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
90#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) 113#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
@@ -152,48 +175,61 @@ typedef enum {
152 175
153/* LOGGING -- */ 176/* LOGGING -- */
154 177
155/* These all interelate for performance. 178/*
156** 179 * These all interelate for performance.
157** If the journal block count is smaller than n transactions, you lose speed. 180 *
158** I don't know what n is yet, I'm guessing 8-16. 181 * If the journal block count is smaller than n transactions, you lose speed.
159** 182 * I don't know what n is yet, I'm guessing 8-16.
160** typical transaction size depends on the application, how often fsync is 183 *
161** called, and how many metadata blocks you dirty in a 30 second period. 184 * typical transaction size depends on the application, how often fsync is
162** The more small files (<16k) you use, the larger your transactions will 185 * called, and how many metadata blocks you dirty in a 30 second period.
163** be. 186 * The more small files (<16k) you use, the larger your transactions will
164** 187 * be.
165** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal 188 *
166** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough 189 * If your journal fills faster than dirty buffers get flushed to disk, it
167** to prevent wrapping before dirty meta blocks get to disk. 190 * must flush them before allowing the journal to wrap, which slows things
168** 191 * down. If you need high speed meta data updates, the journal should be
169** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal 192 * big enough to prevent wrapping before dirty meta blocks get to disk.
170** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. 193 *
171** 194 * If the batch max is smaller than the transaction max, you'll waste space
172** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. 195 * at the end of the journal because journal_end sets the next transaction
173** 196 * to start at 0 if the next transaction has any chance of wrapping.
174*/ 197 *
198 * The large the batch max age, the better the speed, and the more meta
199 * data changes you'll lose after a crash.
200 */
175 201
176/* don't mess with these for a while */ 202/* don't mess with these for a while */
177 /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ 203/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
178#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ 204#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
179#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ 205#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
180#define JOURNAL_HASH_SIZE 8192 206#define JOURNAL_HASH_SIZE 8192
181#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ 207
182 208/* number of copies of the bitmaps to have floating. Must be >= 2 */
183/* One of these for every block in every transaction 209#define JOURNAL_NUM_BITMAPS 5
184** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a 210
185** hash of all the in memory transactions. 211/*
186** next and prev are used by the current transaction (journal_hash). 212 * One of these for every block in every transaction
187** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash 213 * Each one is in two hash tables. First, a hash of the current transaction,
188** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging 214 * and after journal_end, a hash of all the in memory transactions.
189** to a given transaction. 215 * next and prev are used by the current transaction (journal_hash).
190*/ 216 * hnext and hprev are used by journal_list_hash. If a block is in more
217 * than one transaction, the journal_list_hash links it in multiple times.
218 * This allows flush_journal_list to remove just the cnode belonging to a
219 * given transaction.
220 */
191struct reiserfs_journal_cnode { 221struct reiserfs_journal_cnode {
192 struct buffer_head *bh; /* real buffer head */ 222 struct buffer_head *bh; /* real buffer head */
193 struct super_block *sb; /* dev of real buffer head */ 223 struct super_block *sb; /* dev of real buffer head */
194 __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ 224
225 /* block number of real buffer head, == 0 when buffer on disk */
226 __u32 blocknr;
227
195 unsigned long state; 228 unsigned long state;
196 struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ 229
230 /* journal list this cnode lives in */
231 struct reiserfs_journal_list *jlist;
232
197 struct reiserfs_journal_cnode *next; /* next in transaction list */ 233 struct reiserfs_journal_cnode *next; /* next in transaction list */
198 struct reiserfs_journal_cnode *prev; /* prev in transaction list */ 234 struct reiserfs_journal_cnode *prev; /* prev in transaction list */
199 struct reiserfs_journal_cnode *hprev; /* prev in hash list */ 235 struct reiserfs_journal_cnode *hprev; /* prev in hash list */
@@ -212,18 +248,22 @@ struct reiserfs_list_bitmap {
212}; 248};
213 249
214/* 250/*
215** one of these for each transaction. The most important part here is the j_realblock. 251 * one of these for each transaction. The most important part here is the
216** this list of cnodes is used to hash all the blocks in all the commits, to mark all the 252 * j_realblock. this list of cnodes is used to hash all the blocks in all
217** real buffer heads dirty once all the commits hit the disk, 253 * the commits, to mark all the real buffer heads dirty once all the commits
218** and to make sure every real block in a transaction is on disk before allowing the log area 254 * hit the disk, and to make sure every real block in a transaction is on
219** to be overwritten */ 255 * disk before allowing the log area to be overwritten
256 */
220struct reiserfs_journal_list { 257struct reiserfs_journal_list {
221 unsigned long j_start; 258 unsigned long j_start;
222 unsigned long j_state; 259 unsigned long j_state;
223 unsigned long j_len; 260 unsigned long j_len;
224 atomic_t j_nonzerolen; 261 atomic_t j_nonzerolen;
225 atomic_t j_commit_left; 262 atomic_t j_commit_left;
226 atomic_t j_older_commits_done; /* all commits older than this on disk */ 263
264 /* all commits older than this on disk */
265 atomic_t j_older_commits_done;
266
227 struct mutex j_commit_mutex; 267 struct mutex j_commit_mutex;
228 unsigned int j_trans_id; 268 unsigned int j_trans_id;
229 time_t j_timestamp; 269 time_t j_timestamp;
@@ -234,11 +274,15 @@ struct reiserfs_journal_list {
234 /* time ordered list of all active transactions */ 274 /* time ordered list of all active transactions */
235 struct list_head j_list; 275 struct list_head j_list;
236 276
237 /* time ordered list of all transactions we haven't tried to flush yet */ 277 /*
278 * time ordered list of all transactions we haven't tried
279 * to flush yet
280 */
238 struct list_head j_working_list; 281 struct list_head j_working_list;
239 282
240 /* list of tail conversion targets in need of flush before commit */ 283 /* list of tail conversion targets in need of flush before commit */
241 struct list_head j_tail_bh_list; 284 struct list_head j_tail_bh_list;
285
242 /* list of data=ordered buffers in need of flush before commit */ 286 /* list of data=ordered buffers in need of flush before commit */
243 struct list_head j_bh_list; 287 struct list_head j_bh_list;
244 int j_refcount; 288 int j_refcount;
@@ -246,46 +290,83 @@ struct reiserfs_journal_list {
246 290
247struct reiserfs_journal { 291struct reiserfs_journal {
248 struct buffer_head **j_ap_blocks; /* journal blocks on disk */ 292 struct buffer_head **j_ap_blocks; /* journal blocks on disk */
249 struct reiserfs_journal_cnode *j_last; /* newest journal block */ 293 /* newest journal block */
250 struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ 294 struct reiserfs_journal_cnode *j_last;
295
296 /* oldest journal block. start here for traverse */
297 struct reiserfs_journal_cnode *j_first;
251 298
252 struct block_device *j_dev_bd; 299 struct block_device *j_dev_bd;
253 fmode_t j_dev_mode; 300 fmode_t j_dev_mode;
254 int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ 301
302 /* first block on s_dev of reserved area journal */
303 int j_1st_reserved_block;
255 304
256 unsigned long j_state; 305 unsigned long j_state;
257 unsigned int j_trans_id; 306 unsigned int j_trans_id;
258 unsigned long j_mount_id; 307 unsigned long j_mount_id;
259 unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ 308
309 /* start of current waiting commit (index into j_ap_blocks) */
310 unsigned long j_start;
260 unsigned long j_len; /* length of current waiting commit */ 311 unsigned long j_len; /* length of current waiting commit */
261 unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ 312
313 /* number of buffers requested by journal_begin() */
314 unsigned long j_len_alloc;
315
262 atomic_t j_wcount; /* count of writers for current commit */ 316 atomic_t j_wcount; /* count of writers for current commit */
263 unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ 317
264 unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ 318 /* batch count. allows turning X transactions into 1 */
265 unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ 319 unsigned long j_bcount;
320
321 /* first unflushed transactions offset */
322 unsigned long j_first_unflushed_offset;
323
324 /* last fully flushed journal timestamp */
325 unsigned j_last_flush_trans_id;
326
266 struct buffer_head *j_header_bh; 327 struct buffer_head *j_header_bh;
267 328
268 time_t j_trans_start_time; /* time this transaction started */ 329 time_t j_trans_start_time; /* time this transaction started */
269 struct mutex j_mutex; 330 struct mutex j_mutex;
270 struct mutex j_flush_mutex; 331 struct mutex j_flush_mutex;
271 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ 332
272 atomic_t j_jlock; /* lock for j_join_wait */ 333 /* wait for current transaction to finish before starting new one */
334 wait_queue_head_t j_join_wait;
335
336 atomic_t j_jlock; /* lock for j_join_wait */
273 int j_list_bitmap_index; /* number of next list bitmap to use */ 337 int j_list_bitmap_index; /* number of next list bitmap to use */
274 int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */ 338
275 int j_next_full_flush; /* next journal_end will flush all journal list */ 339 /* no more journal begins allowed. MUST sleep on j_join_wait */
276 int j_next_async_flush; /* next journal_end will flush all async commits */ 340 int j_must_wait;
341
342 /* next journal_end will flush all journal list */
343 int j_next_full_flush;
344
345 /* next journal_end will flush all async commits */
346 int j_next_async_flush;
277 347
278 int j_cnode_used; /* number of cnodes on the used list */ 348 int j_cnode_used; /* number of cnodes on the used list */
279 int j_cnode_free; /* number of cnodes on the free list */ 349 int j_cnode_free; /* number of cnodes on the free list */
280 350
281 unsigned int j_trans_max; /* max number of blocks in a transaction. */ 351 /* max number of blocks in a transaction. */
282 unsigned int j_max_batch; /* max number of blocks to batch into a trans */ 352 unsigned int j_trans_max;
283 unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ 353
284 unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ 354 /* max number of blocks to batch into a trans */
285 unsigned int j_default_max_commit_age; /* the default for the max commit age */ 355 unsigned int j_max_batch;
356
357 /* in seconds, how old can an async commit be */
358 unsigned int j_max_commit_age;
359
360 /* in seconds, how old can a transaction be */
361 unsigned int j_max_trans_age;
362
363 /* the default for the max commit age */
364 unsigned int j_default_max_commit_age;
286 365
287 struct reiserfs_journal_cnode *j_cnode_free_list; 366 struct reiserfs_journal_cnode *j_cnode_free_list;
288 struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ 367
368 /* orig pointer returned from vmalloc */
369 struct reiserfs_journal_cnode *j_cnode_free_orig;
289 370
290 struct reiserfs_journal_list *j_current_jl; 371 struct reiserfs_journal_list *j_current_jl;
291 int j_free_bitmap_nodes; 372 int j_free_bitmap_nodes;
@@ -306,14 +387,21 @@ struct reiserfs_journal {
306 387
307 /* list of all active transactions */ 388 /* list of all active transactions */
308 struct list_head j_journal_list; 389 struct list_head j_journal_list;
390
309 /* lists that haven't been touched by writeback attempts */ 391 /* lists that haven't been touched by writeback attempts */
310 struct list_head j_working_list; 392 struct list_head j_working_list;
311 393
312 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ 394 /* hash table for real buffer heads in current trans */
313 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ 395 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];
314 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all 396
315 the transactions */ 397 /* hash table for all the real buffer heads in all the transactions */
316 struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ 398 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];
399
400 /* array of bitmaps to record the deleted blocks */
401 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];
402
403 /* list of inodes which have preallocated blocks */
404 struct list_head j_prealloc_list;
317 int j_persistent_trans; 405 int j_persistent_trans;
318 unsigned long j_max_trans_size; 406 unsigned long j_max_trans_size;
319 unsigned long j_max_batch_size; 407 unsigned long j_max_batch_size;
@@ -328,11 +416,12 @@ struct reiserfs_journal {
328 416
329enum journal_state_bits { 417enum journal_state_bits {
330 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ 418 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */
331 J_WRITERS_QUEUED, /* set when log is full due to too many writers */ 419 J_WRITERS_QUEUED, /* set when log is full due to too many writers */
332 J_ABORTED, /* set when log is aborted */ 420 J_ABORTED, /* set when log is aborted */
333}; 421};
334 422
335#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ 423/* ick. magic string to find desc blocks in the journal */
424#define JOURNAL_DESC_MAGIC "ReIsErLB"
336 425
337typedef __u32(*hashf_t) (const signed char *, int); 426typedef __u32(*hashf_t) (const signed char *, int);
338 427
@@ -364,7 +453,10 @@ typedef struct reiserfs_proc_info_data {
364 stat_cnt_t leaked_oid; 453 stat_cnt_t leaked_oid;
365 stat_cnt_t leaves_removable; 454 stat_cnt_t leaves_removable;
366 455
367 /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */ 456 /*
457 * balances per level.
458 * Use explicit 5 as MAX_HEIGHT is not visible yet.
459 */
368 stat_cnt_t balance_at[5]; /* XXX */ 460 stat_cnt_t balance_at[5]; /* XXX */
369 /* sbk == search_by_key */ 461 /* sbk == search_by_key */
370 stat_cnt_t sbk_read_at[5]; /* XXX */ 462 stat_cnt_t sbk_read_at[5]; /* XXX */
@@ -416,47 +508,75 @@ typedef struct reiserfs_proc_info_data {
416 508
417/* reiserfs union of in-core super block data */ 509/* reiserfs union of in-core super block data */
418struct reiserfs_sb_info { 510struct reiserfs_sb_info {
419 struct buffer_head *s_sbh; /* Buffer containing the super block */ 511 /* Buffer containing the super block */
420 /* both the comment and the choice of 512 struct buffer_head *s_sbh;
421 name are unclear for s_rs -Hans */ 513
422 struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ 514 /* Pointer to the on-disk super block in the buffer */
515 struct reiserfs_super_block *s_rs;
423 struct reiserfs_bitmap_info *s_ap_bitmap; 516 struct reiserfs_bitmap_info *s_ap_bitmap;
424 struct reiserfs_journal *s_journal; /* pointer to journal information */ 517
518 /* pointer to journal information */
519 struct reiserfs_journal *s_journal;
520
425 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ 521 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
426 522
427 /* Serialize writers access, replace the old bkl */ 523 /* Serialize writers access, replace the old bkl */
428 struct mutex lock; 524 struct mutex lock;
525
429 /* Owner of the lock (can be recursive) */ 526 /* Owner of the lock (can be recursive) */
430 struct task_struct *lock_owner; 527 struct task_struct *lock_owner;
528
431 /* Depth of the lock, start from -1 like the bkl */ 529 /* Depth of the lock, start from -1 like the bkl */
432 int lock_depth; 530 int lock_depth;
433 531
532 struct workqueue_struct *commit_wq;
533
434 /* Comment? -Hans */ 534 /* Comment? -Hans */
435 void (*end_io_handler) (struct buffer_head *, int); 535 void (*end_io_handler) (struct buffer_head *, int);
436 hashf_t s_hash_function; /* pointer to function which is used 536
437 to sort names in directory. Set on 537 /*
438 mount */ 538 * pointer to function which is used to sort names in directory.
439 unsigned long s_mount_opt; /* reiserfs's mount options are set 539 * Set on mount
440 here (currently - NOTAIL, NOLOG, 540 */
441 REPLAYONLY) */ 541 hashf_t s_hash_function;
442 542
443 struct { /* This is a structure that describes block allocator options */ 543 /* reiserfs's mount options are set here */
444 unsigned long bits; /* Bitfield for enable/disable kind of options */ 544 unsigned long s_mount_opt;
445 unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ 545
546 /* This is a structure that describes block allocator options */
547 struct {
548 /* Bitfield for enable/disable kind of options */
549 unsigned long bits;
550
551 /*
552 * size started from which we consider file
553 * to be a large one (in blocks)
554 */
555 unsigned long large_file_size;
556
446 int border; /* percentage of disk, border takes */ 557 int border; /* percentage of disk, border takes */
447 int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ 558
448 int preallocsize; /* Number of blocks we try to prealloc when file 559 /*
449 reaches preallocmin size (in blocks) or 560 * Minimal file size (in blocks) starting
450 prealloc_list is empty. */ 561 * from which we do preallocations
562 */
563 int preallocmin;
564
565 /*
566 * Number of blocks we try to prealloc when file
567 * reaches preallocmin size (in blocks) or prealloc_list
568 * is empty.
569 */
570 int preallocsize;
451 } s_alloc_options; 571 } s_alloc_options;
452 572
453 /* Comment? -Hans */ 573 /* Comment? -Hans */
454 wait_queue_head_t s_wait; 574 wait_queue_head_t s_wait;
455 /* To be obsoleted soon by per buffer seals.. -Hans */ 575 /* increased by one every time the tree gets re-balanced */
456 atomic_t s_generation_counter; // increased by one every time the 576 atomic_t s_generation_counter;
457 // tree gets re-balanced 577
458 unsigned long s_properties; /* File system properties. Currently holds 578 /* File system properties. Currently holds on-disk FS format */
459 on-disk FS format */ 579 unsigned long s_properties;
460 580
461 /* session statistics */ 581 /* session statistics */
462 int s_disk_reads; 582 int s_disk_reads;
@@ -469,14 +589,23 @@ struct reiserfs_sb_info {
469 int s_bmaps_without_search; 589 int s_bmaps_without_search;
470 int s_direct2indirect; 590 int s_direct2indirect;
471 int s_indirect2direct; 591 int s_indirect2direct;
472 /* set up when it's ok for reiserfs_read_inode2() to read from 592
473 disk inode with nlink==0. Currently this is only used during 593 /*
474 finish_unfinished() processing at mount time */ 594 * set up when it's ok for reiserfs_read_inode2() to read from
595 * disk inode with nlink==0. Currently this is only used during
596 * finish_unfinished() processing at mount time
597 */
475 int s_is_unlinked_ok; 598 int s_is_unlinked_ok;
599
476 reiserfs_proc_info_data_t s_proc_info_data; 600 reiserfs_proc_info_data_t s_proc_info_data;
477 struct proc_dir_entry *procdir; 601 struct proc_dir_entry *procdir;
478 int reserved_blocks; /* amount of blocks reserved for further allocations */ 602
479 spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ 603 /* amount of blocks reserved for further allocations */
604 int reserved_blocks;
605
606
607 /* this lock is now only used to protect reserved_blocks variable */
608 spinlock_t bitmap_lock;
480 struct dentry *priv_root; /* root of /.reiserfs_priv */ 609 struct dentry *priv_root; /* root of /.reiserfs_priv */
481 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ 610 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */
482 int j_errno; 611 int j_errno;
@@ -492,14 +621,13 @@ struct reiserfs_sb_info {
492 char *s_jdev; /* Stored jdev for mount option showing */ 621 char *s_jdev; /* Stored jdev for mount option showing */
493#ifdef CONFIG_REISERFS_CHECK 622#ifdef CONFIG_REISERFS_CHECK
494 623
495 struct tree_balance *cur_tb; /* 624 /*
496 * Detects whether more than one 625 * Detects whether more than one copy of tb exists per superblock
497 * copy of tb exists per superblock 626 * as a means of checking whether do_balance is executing
498 * as a means of checking whether 627 * concurrently against another tree reader/writer on a same
499 * do_balance is executing concurrently 628 * mount point.
500 * against another tree reader/writer 629 */
501 * on a same mount point. 630 struct tree_balance *cur_tb;
502 */
503#endif 631#endif
504}; 632};
505 633
@@ -508,25 +636,36 @@ struct reiserfs_sb_info {
508#define REISERFS_3_6 1 636#define REISERFS_3_6 1
509#define REISERFS_OLD_FORMAT 2 637#define REISERFS_OLD_FORMAT 2
510 638
511enum reiserfs_mount_options {
512/* Mount options */ 639/* Mount options */
513 REISERFS_LARGETAIL, /* large tails will be created in a session */ 640enum reiserfs_mount_options {
514 REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ 641 /* large tails will be created in a session */
515 REPLAYONLY, /* replay journal and return 0. Use by fsck */ 642 REISERFS_LARGETAIL,
516 REISERFS_CONVERT, /* -o conv: causes conversion of old 643 /*
517 format super block to the new 644 * small (for files less than block size) tails will
518 format. If not specified - old 645 * be created in a session
519 partition will be dealt with in a 646 */
520 manner of 3.5.x */ 647 REISERFS_SMALLTAIL,
521 648
522/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting 649 /* replay journal and return 0. Used by fsck */
523** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option 650 REPLAYONLY,
524** is not required. If the normal autodection code can't determine which 651
525** hash to use (because both hashes had the same value for a file) 652 /*
526** use this option to force a specific hash. It won't allow you to override 653 * -o conv: causes conversion of old format super block to the
527** the existing hash on the FS, so if you have a tea hash disk, and mount 654 * new format. If not specified - old partition will be dealt
528** with -o hash=rupasov, the mount will fail. 655 * with in a manner of 3.5.x
529*/ 656 */
657 REISERFS_CONVERT,
658
659 /*
660 * -o hash={tea, rupasov, r5, detect} is meant for properly mounting
661 * reiserfs disks from 3.5.19 or earlier. 99% of the time, this
662 * option is not required. If the normal autodection code can't
663 * determine which hash to use (because both hashes had the same
664 * value for a file) use this option to force a specific hash.
665 * It won't allow you to override the existing hash on the FS, so
666 * if you have a tea hash disk, and mount with -o hash=rupasov,
667 * the mount will fail.
668 */
530 FORCE_TEA_HASH, /* try to force tea hash on mount */ 669 FORCE_TEA_HASH, /* try to force tea hash on mount */
531 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ 670 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
532 FORCE_R5_HASH, /* try to force r5 hash on mount */ 671 FORCE_R5_HASH, /* try to force r5 hash on mount */
@@ -536,9 +675,11 @@ enum reiserfs_mount_options {
536 REISERFS_DATA_ORDERED, 675 REISERFS_DATA_ORDERED,
537 REISERFS_DATA_WRITEBACK, 676 REISERFS_DATA_WRITEBACK,
538 677
539/* used for testing experimental features, makes benchmarking new 678 /*
540 features with and without more convenient, should never be used by 679 * used for testing experimental features, makes benchmarking new
541 users in any code shipped to users (ideally) */ 680 * features with and without more convenient, should never be used by
681 * users in any code shipped to users (ideally)
682 */
542 683
543 REISERFS_NO_BORDER, 684 REISERFS_NO_BORDER,
544 REISERFS_NO_UNHASHED_RELOCATION, 685 REISERFS_NO_UNHASHED_RELOCATION,
@@ -705,28 +846,28 @@ static inline void reiserfs_cond_resched(struct super_block *s)
705 846
706struct fid; 847struct fid;
707 848
708/* in reading the #defines, it may help to understand that they employ 849/*
709 the following abbreviations: 850 * in reading the #defines, it may help to understand that they employ
710 851 * the following abbreviations:
711 B = Buffer 852 *
712 I = Item header 853 * B = Buffer
713 H = Height within the tree (should be changed to LEV) 854 * I = Item header
714 N = Number of the item in the node 855 * H = Height within the tree (should be changed to LEV)
715 STAT = stat data 856 * N = Number of the item in the node
716 DEH = Directory Entry Header 857 * STAT = stat data
717 EC = Entry Count 858 * DEH = Directory Entry Header
718 E = Entry number 859 * EC = Entry Count
719 UL = Unsigned Long 860 * E = Entry number
720 BLKH = BLocK Header 861 * UL = Unsigned Long
721 UNFM = UNForMatted node 862 * BLKH = BLocK Header
722 DC = Disk Child 863 * UNFM = UNForMatted node
723 P = Path 864 * DC = Disk Child
724 865 * P = Path
725 These #defines are named by concatenating these abbreviations, 866 *
726 where first comes the arguments, and last comes the return value, 867 * These #defines are named by concatenating these abbreviations,
727 of the macro. 868 * where first comes the arguments, and last comes the return value,
728 869 * of the macro.
729*/ 870 */
730 871
731#define USE_INODE_GENERATION_COUNTER 872#define USE_INODE_GENERATION_COUNTER
732 873
@@ -737,14 +878,17 @@ struct fid;
737/* n must be power of 2 */ 878/* n must be power of 2 */
738#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) 879#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
739 880
740// to be ok for alpha and others we have to align structures to 8 byte 881/*
741// boundary. 882 * to be ok for alpha and others we have to align structures to 8 byte
742// FIXME: do not change 4 by anything else: there is code which relies on that 883 * boundary.
884 * FIXME: do not change 4 by anything else: there is code which relies on that
885 */
743#define ROUND_UP(x) _ROUND_UP(x,8LL) 886#define ROUND_UP(x) _ROUND_UP(x,8LL)
744 887
745/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug 888/*
746** messages. 889 * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
747*/ 890 * messages.
891 */
748#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ 892#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */
749 893
750void __reiserfs_warning(struct super_block *s, const char *id, 894void __reiserfs_warning(struct super_block *s, const char *id,
@@ -753,7 +897,7 @@ void __reiserfs_warning(struct super_block *s, const char *id,
753 __reiserfs_warning(s, id, __func__, fmt, ##args) 897 __reiserfs_warning(s, id, __func__, fmt, ##args)
754/* assertions handling */ 898/* assertions handling */
755 899
756/** always check a condition and panic if it's false. */ 900/* always check a condition and panic if it's false. */
757#define __RASSERT(cond, scond, format, args...) \ 901#define __RASSERT(cond, scond, format, args...) \
758do { \ 902do { \
759 if (!(cond)) \ 903 if (!(cond)) \
@@ -776,35 +920,48 @@ do { \
776 * Disk Data Structures 920 * Disk Data Structures
777 */ 921 */
778 922
779/***************************************************************************/ 923/***************************************************************************
780/* SUPER BLOCK */ 924 * SUPER BLOCK *
781/***************************************************************************/ 925 ***************************************************************************/
782 926
783/* 927/*
784 * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs 928 * Structure of super block on disk, a version of which in RAM is often
785 * the version in RAM is part of a larger structure containing fields never written to disk. 929 * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger
930 * structure containing fields never written to disk.
786 */ 931 */
787#define UNSET_HASH 0 // read_super will guess about, what hash names 932#define UNSET_HASH 0 /* Detect hash on disk */
788 // in directories were sorted with
789#define TEA_HASH 1 933#define TEA_HASH 1
790#define YURA_HASH 2 934#define YURA_HASH 2
791#define R5_HASH 3 935#define R5_HASH 3
792#define DEFAULT_HASH R5_HASH 936#define DEFAULT_HASH R5_HASH
793 937
794struct journal_params { 938struct journal_params {
795 __le32 jp_journal_1st_block; /* where does journal start from on its 939 /* where does journal start from on its device */
796 * device */ 940 __le32 jp_journal_1st_block;
797 __le32 jp_journal_dev; /* journal device st_rdev */ 941
798 __le32 jp_journal_size; /* size of the journal */ 942 /* journal device st_rdev */
799 __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ 943 __le32 jp_journal_dev;
800 __le32 jp_journal_magic; /* random value made on fs creation (this 944
801 * was sb_journal_block_count) */ 945 /* size of the journal */
802 __le32 jp_journal_max_batch; /* max number of blocks to batch into a 946 __le32 jp_journal_size;
803 * trans */ 947
804 __le32 jp_journal_max_commit_age; /* in seconds, how old can an async 948 /* max number of blocks in a transaction. */
805 * commit be */ 949 __le32 jp_journal_trans_max;
806 __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction 950
807 * be */ 951 /*
952 * random value made on fs creation
953 * (this was sb_journal_block_count)
954 */
955 __le32 jp_journal_magic;
956
957 /* max number of blocks to batch into a trans */
958 __le32 jp_journal_max_batch;
959
960 /* in seconds, how old can an async commit be */
961 __le32 jp_journal_max_commit_age;
962
963 /* in seconds, how old can a transaction be */
964 __le32 jp_journal_max_trans_age;
808}; 965};
809 966
810/* this is the super from 3.5.X, where X >= 10 */ 967/* this is the super from 3.5.X, where X >= 10 */
@@ -814,26 +971,48 @@ struct reiserfs_super_block_v1 {
814 __le32 s_root_block; /* root block number */ 971 __le32 s_root_block; /* root block number */
815 struct journal_params s_journal; 972 struct journal_params s_journal;
816 __le16 s_blocksize; /* block size */ 973 __le16 s_blocksize; /* block size */
817 __le16 s_oid_maxsize; /* max size of object id array, see 974
818 * get_objectid() commentary */ 975 /* max size of object id array, see get_objectid() commentary */
976 __le16 s_oid_maxsize;
819 __le16 s_oid_cursize; /* current size of object id array */ 977 __le16 s_oid_cursize; /* current size of object id array */
820 __le16 s_umount_state; /* this is set to 1 when filesystem was 978
821 * umounted, to 2 - when not */ 979 /* this is set to 1 when filesystem was umounted, to 2 - when not */
822 char s_magic[10]; /* reiserfs magic string indicates that 980 __le16 s_umount_state;
823 * file system is reiserfs: 981
824 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ 982 /*
825 __le16 s_fs_state; /* it is set to used by fsck to mark which 983 * reiserfs magic string indicates that file system is reiserfs:
826 * phase of rebuilding is done */ 984 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs"
827 __le32 s_hash_function_code; /* indicate, what hash function is being use 985 */
828 * to sort names in a directory*/ 986 char s_magic[10];
987
988 /*
989 * it is used by fsck to mark which
990 * phase of rebuilding is done
991 */
992 __le16 s_fs_state;
993 /*
994 * indicates what hash function is being used
995 * to sort names in a directory
996 */
997 __le32 s_hash_function_code;
829 __le16 s_tree_height; /* height of disk tree */ 998 __le16 s_tree_height; /* height of disk tree */
830 __le16 s_bmap_nr; /* amount of bitmap blocks needed to address 999
831 * each block of file system */ 1000 /*
832 __le16 s_version; /* this field is only reliable on filesystem 1001 * amount of bitmap blocks needed to address
833 * with non-standard journal */ 1002 * each block of file system
834 __le16 s_reserved_for_journal; /* size in blocks of journal area on main 1003 */
835 * device, we need to keep after 1004 __le16 s_bmap_nr;
836 * making fs with non-standard journal */ 1005
1006 /*
1007 * this field is only reliable on filesystem with non-standard journal
1008 */
1009 __le16 s_version;
1010
1011 /*
1012 * size in blocks of journal area on main device, we need to
1013 * keep after making fs with non-standard journal
1014 */
1015 __le16 s_reserved_for_journal;
837} __attribute__ ((__packed__)); 1016} __attribute__ ((__packed__));
838 1017
839#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) 1018#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
@@ -842,17 +1021,21 @@ struct reiserfs_super_block_v1 {
842struct reiserfs_super_block { 1021struct reiserfs_super_block {
843 struct reiserfs_super_block_v1 s_v1; 1022 struct reiserfs_super_block_v1 s_v1;
844 __le32 s_inode_generation; 1023 __le32 s_inode_generation;
845 __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ 1024
1025 /* Right now used only by inode-attributes, if enabled */
1026 __le32 s_flags;
1027
846 unsigned char s_uuid[16]; /* filesystem unique identifier */ 1028 unsigned char s_uuid[16]; /* filesystem unique identifier */
847 unsigned char s_label[16]; /* filesystem volume label */ 1029 unsigned char s_label[16]; /* filesystem volume label */
848 __le16 s_mnt_count; /* Count of mounts since last fsck */ 1030 __le16 s_mnt_count; /* Count of mounts since last fsck */
849 __le16 s_max_mnt_count; /* Maximum mounts before check */ 1031 __le16 s_max_mnt_count; /* Maximum mounts before check */
850 __le32 s_lastcheck; /* Timestamp of last fsck */ 1032 __le32 s_lastcheck; /* Timestamp of last fsck */
851 __le32 s_check_interval; /* Interval between checks */ 1033 __le32 s_check_interval; /* Interval between checks */
852 char s_unused[76]; /* zero filled by mkreiserfs and 1034
853 * reiserfs_convert_objectid_map_v1() 1035 /*
854 * so any additions must be updated 1036 * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1()
855 * there as well. */ 1037 * so any additions must be updated there as well. */
1038 char s_unused[76];
856} __attribute__ ((__packed__)); 1039} __attribute__ ((__packed__));
857 1040
858#define SB_SIZE (sizeof(struct reiserfs_super_block)) 1041#define SB_SIZE (sizeof(struct reiserfs_super_block))
@@ -860,7 +1043,7 @@ struct reiserfs_super_block {
860#define REISERFS_VERSION_1 0 1043#define REISERFS_VERSION_1 0
861#define REISERFS_VERSION_2 2 1044#define REISERFS_VERSION_2 2
862 1045
863// on-disk super block fields converted to cpu form 1046/* on-disk super block fields converted to cpu form */
864#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) 1047#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
865#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) 1048#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
866#define SB_BLOCKSIZE(s) \ 1049#define SB_BLOCKSIZE(s) \
@@ -915,11 +1098,13 @@ int is_reiserfs_3_5(struct reiserfs_super_block *rs);
915int is_reiserfs_3_6(struct reiserfs_super_block *rs); 1098int is_reiserfs_3_6(struct reiserfs_super_block *rs);
916int is_reiserfs_jr(struct reiserfs_super_block *rs); 1099int is_reiserfs_jr(struct reiserfs_super_block *rs);
917 1100
918/* ReiserFS leaves the first 64k unused, so that partition labels have 1101/*
919 enough space. If someone wants to write a fancy bootloader that 1102 * ReiserFS leaves the first 64k unused, so that partition labels have
920 needs more than 64k, let us know, and this will be increased in size. 1103 * enough space. If someone wants to write a fancy bootloader that
921 This number must be larger than than the largest block size on any 1104 * needs more than 64k, let us know, and this will be increased in size.
922 platform, or code will break. -Hans */ 1105 * This number must be larger than the largest block size on any
1106 * platform, or code will break. -Hans
1107 */
923#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) 1108#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
924#define REISERFS_FIRST_BLOCK unused_define 1109#define REISERFS_FIRST_BLOCK unused_define
925#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES 1110#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
@@ -944,8 +1129,7 @@ struct unfm_nodeinfo {
944 unsigned short unfm_freespace; 1129 unsigned short unfm_freespace;
945}; 1130};
946 1131
947/* there are two formats of keys: 3.5 and 3.6 1132/* there are two formats of keys: 3.5 and 3.6 */
948 */
949#define KEY_FORMAT_3_5 0 1133#define KEY_FORMAT_3_5 0
950#define KEY_FORMAT_3_6 1 1134#define KEY_FORMAT_3_6 1
951 1135
@@ -963,8 +1147,10 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
963 return sb->s_fs_info; 1147 return sb->s_fs_info;
964} 1148}
965 1149
966/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 1150/*
967 * which overflows on large file systems. */ 1151 * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
1152 * which overflows on large file systems.
1153 */
968static inline __u32 reiserfs_bmap_count(struct super_block *sb) 1154static inline __u32 reiserfs_bmap_count(struct super_block *sb)
969{ 1155{
970 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; 1156 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
@@ -975,8 +1161,10 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
975 return bmap_nr > ((1LL << 16) - 1); 1161 return bmap_nr > ((1LL << 16) - 1);
976} 1162}
977 1163
978/** this says about version of key of all items (but stat data) the 1164/*
979 object consists of */ 1165 * this says about version of key of all items (but stat data) the
1166 * object consists of
1167 */
980#define get_inode_item_key_version( inode ) \ 1168#define get_inode_item_key_version( inode ) \
981 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) 1169 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
982 1170
@@ -995,16 +1183,18 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
995 else \ 1183 else \
996 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) 1184 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
997 1185
998/* This is an aggressive tail suppression policy, I am hoping it 1186/*
999 improves our benchmarks. The principle behind it is that percentage 1187 * This is an aggressive tail suppression policy, I am hoping it
1000 space saving is what matters, not absolute space saving. This is 1188 * improves our benchmarks. The principle behind it is that percentage
1001 non-intuitive, but it helps to understand it if you consider that the 1189 * space saving is what matters, not absolute space saving. This is
1002 cost to access 4 blocks is not much more than the cost to access 1 1190 * non-intuitive, but it helps to understand it if you consider that the
1003 block, if you have to do a seek and rotate. A tail risks a 1191 * cost to access 4 blocks is not much more than the cost to access 1
1004 non-linear disk access that is significant as a percentage of total 1192 * block, if you have to do a seek and rotate. A tail risks a
1005 time cost for a 4 block file and saves an amount of space that is 1193 * non-linear disk access that is significant as a percentage of total
1006 less significant as a percentage of space, or so goes the hypothesis. 1194 * time cost for a 4 block file and saves an amount of space that is
1007 -Hans */ 1195 * less significant as a percentage of space, or so goes the hypothesis.
1196 * -Hans
1197 */
1008#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ 1198#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
1009(\ 1199(\
1010 (!(n_tail_size)) || \ 1200 (!(n_tail_size)) || \
@@ -1018,10 +1208,11 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
1018 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ 1208 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
1019) 1209)
1020 1210
1021/* Another strategy for tails, this one means only create a tail if all the 1211/*
1022 file would fit into one DIRECT item. 1212 * Another strategy for tails, this one means only create a tail if all the
1023 Primary intention for this one is to increase performance by decreasing 1213 * file would fit into one DIRECT item.
1024 seeking. 1214 * Primary intention for this one is to increase performance by decreasing
1215 * seeking.
1025*/ 1216*/
1026#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ 1217#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
1027(\ 1218(\
@@ -1035,23 +1226,21 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
1035#define REISERFS_VALID_FS 1 1226#define REISERFS_VALID_FS 1
1036#define REISERFS_ERROR_FS 2 1227#define REISERFS_ERROR_FS 2
1037 1228
1038// 1229/*
1039// there are 5 item types currently 1230 * there are 5 item types currently
1040// 1231 */
1041#define TYPE_STAT_DATA 0 1232#define TYPE_STAT_DATA 0
1042#define TYPE_INDIRECT 1 1233#define TYPE_INDIRECT 1
1043#define TYPE_DIRECT 2 1234#define TYPE_DIRECT 2
1044#define TYPE_DIRENTRY 3 1235#define TYPE_DIRENTRY 3
1045#define TYPE_MAXTYPE 3 1236#define TYPE_MAXTYPE 3
1046#define TYPE_ANY 15 // FIXME: comment is required 1237#define TYPE_ANY 15 /* FIXME: comment is required */
1047 1238
1048/***************************************************************************/ 1239/***************************************************************************
1049/* KEY & ITEM HEAD */ 1240 * KEY & ITEM HEAD *
1050/***************************************************************************/ 1241 ***************************************************************************/
1051 1242
1052// 1243/* directories use this key as well as old files */
1053// directories use this key as well as old files
1054//
1055struct offset_v1 { 1244struct offset_v1 {
1056 __le32 k_offset; 1245 __le32 k_offset;
1057 __le32 k_uniqueness; 1246 __le32 k_uniqueness;
@@ -1084,11 +1273,14 @@ static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
1084 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); 1273 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
1085} 1274}
1086 1275
1087/* Key of an item determines its location in the S+tree, and 1276/*
1088 is composed of 4 components */ 1277 * Key of an item determines its location in the S+tree, and
1278 * is composed of 4 components
1279 */
1089struct reiserfs_key { 1280struct reiserfs_key {
1090 __le32 k_dir_id; /* packing locality: by default parent 1281 /* packing locality: by default parent directory object id */
1091 directory object id */ 1282 __le32 k_dir_id;
1283
1092 __le32 k_objectid; /* object identifier */ 1284 __le32 k_objectid; /* object identifier */
1093 union { 1285 union {
1094 struct offset_v1 k_offset_v1; 1286 struct offset_v1 k_offset_v1;
@@ -1097,8 +1289,8 @@ struct reiserfs_key {
1097} __attribute__ ((__packed__)); 1289} __attribute__ ((__packed__));
1098 1290
1099struct in_core_key { 1291struct in_core_key {
1100 __u32 k_dir_id; /* packing locality: by default parent 1292 /* packing locality: by default parent directory object id */
1101 directory object id */ 1293 __u32 k_dir_id;
1102 __u32 k_objectid; /* object identifier */ 1294 __u32 k_objectid; /* object identifier */
1103 __u64 k_offset; 1295 __u64 k_offset;
1104 __u8 k_type; 1296 __u8 k_type;
@@ -1107,14 +1299,16 @@ struct in_core_key {
1107struct cpu_key { 1299struct cpu_key {
1108 struct in_core_key on_disk_key; 1300 struct in_core_key on_disk_key;
1109 int version; 1301 int version;
1110 int key_length; /* 3 in all cases but direct2indirect and 1302 /* 3 in all cases but direct2indirect and indirect2direct conversion */
1111 indirect2direct conversion */ 1303 int key_length;
1112}; 1304};
1113 1305
1114/* Our function for comparing keys can compare keys of different 1306/*
1115 lengths. It takes as a parameter the length of the keys it is to 1307 * Our function for comparing keys can compare keys of different
1116 compare. These defines are used in determining what is to be passed 1308 * lengths. It takes as a parameter the length of the keys it is to
1117 to it as that parameter. */ 1309 * compare. These defines are used in determining what is to be passed
1310 * to it as that parameter.
1311 */
1118#define REISERFS_FULL_KEY_LEN 4 1312#define REISERFS_FULL_KEY_LEN 4
1119#define REISERFS_SHORT_KEY_LEN 2 1313#define REISERFS_SHORT_KEY_LEN 2
1120 1314
@@ -1143,40 +1337,52 @@ struct cpu_key {
1143#define POSITION_FOUND 1 1337#define POSITION_FOUND 1
1144#define POSITION_NOT_FOUND 0 1338#define POSITION_NOT_FOUND 0
1145 1339
1146// return values for reiserfs_find_entry and search_by_entry_key 1340/* return values for reiserfs_find_entry and search_by_entry_key */
1147#define NAME_FOUND 1 1341#define NAME_FOUND 1
1148#define NAME_NOT_FOUND 0 1342#define NAME_NOT_FOUND 0
1149#define GOTO_PREVIOUS_ITEM 2 1343#define GOTO_PREVIOUS_ITEM 2
1150#define NAME_FOUND_INVISIBLE 3 1344#define NAME_FOUND_INVISIBLE 3
1151 1345
1152/* Everything in the filesystem is stored as a set of items. The 1346/*
1153 item head contains the key of the item, its free space (for 1347 * Everything in the filesystem is stored as a set of items. The
1154 indirect items) and specifies the location of the item itself 1348 * item head contains the key of the item, its free space (for
1155 within the block. */ 1349 * indirect items) and specifies the location of the item itself
1350 * within the block.
1351 */
1156 1352
1157struct item_head { 1353struct item_head {
1158 /* Everything in the tree is found by searching for it based on 1354 /*
1159 * its key.*/ 1355 * Everything in the tree is found by searching for it based on
1356 * its key.
1357 */
1160 struct reiserfs_key ih_key; 1358 struct reiserfs_key ih_key;
1161 union { 1359 union {
1162 /* The free space in the last unformatted node of an 1360 /*
1163 indirect item if this is an indirect item. This 1361 * The free space in the last unformatted node of an
1164 equals 0xFFFF iff this is a direct item or stat data 1362 * indirect item if this is an indirect item. This
1165 item. Note that the key, not this field, is used to 1363 * equals 0xFFFF iff this is a direct item or stat data
1166 determine the item type, and thus which field this 1364 * item. Note that the key, not this field, is used to
1167 union contains. */ 1365 * determine the item type, and thus which field this
1366 * union contains.
1367 */
1168 __le16 ih_free_space_reserved; 1368 __le16 ih_free_space_reserved;
1169 /* Iff this is a directory item, this field equals the 1369
1170 number of directory entries in the directory item. */ 1370 /*
1371 * Iff this is a directory item, this field equals the
1372 * number of directory entries in the directory item.
1373 */
1171 __le16 ih_entry_count; 1374 __le16 ih_entry_count;
1172 } __attribute__ ((__packed__)) u; 1375 } __attribute__ ((__packed__)) u;
1173 __le16 ih_item_len; /* total size of the item body */ 1376 __le16 ih_item_len; /* total size of the item body */
1174 __le16 ih_item_location; /* an offset to the item body 1377
1175 * within the block */ 1378 /* an offset to the item body within the block */
1176 __le16 ih_version; /* 0 for all old items, 2 for new 1379 __le16 ih_item_location;
1177 ones. Highest bit is set by fsck 1380
1178 temporary, cleaned after all 1381 /*
1179 done */ 1382 * 0 for all old items, 2 for new ones. Highest bit is set by fsck
1383 * temporary, cleaned after all done
1384 */
1385 __le16 ih_version;
1180} __attribute__ ((__packed__)); 1386} __attribute__ ((__packed__));
1181/* size of item header */ 1387/* size of item header */
1182#define IH_SIZE (sizeof(struct item_head)) 1388#define IH_SIZE (sizeof(struct item_head))
@@ -1198,27 +1404,24 @@ struct item_head {
1198#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) 1404#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
1199#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) 1405#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
1200 1406
1201/* these operate on indirect items, where you've got an array of ints 1407/*
1202** at a possibly unaligned location. These are a noop on ia32 1408 * these operate on indirect items, where you've got an array of ints
1203** 1409 * at a possibly unaligned location. These are a noop on ia32
1204** p is the array of __u32, i is the index into the array, v is the value 1410 *
1205** to store there. 1411 * p is the array of __u32, i is the index into the array, v is the value
1206*/ 1412 * to store there.
1413 */
1207#define get_block_num(p, i) get_unaligned_le32((p) + (i)) 1414#define get_block_num(p, i) get_unaligned_le32((p) + (i))
1208#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) 1415#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
1209 1416
1210// 1417/* * in old version uniqueness field shows key type */
1211// in old version uniqueness field shows key type
1212//
1213#define V1_SD_UNIQUENESS 0 1418#define V1_SD_UNIQUENESS 0
1214#define V1_INDIRECT_UNIQUENESS 0xfffffffe 1419#define V1_INDIRECT_UNIQUENESS 0xfffffffe
1215#define V1_DIRECT_UNIQUENESS 0xffffffff 1420#define V1_DIRECT_UNIQUENESS 0xffffffff
1216#define V1_DIRENTRY_UNIQUENESS 500 1421#define V1_DIRENTRY_UNIQUENESS 500
1217#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required 1422#define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */
1218 1423
1219// 1424/* here are conversion routines */
1220// here are conversion routines
1221//
1222static inline int uniqueness2type(__u32 uniqueness) CONSTF; 1425static inline int uniqueness2type(__u32 uniqueness) CONSTF;
1223static inline int uniqueness2type(__u32 uniqueness) 1426static inline int uniqueness2type(__u32 uniqueness)
1224{ 1427{
@@ -1255,11 +1458,11 @@ static inline __u32 type2uniqueness(int type)
1255 } 1458 }
1256} 1459}
1257 1460
1258// 1461/*
1259// key is pointer to on disk key which is stored in le, result is cpu, 1462 * key is pointer to on disk key which is stored in le, result is cpu,
1260// there is no way to get version of object from key, so, provide 1463 * there is no way to get version of object from key, so, provide
1261// version to these defines 1464 * version to these defines
1262// 1465 */
1263static inline loff_t le_key_k_offset(int version, 1466static inline loff_t le_key_k_offset(int version,
1264 const struct reiserfs_key *key) 1467 const struct reiserfs_key *key)
1265{ 1468{
@@ -1275,9 +1478,11 @@ static inline loff_t le_ih_k_offset(const struct item_head *ih)
1275 1478
1276static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) 1479static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key)
1277{ 1480{
1278 return (version == KEY_FORMAT_3_5) ? 1481 if (version == KEY_FORMAT_3_5) {
1279 uniqueness2type(le32_to_cpu(key->u.k_offset_v1.k_uniqueness)) : 1482 loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness);
1280 offset_v2_k_type(&(key->u.k_offset_v2)); 1483 return uniqueness2type(val);
1484 } else
1485 return offset_v2_k_type(&(key->u.k_offset_v2));
1281} 1486}
1282 1487
1283static inline loff_t le_ih_k_type(const struct item_head *ih) 1488static inline loff_t le_ih_k_type(const struct item_head *ih)
@@ -1288,8 +1493,22 @@ static inline loff_t le_ih_k_type(const struct item_head *ih)
1288static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, 1493static inline void set_le_key_k_offset(int version, struct reiserfs_key *key,
1289 loff_t offset) 1494 loff_t offset)
1290{ 1495{
1291 (version == KEY_FORMAT_3_5) ? (void)(key->u.k_offset_v1.k_offset = cpu_to_le32(offset)) : /* jdm check */ 1496 if (version == KEY_FORMAT_3_5)
1292 (void)(set_offset_v2_k_offset(&(key->u.k_offset_v2), offset)); 1497 key->u.k_offset_v1.k_offset = cpu_to_le32(offset);
1498 else
1499 set_offset_v2_k_offset(&key->u.k_offset_v2, offset);
1500}
1501
1502static inline void add_le_key_k_offset(int version, struct reiserfs_key *key,
1503 loff_t offset)
1504{
1505 set_le_key_k_offset(version, key,
1506 le_key_k_offset(version, key) + offset);
1507}
1508
1509static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset)
1510{
1511 add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset);
1293} 1512}
1294 1513
1295static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) 1514static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
@@ -1300,10 +1519,11 @@ static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset)
1300static inline void set_le_key_k_type(int version, struct reiserfs_key *key, 1519static inline void set_le_key_k_type(int version, struct reiserfs_key *key,
1301 int type) 1520 int type)
1302{ 1521{
1303 (version == KEY_FORMAT_3_5) ? 1522 if (version == KEY_FORMAT_3_5) {
1304 (void)(key->u.k_offset_v1.k_uniqueness = 1523 type = type2uniqueness(type);
1305 cpu_to_le32(type2uniqueness(type))) 1524 key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type);
1306 : (void)(set_offset_v2_k_type(&(key->u.k_offset_v2), type)); 1525 } else
1526 set_offset_v2_k_type(&key->u.k_offset_v2, type);
1307} 1527}
1308 1528
1309static inline void set_le_ih_k_type(struct item_head *ih, int type) 1529static inline void set_le_ih_k_type(struct item_head *ih, int type)
@@ -1331,9 +1551,7 @@ static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
1331 return le_key_k_type(version, key) == TYPE_STAT_DATA; 1551 return le_key_k_type(version, key) == TYPE_STAT_DATA;
1332} 1552}
1333 1553
1334// 1554/* item header has version. */
1335// item header has version.
1336//
1337static inline int is_direntry_le_ih(struct item_head *ih) 1555static inline int is_direntry_le_ih(struct item_head *ih)
1338{ 1556{
1339 return is_direntry_le_key(ih_version(ih), &ih->ih_key); 1557 return is_direntry_le_key(ih_version(ih), &ih->ih_key);
@@ -1354,9 +1572,7 @@ static inline int is_statdata_le_ih(struct item_head *ih)
1354 return is_statdata_le_key(ih_version(ih), &ih->ih_key); 1572 return is_statdata_le_key(ih_version(ih), &ih->ih_key);
1355} 1573}
1356 1574
1357// 1575/* key is pointer to cpu key, result is cpu */
1358// key is pointer to cpu key, result is cpu
1359//
1360static inline loff_t cpu_key_k_offset(const struct cpu_key *key) 1576static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
1361{ 1577{
1362 return key->on_disk_key.k_offset; 1578 return key->on_disk_key.k_offset;
@@ -1407,7 +1623,7 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key)
1407 1623
1408extern struct reiserfs_key root_key; 1624extern struct reiserfs_key root_key;
1409 1625
1410/* 1626/*
1411 * Picture represents a leaf of the S+tree 1627 * Picture represents a leaf of the S+tree
1412 * ______________________________________________________ 1628 * ______________________________________________________
1413 * | | Array of | | | 1629 * | | Array of | | |
@@ -1416,15 +1632,19 @@ extern struct reiserfs_key root_key;
1416 * |______|_______________|___________________|___________| 1632 * |______|_______________|___________________|___________|
1417 */ 1633 */
1418 1634
1419/* Header of a disk block. More precisely, header of a formatted leaf 1635/*
1420 or internal node, and not the header of an unformatted node. */ 1636 * Header of a disk block. More precisely, header of a formatted leaf
1637 * or internal node, and not the header of an unformatted node.
1638 */
1421struct block_head { 1639struct block_head {
1422 __le16 blk_level; /* Level of a block in the tree. */ 1640 __le16 blk_level; /* Level of a block in the tree. */
1423 __le16 blk_nr_item; /* Number of keys/items in a block. */ 1641 __le16 blk_nr_item; /* Number of keys/items in a block. */
1424 __le16 blk_free_space; /* Block free space in bytes. */ 1642 __le16 blk_free_space; /* Block free space in bytes. */
1425 __le16 blk_reserved; 1643 __le16 blk_reserved;
1426 /* dump this in v4/planA */ 1644 /* dump this in v4/planA */
1427 struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ 1645
1646 /* kept only for compatibility */
1647 struct reiserfs_key blk_right_delim_key;
1428}; 1648};
1429 1649
1430#define BLKH_SIZE (sizeof(struct block_head)) 1650#define BLKH_SIZE (sizeof(struct block_head))
@@ -1439,18 +1659,20 @@ struct block_head {
1439#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) 1659#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key)
1440#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) 1660#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val)
1441 1661
1662/* values for blk_level field of the struct block_head */
1663
1442/* 1664/*
1443 * values for blk_level field of the struct block_head 1665 * When node gets removed from the tree its blk_level is set to FREE_LEVEL.
1666 * It is then used to see whether the node is still in the tree
1444 */ 1667 */
1445 1668#define FREE_LEVEL 0
1446#define FREE_LEVEL 0 /* when node gets removed from the tree its
1447 blk_level is set to FREE_LEVEL. It is then
1448 used to see whether the node is still in the
1449 tree */
1450 1669
1451#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ 1670#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
1452 1671
1453/* Given the buffer head of a formatted node, resolve to the block head of that node. */ 1672/*
1673 * Given the buffer head of a formatted node, resolve to the
1674 * block head of that node.
1675 */
1454#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) 1676#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data))
1455/* Number of items that are in buffer. */ 1677/* Number of items that are in buffer. */
1456#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) 1678#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh)))
@@ -1471,14 +1693,14 @@ struct block_head {
1471#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ 1693#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
1472 && B_LEVEL(bh) <= MAX_HEIGHT) 1694 && B_LEVEL(bh) <= MAX_HEIGHT)
1473 1695
1474/***************************************************************************/ 1696/***************************************************************************
1475/* STAT DATA */ 1697 * STAT DATA *
1476/***************************************************************************/ 1698 ***************************************************************************/
1477 1699
1478// 1700/*
1479// old stat data is 32 bytes long. We are going to distinguish new one by 1701 * old stat data is 32 bytes long. We are going to distinguish new one by
1480// different size 1702 * different size
1481// 1703*/
1482struct stat_data_v1 { 1704struct stat_data_v1 {
1483 __le16 sd_mode; /* file type, permissions */ 1705 __le16 sd_mode; /* file type, permissions */
1484 __le16 sd_nlink; /* number of hard links */ 1706 __le16 sd_nlink; /* number of hard links */
@@ -1487,20 +1709,25 @@ struct stat_data_v1 {
1487 __le32 sd_size; /* file size */ 1709 __le32 sd_size; /* file size */
1488 __le32 sd_atime; /* time of last access */ 1710 __le32 sd_atime; /* time of last access */
1489 __le32 sd_mtime; /* time file was last modified */ 1711 __le32 sd_mtime; /* time file was last modified */
1490 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ 1712
1713 /*
1714 * time inode (stat data) was last changed
1715 * (except changes to sd_atime and sd_mtime)
1716 */
1717 __le32 sd_ctime;
1491 union { 1718 union {
1492 __le32 sd_rdev; 1719 __le32 sd_rdev;
1493 __le32 sd_blocks; /* number of blocks file uses */ 1720 __le32 sd_blocks; /* number of blocks file uses */
1494 } __attribute__ ((__packed__)) u; 1721 } __attribute__ ((__packed__)) u;
1495 __le32 sd_first_direct_byte; /* first byte of file which is stored 1722
1496 in a direct item: except that if it 1723 /*
1497 equals 1 it is a symlink and if it 1724 * first byte of file which is stored in a direct item: except that if
1498 equals ~(__u32)0 there is no 1725 * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no
1499 direct item. The existence of this 1726 * direct item. The existence of this field really grates on me.
1500 field really grates on me. Let's 1727 * Let's replace it with a macro based on sd_size and our tail
1501 replace it with a macro based on 1728 * suppression policy. Someday. -Hans
1502 sd_size and our tail suppression 1729 */
1503 policy. Someday. -Hans */ 1730 __le32 sd_first_direct_byte;
1504} __attribute__ ((__packed__)); 1731} __attribute__ ((__packed__));
1505 1732
1506#define SD_V1_SIZE (sizeof(struct stat_data_v1)) 1733#define SD_V1_SIZE (sizeof(struct stat_data_v1))
@@ -1532,8 +1759,10 @@ struct stat_data_v1 {
1532 1759
1533/* inode flags stored in sd_attrs (nee sd_reserved) */ 1760/* inode flags stored in sd_attrs (nee sd_reserved) */
1534 1761
1535/* we want common flags to have the same values as in ext2, 1762/*
1536 so chattr(1) will work without problems */ 1763 * we want common flags to have the same values as in ext2,
1764 * so chattr(1) will work without problems
1765 */
1537#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL 1766#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
1538#define REISERFS_APPEND_FL FS_APPEND_FL 1767#define REISERFS_APPEND_FL FS_APPEND_FL
1539#define REISERFS_SYNC_FL FS_SYNC_FL 1768#define REISERFS_SYNC_FL FS_SYNC_FL
@@ -1553,8 +1782,10 @@ struct stat_data_v1 {
1553 REISERFS_COMPR_FL | \ 1782 REISERFS_COMPR_FL | \
1554 REISERFS_NOTAIL_FL ) 1783 REISERFS_NOTAIL_FL )
1555 1784
1556/* Stat Data on disk (reiserfs version of UFS disk inode minus the 1785/*
1557 address blocks) */ 1786 * Stat Data on disk (reiserfs version of UFS disk inode minus the
1787 * address blocks)
1788 */
1558struct stat_data { 1789struct stat_data {
1559 __le16 sd_mode; /* file type, permissions */ 1790 __le16 sd_mode; /* file type, permissions */
1560 __le16 sd_attrs; /* persistent inode flags */ 1791 __le16 sd_attrs; /* persistent inode flags */
@@ -1564,25 +1795,20 @@ struct stat_data {
1564 __le32 sd_gid; /* group */ 1795 __le32 sd_gid; /* group */
1565 __le32 sd_atime; /* time of last access */ 1796 __le32 sd_atime; /* time of last access */
1566 __le32 sd_mtime; /* time file was last modified */ 1797 __le32 sd_mtime; /* time file was last modified */
1567 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ 1798
1799 /*
1800 * time inode (stat data) was last changed
1801 * (except changes to sd_atime and sd_mtime)
1802 */
1803 __le32 sd_ctime;
1568 __le32 sd_blocks; 1804 __le32 sd_blocks;
1569 union { 1805 union {
1570 __le32 sd_rdev; 1806 __le32 sd_rdev;
1571 __le32 sd_generation; 1807 __le32 sd_generation;
1572 //__le32 sd_first_direct_byte;
1573 /* first byte of file which is stored in a
1574 direct item: except that if it equals 1
1575 it is a symlink and if it equals
1576 ~(__u32)0 there is no direct item. The
1577 existence of this field really grates
1578 on me. Let's replace it with a macro
1579 based on sd_size and our tail
1580 suppression policy? */
1581 } __attribute__ ((__packed__)) u; 1808 } __attribute__ ((__packed__)) u;
1582} __attribute__ ((__packed__)); 1809} __attribute__ ((__packed__));
1583// 1810
1584// this is 44 bytes long 1811/* this is 44 bytes long */
1585//
1586#define SD_SIZE (sizeof(struct stat_data)) 1812#define SD_SIZE (sizeof(struct stat_data))
1587#define SD_V2_SIZE SD_SIZE 1813#define SD_V2_SIZE SD_SIZE
1588#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) 1814#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6)
@@ -1613,48 +1839,61 @@ struct stat_data {
1613#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) 1839#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs))
1614#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) 1840#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v))
1615 1841
1616/***************************************************************************/ 1842/***************************************************************************
1617/* DIRECTORY STRUCTURE */ 1843 * DIRECTORY STRUCTURE *
1618/***************************************************************************/ 1844 ***************************************************************************/
1619/* 1845/*
1620 Picture represents the structure of directory items 1846 * Picture represents the structure of directory items
1621 ________________________________________________ 1847 * ________________________________________________
1622 | Array of | | | | | | 1848 * | Array of | | | | | |
1623 | directory |N-1| N-2 | .... | 1st |0th| 1849 * | directory |N-1| N-2 | .... | 1st |0th|
1624 | entry headers | | | | | | 1850 * | entry headers | | | | | |
1625 |_______________|___|_____|________|_______|___| 1851 * |_______________|___|_____|________|_______|___|
1626 <---- directory entries ------> 1852 * <---- directory entries ------>
1627 1853 *
1628 First directory item has k_offset component 1. We store "." and ".." 1854 * First directory item has k_offset component 1. We store "." and ".."
1629 in one item, always, we never split "." and ".." into differing 1855 * in one item, always, we never split "." and ".." into differing
1630 items. This makes, among other things, the code for removing 1856 * items. This makes, among other things, the code for removing
1631 directories simpler. */ 1857 * directories simpler.
1858 */
1632#define SD_OFFSET 0 1859#define SD_OFFSET 0
1633#define SD_UNIQUENESS 0 1860#define SD_UNIQUENESS 0
1634#define DOT_OFFSET 1 1861#define DOT_OFFSET 1
1635#define DOT_DOT_OFFSET 2 1862#define DOT_DOT_OFFSET 2
1636#define DIRENTRY_UNIQUENESS 500 1863#define DIRENTRY_UNIQUENESS 500
1637 1864
1638/* */
1639#define FIRST_ITEM_OFFSET 1 1865#define FIRST_ITEM_OFFSET 1
1640 1866
1641/* 1867/*
1642 Q: How to get key of object pointed to by entry from entry? 1868 * Q: How to get key of object pointed to by entry from entry?
1643 1869 *
1644 A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key 1870 * A: Each directory entry has its header. This header has deh_dir_id
1645 of object, entry points to */ 1871 * and deh_objectid fields, those are key of object, entry points to
1872 */
1646 1873
1647/* NOT IMPLEMENTED: 1874/*
1648 Directory will someday contain stat data of object */ 1875 * NOT IMPLEMENTED:
1876 * Directory will someday contain stat data of object
1877 */
1649 1878
1650struct reiserfs_de_head { 1879struct reiserfs_de_head {
1651 __le32 deh_offset; /* third component of the directory entry key */ 1880 __le32 deh_offset; /* third component of the directory entry key */
1652 __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced 1881
1653 by directory entry */ 1882 /*
1654 __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ 1883 * objectid of the parent directory of the object, that is referenced
1884 * by directory entry
1885 */
1886 __le32 deh_dir_id;
1887
1888 /* objectid of the object, that is referenced by directory entry */
1889 __le32 deh_objectid;
1655 __le16 deh_location; /* offset of name in the whole item */ 1890 __le16 deh_location; /* offset of name in the whole item */
1656 __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether 1891
1657 entry is hidden (unlinked) */ 1892 /*
1893 * whether 1) entry contains stat data (for future), and
1894 * 2) whether entry is hidden (unlinked)
1895 */
1896 __le16 deh_state;
1658} __attribute__ ((__packed__)); 1897} __attribute__ ((__packed__));
1659#define DEH_SIZE sizeof(struct reiserfs_de_head) 1898#define DEH_SIZE sizeof(struct reiserfs_de_head)
1660#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) 1899#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset))
@@ -1684,9 +1923,11 @@ struct reiserfs_de_head {
1684# define ADDR_UNALIGNED_BITS (3) 1923# define ADDR_UNALIGNED_BITS (3)
1685#endif 1924#endif
1686 1925
1687/* These are only used to manipulate deh_state. 1926/*
1927 * These are only used to manipulate deh_state.
1688 * Because of this, we'll use the ext2_ bit routines, 1928 * Because of this, we'll use the ext2_ bit routines,
1689 * since they are little endian */ 1929 * since they are little endian
1930 */
1690#ifdef ADDR_UNALIGNED_BITS 1931#ifdef ADDR_UNALIGNED_BITS
1691 1932
1692# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) 1933# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
@@ -1721,46 +1962,16 @@ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
1721extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, 1962extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
1722 __le32 par_dirid, __le32 par_objid); 1963 __le32 par_dirid, __le32 par_objid);
1723 1964
1724/* array of the entry headers */ 1965/* two entries per block (at least) */
1725 /* get item body */
1726#define B_I_PITEM(bh,ih) ( (bh)->b_data + ih_location(ih) )
1727#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih)))
1728
1729/* length of the directory entry in directory item. This define
1730 calculates length of i-th directory entry using directory entry
1731 locations from dir entry head. When it calculates length of 0-th
1732 directory entry, it uses length of whole item in place of entry
1733 location of the non-existent following entry in the calculation.
1734 See picture above.*/
1735/*
1736#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \
1737((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh))))
1738*/
1739static inline int entry_length(const struct buffer_head *bh,
1740 const struct item_head *ih, int pos_in_item)
1741{
1742 struct reiserfs_de_head *deh;
1743
1744 deh = B_I_DEH(bh, ih) + pos_in_item;
1745 if (pos_in_item)
1746 return deh_location(deh - 1) - deh_location(deh);
1747
1748 return ih_item_len(ih) - deh_location(deh);
1749}
1750
1751/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */
1752#define I_ENTRY_COUNT(ih) (ih_entry_count((ih)))
1753
1754/* name by bh, ih and entry_num */
1755#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih_location(ih) + deh_location(B_I_DEH(bh,ih)+(entry_num))))
1756
1757// two entries per block (at least)
1758#define REISERFS_MAX_NAME(block_size) 255 1966#define REISERFS_MAX_NAME(block_size) 255
1759 1967
1760/* this structure is used for operations on directory entries. It is 1968/*
1761 not a disk structure. */ 1969 * this structure is used for operations on directory entries. It is
1762/* When reiserfs_find_entry or search_by_entry_key find directory 1970 * not a disk structure.
1763 entry, they return filled reiserfs_dir_entry structure */ 1971 *
1972 * When reiserfs_find_entry or search_by_entry_key find directory
1973 * entry, they return filled reiserfs_dir_entry structure
1974 */
1764struct reiserfs_dir_entry { 1975struct reiserfs_dir_entry {
1765 struct buffer_head *de_bh; 1976 struct buffer_head *de_bh;
1766 int de_item_num; 1977 int de_item_num;
@@ -1778,10 +1989,14 @@ struct reiserfs_dir_entry {
1778 struct cpu_key de_entry_key; 1989 struct cpu_key de_entry_key;
1779}; 1990};
1780 1991
1781/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ 1992/*
1993 * these defines are useful when a particular member of
1994 * a reiserfs_dir_entry is needed
1995 */
1782 1996
1783/* pointer to file name, stored in entry */ 1997/* pointer to file name, stored in entry */
1784#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + deh_location(deh)) 1998#define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \
1999 (ih_item_body(bh, ih) + deh_location(deh))
1785 2000
1786/* length of name */ 2001/* length of name */
1787#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ 2002#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \
@@ -1804,11 +2019,13 @@ struct reiserfs_dir_entry {
1804 * |______|_______________|___________________|___________| 2019 * |______|_______________|___________________|___________|
1805 */ 2020 */
1806 2021
1807/***************************************************************************/ 2022/***************************************************************************
1808/* DISK CHILD */ 2023 * DISK CHILD *
1809/***************************************************************************/ 2024 ***************************************************************************/
1810/* Disk child pointer: The pointer from an internal node of the tree 2025/*
1811 to a node that is on disk. */ 2026 * Disk child pointer:
2027 * The pointer from an internal node of the tree to a node that is on disk.
2028 */
1812struct disk_child { 2029struct disk_child {
1813 __le32 dc_block_number; /* Disk child's block number. */ 2030 __le32 dc_block_number; /* Disk child's block number. */
1814 __le16 dc_size; /* Disk child's used space. */ 2031 __le16 dc_size; /* Disk child's used space. */
@@ -1841,47 +2058,66 @@ struct disk_child {
1841#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) 2058#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
1842#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) 2059#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2)
1843 2060
1844/***************************************************************************/ 2061/***************************************************************************
1845/* PATH STRUCTURES AND DEFINES */ 2062 * PATH STRUCTURES AND DEFINES *
1846/***************************************************************************/ 2063 ***************************************************************************/
1847 2064
1848/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the 2065/*
1849 key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it 2066 * search_by_key fills up the path from the root to the leaf as it descends
1850 does not find them in the cache it reads them from disk. For each node search_by_key finds using 2067 * the tree looking for the key. It uses reiserfs_bread to try to find
1851 reiserfs_bread it then uses bin_search to look through that node. bin_search will find the 2068 * buffers in the cache given their block number. If it does not find
1852 position of the block_number of the next node if it is looking through an internal node. If it 2069 * them in the cache it reads them from disk. For each node search_by_key
1853 is looking through a leaf node bin_search will find the position of the item which has key either 2070 * finds using reiserfs_bread it then uses bin_search to look through that
1854 equal to given key, or which is the maximal key less than the given key. */ 2071 * node. bin_search will find the position of the block_number of the next
2072 * node if it is looking through an internal node. If it is looking through
2073 * a leaf node bin_search will find the position of the item which has key
2074 * either equal to given key, or which is the maximal key less than the
2075 * given key.
2076 */
1855 2077
1856struct path_element { 2078struct path_element {
1857 struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ 2079 /* Pointer to the buffer at the path in the tree. */
1858 int pe_position; /* Position in the tree node which is placed in the */ 2080 struct buffer_head *pe_buffer;
1859 /* buffer above. */ 2081 /* Position in the tree node which is placed in the buffer above. */
2082 int pe_position;
1860}; 2083};
1861 2084
1862#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ 2085/*
1863#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ 2086 * maximal height of a tree. don't change this without
1864#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ 2087 * changing JOURNAL_PER_BALANCE_CNT
1865 2088 */
1866#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ 2089#define MAX_HEIGHT 5
1867#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ 2090
1868 2091/* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
1869/* We need to keep track of who the ancestors of nodes are. When we 2092#define EXTENDED_MAX_HEIGHT 7
1870 perform a search we record which nodes were visited while 2093
1871 descending the tree looking for the node we searched for. This list 2094/* Must be equal to at least 2. */
1872 of nodes is called the path. This information is used while 2095#define FIRST_PATH_ELEMENT_OFFSET 2
1873 performing balancing. Note that this path information may become 2096
1874 invalid, and this means we must check it when using it to see if it 2097/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
1875 is still valid. You'll need to read search_by_key and the comments 2098#define ILLEGAL_PATH_ELEMENT_OFFSET 1
1876 in it, especially about decrement_counters_in_path(), to understand 2099
1877 this structure. 2100/* this MUST be MAX_HEIGHT + 1. See about FEB below */
1878 2101#define MAX_FEB_SIZE 6
1879Paths make the code so much harder to work with and debug.... An 2102
1880enormous number of bugs are due to them, and trying to write or modify 2103/*
1881code that uses them just makes my head hurt. They are based on an 2104 * We need to keep track of who the ancestors of nodes are. When we
1882excessive effort to avoid disturbing the precious VFS code.:-( The 2105 * perform a search we record which nodes were visited while
1883gods only know how we are going to SMP the code that uses them. 2106 * descending the tree looking for the node we searched for. This list
1884znodes are the way! */ 2107 * of nodes is called the path. This information is used while
2108 * performing balancing. Note that this path information may become
2109 * invalid, and this means we must check it when using it to see if it
2110 * is still valid. You'll need to read search_by_key and the comments
2111 * in it, especially about decrement_counters_in_path(), to understand
2112 * this structure.
2113 *
2114 * Paths make the code so much harder to work with and debug.... An
2115 * enormous number of bugs are due to them, and trying to write or modify
2116 * code that uses them just makes my head hurt. They are based on an
2117 * excessive effort to avoid disturbing the precious VFS code.:-( The
2118 * gods only know how we are going to SMP the code that uses them.
2119 * znodes are the way!
2120 */
1885 2121
1886#define PATH_READA 0x1 /* do read ahead */ 2122#define PATH_READA 0x1 /* do read ahead */
1887#define PATH_READA_BACK 0x2 /* read backwards */ 2123#define PATH_READA_BACK 0x2 /* read backwards */
@@ -1889,7 +2125,8 @@ znodes are the way! */
1889struct treepath { 2125struct treepath {
1890 int path_length; /* Length of the array above. */ 2126 int path_length; /* Length of the array above. */
1891 int reada; 2127 int reada;
1892 struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ 2128 /* Array of the path elements. */
2129 struct path_element path_elements[EXTENDED_MAX_HEIGHT];
1893 int pos_in_item; 2130 int pos_in_item;
1894}; 2131};
1895 2132
@@ -1908,41 +2145,124 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
1908#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) 2145#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
1909 2146
1910#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) 2147#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
1911 /* you know, to the person who didn't 2148
1912 write this the macro name does not 2149/*
1913 at first suggest what it does. 2150 * you know, to the person who didn't write this the macro name does not
1914 Maybe POSITION_FROM_PATH_END? Or 2151 * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or
1915 maybe we should just focus on 2152 * maybe we should just focus on dumping paths... -Hans
1916 dumping paths... -Hans */ 2153 */
1917#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) 2154#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
1918 2155
1919#define PATH_PITEM_HEAD(path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)) 2156/*
2157 * in do_balance leaf has h == 0 in contrast with path structure,
2158 * where root has level == 0. That is why we need these defines
2159 */
2160
2161/* tb->S[h] */
2162#define PATH_H_PBUFFER(path, h) \
2163 PATH_OFFSET_PBUFFER(path, path->path_length - (h))
2164
2165/* tb->F[h] or tb->S[0]->b_parent */
2166#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1)
2167
2168#define PATH_H_POSITION(path, h) \
2169 PATH_OFFSET_POSITION(path, path->path_length - (h))
1920 2170
1921/* in do_balance leaf has h == 0 in contrast with path structure, 2171/* tb->S[h]->b_item_order */
1922 where root has level == 0. That is why we need these defines */ 2172#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)
1923#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */
1924#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */
1925#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h))
1926#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */
1927 2173
1928#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) 2174#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
1929 2175
2176static inline void *reiserfs_node_data(const struct buffer_head *bh)
2177{
2178 return bh->b_data + sizeof(struct block_head);
2179}
2180
2181/* get key from internal node */
2182static inline struct reiserfs_key *internal_key(struct buffer_head *bh,
2183 int item_num)
2184{
2185 struct reiserfs_key *key = reiserfs_node_data(bh);
2186
2187 return &key[item_num];
2188}
2189
2190/* get the item header from leaf node */
2191static inline struct item_head *item_head(const struct buffer_head *bh,
2192 int item_num)
2193{
2194 struct item_head *ih = reiserfs_node_data(bh);
2195
2196 return &ih[item_num];
2197}
2198
2199/* get the key from leaf node */
2200static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh,
2201 int item_num)
2202{
2203 return &item_head(bh, item_num)->ih_key;
2204}
2205
2206static inline void *ih_item_body(const struct buffer_head *bh,
2207 const struct item_head *ih)
2208{
2209 return bh->b_data + ih_location(ih);
2210}
2211
2212/* get item body from leaf node */
2213static inline void *item_body(const struct buffer_head *bh, int item_num)
2214{
2215 return ih_item_body(bh, item_head(bh, item_num));
2216}
2217
2218static inline struct item_head *tp_item_head(const struct treepath *path)
2219{
2220 return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path));
2221}
2222
2223static inline void *tp_item_body(const struct treepath *path)
2224{
2225 return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path));
2226}
2227
1930#define get_last_bh(path) PATH_PLAST_BUFFER(path) 2228#define get_last_bh(path) PATH_PLAST_BUFFER(path)
1931#define get_ih(path) PATH_PITEM_HEAD(path)
1932#define get_item_pos(path) PATH_LAST_POSITION(path) 2229#define get_item_pos(path) PATH_LAST_POSITION(path)
1933#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path)))
1934#define item_moved(ih,path) comp_items(ih, path) 2230#define item_moved(ih,path) comp_items(ih, path)
1935#define path_changed(ih,path) comp_items (ih, path) 2231#define path_changed(ih,path) comp_items (ih, path)
1936 2232
1937/***************************************************************************/ 2233/* array of the entry headers */
1938/* MISC */ 2234 /* get item body */
1939/***************************************************************************/ 2235#define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih)))
2236
2237/*
2238 * length of the directory entry in directory item. This define
2239 * calculates length of i-th directory entry using directory entry
2240 * locations from dir entry head. When it calculates length of 0-th
2241 * directory entry, it uses length of whole item in place of entry
2242 * location of the non-existent following entry in the calculation.
2243 * See picture above.
2244 */
2245static inline int entry_length(const struct buffer_head *bh,
2246 const struct item_head *ih, int pos_in_item)
2247{
2248 struct reiserfs_de_head *deh;
2249
2250 deh = B_I_DEH(bh, ih) + pos_in_item;
2251 if (pos_in_item)
2252 return deh_location(deh - 1) - deh_location(deh);
2253
2254 return ih_item_len(ih) - deh_location(deh);
2255}
2256
2257/***************************************************************************
2258 * MISC *
2259 ***************************************************************************/
1940 2260
1941/* Size of pointer to the unformatted node. */ 2261/* Size of pointer to the unformatted node. */
1942#define UNFM_P_SIZE (sizeof(unp_t)) 2262#define UNFM_P_SIZE (sizeof(unp_t))
1943#define UNFM_P_SHIFT 2 2263#define UNFM_P_SHIFT 2
1944 2264
1945// in in-core inode key is stored on le form 2265/* in in-core inode key is stored on le form */
1946#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) 2266#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
1947 2267
1948#define MAX_UL_INT 0xffffffff 2268#define MAX_UL_INT 0xffffffff
@@ -1958,7 +2278,6 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
1958 return (loff_t) ((~(__u64) 0) >> 4); 2278 return (loff_t) ((~(__u64) 0) >> 4);
1959} 2279}
1960 2280
1961/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/
1962#define MAX_KEY_OBJECTID MAX_UL_INT 2281#define MAX_KEY_OBJECTID MAX_UL_INT
1963 2282
1964#define MAX_B_NUM MAX_UL_INT 2283#define MAX_B_NUM MAX_UL_INT
@@ -1967,9 +2286,12 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
1967/* the purpose is to detect overflow of an unsigned short */ 2286/* the purpose is to detect overflow of an unsigned short */
1968#define REISERFS_LINK_MAX (MAX_US_INT - 1000) 2287#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
1969 2288
1970/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ 2289/*
1971#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ 2290 * The following defines are used in reiserfs_insert_item
1972#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ 2291 * and reiserfs_append_item
2292 */
2293#define REISERFS_KERNEL_MEM 0 /* kernel memory mode */
2294#define REISERFS_USER_MEM 1 /* user memory mode */
1973 2295
1974#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) 2296#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
1975#define get_generation(s) atomic_read (&fs_generation(s)) 2297#define get_generation(s) atomic_read (&fs_generation(s))
@@ -1981,46 +2303,65 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
1981 __fs_changed(gen, s); \ 2303 __fs_changed(gen, s); \
1982}) 2304})
1983 2305
1984/***************************************************************************/ 2306/***************************************************************************
1985/* FIXATE NODES */ 2307 * FIXATE NODES *
1986/***************************************************************************/ 2308 ***************************************************************************/
1987 2309
1988#define VI_TYPE_LEFT_MERGEABLE 1 2310#define VI_TYPE_LEFT_MERGEABLE 1
1989#define VI_TYPE_RIGHT_MERGEABLE 2 2311#define VI_TYPE_RIGHT_MERGEABLE 2
1990 2312
1991/* To make any changes in the tree we always first find node, that 2313/*
1992 contains item to be changed/deleted or place to insert a new 2314 * To make any changes in the tree we always first find node, that
1993 item. We call this node S. To do balancing we need to decide what 2315 * contains item to be changed/deleted or place to insert a new
1994 we will shift to left/right neighbor, or to a new node, where new 2316 * item. We call this node S. To do balancing we need to decide what
1995 item will be etc. To make this analysis simpler we build virtual 2317 * we will shift to left/right neighbor, or to a new node, where new
1996 node. Virtual node is an array of items, that will replace items of 2318 * item will be etc. To make this analysis simpler we build virtual
1997 node S. (For instance if we are going to delete an item, virtual 2319 * node. Virtual node is an array of items, that will replace items of
1998 node does not contain it). Virtual node keeps information about 2320 * node S. (For instance if we are going to delete an item, virtual
1999 item sizes and types, mergeability of first and last items, sizes 2321 * node does not contain it). Virtual node keeps information about
2000 of all entries in directory item. We use this array of items when 2322 * item sizes and types, mergeability of first and last items, sizes
2001 calculating what we can shift to neighbors and how many nodes we 2323 * of all entries in directory item. We use this array of items when
2002 have to have if we do not any shiftings, if we shift to left/right 2324 * calculating what we can shift to neighbors and how many nodes we
2003 neighbor or to both. */ 2325 * have to have if we do not any shiftings, if we shift to left/right
2326 * neighbor or to both.
2327 */
2004struct virtual_item { 2328struct virtual_item {
2005 int vi_index; // index in the array of item operations 2329 int vi_index; /* index in the array of item operations */
2006 unsigned short vi_type; // left/right mergeability 2330 unsigned short vi_type; /* left/right mergeability */
2007 unsigned short vi_item_len; /* length of item that it will have after balancing */ 2331
2332 /* length of item that it will have after balancing */
2333 unsigned short vi_item_len;
2334
2008 struct item_head *vi_ih; 2335 struct item_head *vi_ih;
2009 const char *vi_item; // body of item (old or new) 2336 const char *vi_item; /* body of item (old or new) */
2010 const void *vi_new_data; // 0 always but paste mode 2337 const void *vi_new_data; /* 0 always but paste mode */
2011 void *vi_uarea; // item specific area 2338 void *vi_uarea; /* item specific area */
2012}; 2339};
2013 2340
2014struct virtual_node { 2341struct virtual_node {
2015 char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ 2342 /* this is a pointer to the free space in the buffer */
2343 char *vn_free_ptr;
2344
2016 unsigned short vn_nr_item; /* number of items in virtual node */ 2345 unsigned short vn_nr_item; /* number of items in virtual node */
2017 short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ 2346
2018 short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ 2347 /*
2348 * size of node , that node would have if it has
2349 * unlimited size and no balancing is performed
2350 */
2351 short vn_size;
2352
2353 /* mode of balancing (paste, insert, delete, cut) */
2354 short vn_mode;
2355
2019 short vn_affected_item_num; 2356 short vn_affected_item_num;
2020 short vn_pos_in_item; 2357 short vn_pos_in_item;
2021 struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ 2358
2359 /* item header of inserted item, 0 for other modes */
2360 struct item_head *vn_ins_ih;
2022 const void *vn_data; 2361 const void *vn_data;
2023 struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ 2362
2363 /* array of items (including a new one, excluding item to be deleted) */
2364 struct virtual_item *vn_vi;
2024}; 2365};
2025 2366
2026/* used by directory items when creating virtual nodes */ 2367/* used by directory items when creating virtual nodes */
@@ -2030,22 +2371,25 @@ struct direntry_uarea {
2030 __u16 entry_sizes[1]; 2371 __u16 entry_sizes[1];
2031} __attribute__ ((__packed__)); 2372} __attribute__ ((__packed__));
2032 2373
2033/***************************************************************************/ 2374/***************************************************************************
2034/* TREE BALANCE */ 2375 * TREE BALANCE *
2035/***************************************************************************/ 2376 ***************************************************************************/
2036 2377
2037/* This temporary structure is used in tree balance algorithms, and 2378/*
2038 constructed as we go to the extent that its various parts are 2379 * This temporary structure is used in tree balance algorithms, and
2039 needed. It contains arrays of nodes that can potentially be 2380 * constructed as we go to the extent that its various parts are
2040 involved in the balancing of node S, and parameters that define how 2381 * needed. It contains arrays of nodes that can potentially be
2041 each of the nodes must be balanced. Note that in these algorithms 2382 * involved in the balancing of node S, and parameters that define how
2042 for balancing the worst case is to need to balance the current node 2383 * each of the nodes must be balanced. Note that in these algorithms
2043 S and the left and right neighbors and all of their parents plus 2384 * for balancing the worst case is to need to balance the current node
2044 create a new node. We implement S1 balancing for the leaf nodes 2385 * S and the left and right neighbors and all of their parents plus
2045 and S0 balancing for the internal nodes (S1 and S0 are defined in 2386 * create a new node. We implement S1 balancing for the leaf nodes
2046 our papers.)*/ 2387 * and S0 balancing for the internal nodes (S1 and S0 are defined in
2388 * our papers.)
2389 */
2047 2390
2048#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ 2391/* size of the array of buffers to free at end of do_balance */
2392#define MAX_FREE_BLOCK 7
2049 2393
2050/* maximum number of FEB blocknrs on a single level */ 2394/* maximum number of FEB blocknrs on a single level */
2051#define MAX_AMOUNT_NEEDED 2 2395#define MAX_AMOUNT_NEEDED 2
@@ -2057,64 +2401,144 @@ struct tree_balance {
2057 struct super_block *tb_sb; 2401 struct super_block *tb_sb;
2058 struct reiserfs_transaction_handle *transaction_handle; 2402 struct reiserfs_transaction_handle *transaction_handle;
2059 struct treepath *tb_path; 2403 struct treepath *tb_path;
2060 struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ 2404
2061 struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ 2405 /* array of left neighbors of nodes in the path */
2062 struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ 2406 struct buffer_head *L[MAX_HEIGHT];
2063 struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ 2407
2064 struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ 2408 /* array of right neighbors of nodes in the path */
2065 struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ 2409 struct buffer_head *R[MAX_HEIGHT];
2066 2410
2067 struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals 2411 /* array of fathers of the left neighbors */
2068 cur_blknum. */ 2412 struct buffer_head *FL[MAX_HEIGHT];
2413
2414 /* array of fathers of the right neighbors */
2415 struct buffer_head *FR[MAX_HEIGHT];
2416 /* array of common parents of center node and its left neighbor */
2417 struct buffer_head *CFL[MAX_HEIGHT];
2418
2419 /* array of common parents of center node and its right neighbor */
2420 struct buffer_head *CFR[MAX_HEIGHT];
2421
2422 /*
2423 * array of empty buffers. Number of buffers in array equals
2424 * cur_blknum.
2425 */
2426 struct buffer_head *FEB[MAX_FEB_SIZE];
2069 struct buffer_head *used[MAX_FEB_SIZE]; 2427 struct buffer_head *used[MAX_FEB_SIZE];
2070 struct buffer_head *thrown[MAX_FEB_SIZE]; 2428 struct buffer_head *thrown[MAX_FEB_SIZE];
2071 int lnum[MAX_HEIGHT]; /* array of number of items which must be 2429
2072 shifted to the left in order to balance the 2430 /*
2073 current node; for leaves includes item that 2431 * array of number of items which must be shifted to the left in
2074 will be partially shifted; for internal 2432 * order to balance the current node; for leaves includes item that
2075 nodes, it is the number of child pointers 2433 * will be partially shifted; for internal nodes, it is the number
2076 rather than items. It includes the new item 2434 * of child pointers rather than items. It includes the new item
2077 being created. The code sometimes subtracts 2435 * being created. The code sometimes subtracts one to get the
2078 one to get the number of wholly shifted 2436 * number of wholly shifted items for other purposes.
2079 items for other purposes. */ 2437 */
2080 int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ 2438 int lnum[MAX_HEIGHT];
2081 int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and 2439
2082 S[h] to its item number within the node CFL[h] */ 2440 /* substitute right for left in comment above */
2083 int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ 2441 int rnum[MAX_HEIGHT];
2084 int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from 2442
2085 S[h]. A negative value means removing. */ 2443 /*
2086 int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after 2444 * array indexed by height h mapping the key delimiting L[h] and
2087 balancing on the level h of the tree. If 0 then S is 2445 * S[h] to its item number within the node CFL[h]
2088 being deleted, if 1 then S is remaining and no new nodes 2446 */
2089 are being created, if 2 or 3 then 1 or 2 new nodes is 2447 int lkey[MAX_HEIGHT];
2090 being created */ 2448
2449 /* substitute r for l in comment above */
2450 int rkey[MAX_HEIGHT];
2451
2452 /*
2453 * the number of bytes by we are trying to add or remove from
2454 * S[h]. A negative value means removing.
2455 */
2456 int insert_size[MAX_HEIGHT];
2457
2458 /*
2459 * number of nodes that will replace node S[h] after balancing
2460 * on the level h of the tree. If 0 then S is being deleted,
2461 * if 1 then S is remaining and no new nodes are being created,
2462 * if 2 or 3 then 1 or 2 new nodes is being created
2463 */
2464 int blknum[MAX_HEIGHT];
2091 2465
2092 /* fields that are used only for balancing leaves of the tree */ 2466 /* fields that are used only for balancing leaves of the tree */
2093 int cur_blknum; /* number of empty blocks having been already allocated */ 2467
2094 int s0num; /* number of items that fall into left most node when S[0] splits */ 2468 /* number of empty blocks having been already allocated */
2095 int s1num; /* number of items that fall into first new node when S[0] splits */ 2469 int cur_blknum;
2096 int s2num; /* number of items that fall into second new node when S[0] splits */ 2470
2097 int lbytes; /* number of bytes which can flow to the left neighbor from the left */ 2471 /* number of items that fall into left most node when S[0] splits */
2098 /* most liquid item that cannot be shifted from S[0] entirely */ 2472 int s0num;
2099 /* if -1 then nothing will be partially shifted */ 2473
2100 int rbytes; /* number of bytes which will flow to the right neighbor from the right */ 2474 /*
2101 /* most liquid item that cannot be shifted from S[0] entirely */ 2475 * number of bytes which can flow to the left neighbor from the left
2102 /* if -1 then nothing will be partially shifted */ 2476 * most liquid item that cannot be shifted from S[0] entirely
2103 int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ 2477 * if -1 then nothing will be partially shifted
2104 /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ 2478 */
2105 int s2bytes; 2479 int lbytes;
2106 struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ 2480
2107 char *vn_buf; /* kmalloced memory. Used to create 2481 /*
2108 virtual node and keep map of 2482 * number of bytes which will flow to the right neighbor from the right
2109 dirtied bitmap blocks */ 2483 * most liquid item that cannot be shifted from S[0] entirely
2484 * if -1 then nothing will be partially shifted
2485 */
2486 int rbytes;
2487
2488
2489 /*
2490 * index into the array of item headers in
2491 * S[0] of the affected item
2492 */
2493 int item_pos;
2494
2495 /* new nodes allocated to hold what could not fit into S */
2496 struct buffer_head *S_new[2];
2497
2498 /*
2499 * number of items that will be placed into nodes in S_new
2500 * when S[0] splits
2501 */
2502 int snum[2];
2503
2504 /*
2505 * number of bytes which flow to nodes in S_new when S[0] splits
2506 * note: if S[0] splits into 3 nodes, then items do not need to be cut
2507 */
2508 int sbytes[2];
2509
2510 int pos_in_item;
2511 int zeroes_num;
2512
2513 /*
2514 * buffers which are to be freed after do_balance finishes
2515 * by unfix_nodes
2516 */
2517 struct buffer_head *buf_to_free[MAX_FREE_BLOCK];
2518
2519 /*
2520 * kmalloced memory. Used to create virtual node and keep
2521 * map of dirtied bitmap blocks
2522 */
2523 char *vn_buf;
2524
2110 int vn_buf_size; /* size of the vn_buf */ 2525 int vn_buf_size; /* size of the vn_buf */
2111 struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */
2112 2526
2113 int fs_gen; /* saved value of `reiserfs_generation' counter 2527 /* VN starts after bitmap of bitmap blocks */
2114 see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ 2528 struct virtual_node *tb_vn;
2529
2530 /*
2531 * saved value of `reiserfs_generation' counter see
2532 * FILESYSTEM_CHANGED() macro in reiserfs_fs.h
2533 */
2534 int fs_gen;
2535
2115#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2536#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2116 struct in_core_key key; /* key pointer, to pass to block allocator or 2537 /*
2117 another low-level subsystem */ 2538 * key pointer, to pass to block allocator or
2539 * another low-level subsystem
2540 */
2541 struct in_core_key key;
2118#endif 2542#endif
2119}; 2543};
2120 2544
@@ -2122,20 +2546,24 @@ struct tree_balance {
2122 2546
2123/* When inserting an item. */ 2547/* When inserting an item. */
2124#define M_INSERT 'i' 2548#define M_INSERT 'i'
2125/* When inserting into (directories only) or appending onto an already 2549/*
2126 existent item. */ 2550 * When inserting into (directories only) or appending onto an already
2551 * existent item.
2552 */
2127#define M_PASTE 'p' 2553#define M_PASTE 'p'
2128/* When deleting an item. */ 2554/* When deleting an item. */
2129#define M_DELETE 'd' 2555#define M_DELETE 'd'
2130/* When truncating an item or removing an entry from a (directory) item. */ 2556/* When truncating an item or removing an entry from a (directory) item. */
2131#define M_CUT 'c' 2557#define M_CUT 'c'
2132 2558
2133/* used when balancing on leaf level skipped (in reiserfsck) */ 2559/* used when balancing on leaf level skipped (in reiserfsck) */
2134#define M_INTERNAL 'n' 2560#define M_INTERNAL 'n'
2135 2561
2136/* When further balancing is not needed, then do_balance does not need 2562/*
2137 to be called. */ 2563 * When further balancing is not needed, then do_balance does not need
2138#define M_SKIP_BALANCING 's' 2564 * to be called.
2565 */
2566#define M_SKIP_BALANCING 's'
2139#define M_CONVERT 'v' 2567#define M_CONVERT 'v'
2140 2568
2141/* modes of leaf_move_items */ 2569/* modes of leaf_move_items */
@@ -2148,8 +2576,10 @@ struct tree_balance {
2148#define FIRST_TO_LAST 0 2576#define FIRST_TO_LAST 0
2149#define LAST_TO_FIRST 1 2577#define LAST_TO_FIRST 1
2150 2578
2151/* used in do_balance for passing parent of node information that has 2579/*
2152 been gotten from tb struct */ 2580 * used in do_balance for passing parent of node information that has
2581 * been gotten from tb struct
2582 */
2153struct buffer_info { 2583struct buffer_info {
2154 struct tree_balance *tb; 2584 struct tree_balance *tb;
2155 struct buffer_head *bi_bh; 2585 struct buffer_head *bi_bh;
@@ -2167,20 +2597,24 @@ static inline struct super_block *sb_from_bi(struct buffer_info *bi)
2167 return bi ? sb_from_tb(bi->tb) : NULL; 2597 return bi ? sb_from_tb(bi->tb) : NULL;
2168} 2598}
2169 2599
2170/* there are 4 types of items: stat data, directory item, indirect, direct. 2600/*
2171+-------------------+------------+--------------+------------+ 2601 * there are 4 types of items: stat data, directory item, indirect, direct.
2172| | k_offset | k_uniqueness | mergeable? | 2602 * +-------------------+------------+--------------+------------+
2173+-------------------+------------+--------------+------------+ 2603 * | | k_offset | k_uniqueness | mergeable? |
2174| stat data | 0 | 0 | no | 2604 * +-------------------+------------+--------------+------------+
2175+-------------------+------------+--------------+------------+ 2605 * | stat data | 0 | 0 | no |
2176| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | 2606 * +-------------------+------------+--------------+------------+
2177| non 1st directory | hash value | | yes | 2607 * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no |
2178| item | | | | 2608 * | non 1st directory | hash value | UNIQUENESS | yes |
2179+-------------------+------------+--------------+------------+ 2609 * | item | | | |
2180| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object 2610 * +-------------------+------------+--------------+------------+
2181+-------------------+------------+--------------+------------+ 2611 * | indirect item | offset + 1 |TYPE_INDIRECT | [1] |
2182| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object 2612 * +-------------------+------------+--------------+------------+
2183+-------------------+------------+--------------+------------+ 2613 * | direct item | offset + 1 |TYPE_DIRECT | [2] |
2614 * +-------------------+------------+--------------+------------+
2615 *
2616 * [1] if this is not the first indirect item of the object
2617 * [2] if this is not the first direct item of the object
2184*/ 2618*/
2185 2619
2186struct item_operations { 2620struct item_operations {
@@ -2219,49 +2653,43 @@ extern struct item_operations *item_ops[TYPE_ANY + 1];
2219/* number of blocks pointed to by the indirect item */ 2653/* number of blocks pointed to by the indirect item */
2220#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) 2654#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE)
2221 2655
2222/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ 2656/*
2657 * the used space within the unformatted node corresponding
2658 * to pos within the item pointed to by ih
2659 */
2223#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) 2660#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
2224 2661
2225/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ 2662/*
2226 2663 * number of bytes contained by the direct item or the
2227/* get the item header */ 2664 * unformatted nodes the indirect item points to
2228#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) 2665 */
2229
2230/* get key */
2231#define B_N_PDELIM_KEY(bh,item_num) ( (struct reiserfs_key * )((bh)->b_data + BLKH_SIZE) + (item_num) )
2232
2233/* get the key */
2234#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) )
2235
2236/* get item body */
2237#define B_N_PITEM(bh,item_num) ( (bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(item_num))))
2238
2239/* get the stat data by the buffer header and the item order */
2240#define B_N_STAT_DATA(bh,nr) \
2241( (struct stat_data *)((bh)->b_data + ih_location(B_N_PITEM_HEAD((bh),(nr))) ) )
2242 2666
2243 /* following defines use reiserfs buffer header and item header */ 2667/* following defines use reiserfs buffer header and item header */
2244 2668
2245/* get stat-data */ 2669/* get stat-data */
2246#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) 2670#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
2247 2671
2248// this is 3976 for size==4096 2672/* this is 3976 for size==4096 */
2249#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) 2673#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
2250 2674
2251/* indirect items consist of entries which contain blocknrs, pos 2675/*
2252 indicates which entry, and B_I_POS_UNFM_POINTER resolves to the 2676 * indirect items consist of entries which contain blocknrs, pos
2253 blocknr contained by the entry pos points to */ 2677 * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
2254#define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) 2678 * blocknr contained by the entry pos points to
2255#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) 2679 */
2680#define B_I_POS_UNFM_POINTER(bh, ih, pos) \
2681 le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos)))
2682#define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \
2683 (*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val))
2256 2684
2257struct reiserfs_iget_args { 2685struct reiserfs_iget_args {
2258 __u32 objectid; 2686 __u32 objectid;
2259 __u32 dirid; 2687 __u32 dirid;
2260}; 2688};
2261 2689
2262/***************************************************************************/ 2690/***************************************************************************
2263/* FUNCTION DECLARATIONS */ 2691 * FUNCTION DECLARATIONS *
2264/***************************************************************************/ 2692 ***************************************************************************/
2265 2693
2266#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) 2694#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
2267 2695
@@ -2273,7 +2701,10 @@ struct reiserfs_iget_args {
2273/* first block written in a commit. */ 2701/* first block written in a commit. */
2274struct reiserfs_journal_desc { 2702struct reiserfs_journal_desc {
2275 __le32 j_trans_id; /* id of commit */ 2703 __le32 j_trans_id; /* id of commit */
2276 __le32 j_len; /* length of commit. len +1 is the commit block */ 2704
2705 /* length of commit. len +1 is the commit block */
2706 __le32 j_len;
2707
2277 __le32 j_mount_id; /* mount id of this trans */ 2708 __le32 j_mount_id; /* mount id of this trans */
2278 __le32 j_realblock[1]; /* real locations for each block */ 2709 __le32 j_realblock[1]; /* real locations for each block */
2279}; 2710};
@@ -2300,22 +2731,35 @@ struct reiserfs_journal_commit {
2300#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) 2731#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
2301#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) 2732#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0)
2302 2733
2303/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the 2734/*
2304** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, 2735 * this header block gets written whenever a transaction is considered
2305** and this transaction does not need to be replayed. 2736 * fully flushed, and is more recent than the last fully flushed transaction.
2306*/ 2737 * fully flushed means all the log blocks and all the real blocks are on
2738 * disk, and this transaction does not need to be replayed.
2739 */
2307struct reiserfs_journal_header { 2740struct reiserfs_journal_header {
2308 __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ 2741 /* id of last fully flushed transaction */
2309 __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ 2742 __le32 j_last_flush_trans_id;
2743
2744 /* offset in the log of where to start replay after a crash */
2745 __le32 j_first_unflushed_offset;
2746
2310 __le32 j_mount_id; 2747 __le32 j_mount_id;
2311 /* 12 */ struct journal_params jh_journal; 2748 /* 12 */ struct journal_params jh_journal;
2312}; 2749};
2313 2750
2314/* biggest tunable defines are right here */ 2751/* biggest tunable defines are right here */
2315#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ 2752#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */
2316#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ 2753
2754/* biggest possible single transaction, don't change for now (8/3/99) */
2755#define JOURNAL_TRANS_MAX_DEFAULT 1024
2317#define JOURNAL_TRANS_MIN_DEFAULT 256 2756#define JOURNAL_TRANS_MIN_DEFAULT 256
2318#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ 2757
2758/*
2759 * max blocks to batch into one transaction,
2760 * don't make this any bigger than 900
2761 */
2762#define JOURNAL_MAX_BATCH_DEFAULT 900
2319#define JOURNAL_MIN_RATIO 2 2763#define JOURNAL_MIN_RATIO 2
2320#define JOURNAL_MAX_COMMIT_AGE 30 2764#define JOURNAL_MAX_COMMIT_AGE 30
2321#define JOURNAL_MAX_TRANS_AGE 30 2765#define JOURNAL_MAX_TRANS_AGE 30
@@ -2340,16 +2784,18 @@ struct reiserfs_journal_header {
2340#define REISERFS_QUOTA_DEL_BLOCKS(s) 0 2784#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
2341#endif 2785#endif
2342 2786
2343/* both of these can be as low as 1, or as high as you want. The min is the 2787/*
2344** number of 4k bitmap nodes preallocated on mount. New nodes are allocated 2788 * both of these can be as low as 1, or as high as you want. The min is the
2345** as needed, and released when transactions are committed. On release, if 2789 * number of 4k bitmap nodes preallocated on mount. New nodes are allocated
2346** the current number of nodes is > max, the node is freed, otherwise, 2790 * as needed, and released when transactions are committed. On release, if
2347** it is put on a free list for faster use later. 2791 * the current number of nodes is > max, the node is freed, otherwise,
2792 * it is put on a free list for faster use later.
2348*/ 2793*/
2349#define REISERFS_MIN_BITMAP_NODES 10 2794#define REISERFS_MIN_BITMAP_NODES 10
2350#define REISERFS_MAX_BITMAP_NODES 100 2795#define REISERFS_MAX_BITMAP_NODES 100
2351 2796
2352#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ 2797/* these are based on journal hash size of 8192 */
2798#define JBH_HASH_SHIFT 13
2353#define JBH_HASH_MASK 8191 2799#define JBH_HASH_MASK 8191
2354 2800
2355#define _jhashfn(sb,block) \ 2801#define _jhashfn(sb,block) \
@@ -2357,7 +2803,7 @@ struct reiserfs_journal_header {
2357 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) 2803 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
2358#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) 2804#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
2359 2805
2360// We need these to make journal.c code more readable 2806/* We need these to make journal.c code more readable */
2361#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2807#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2362#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2808#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2363#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2809#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
@@ -2365,12 +2811,14 @@ struct reiserfs_journal_header {
2365enum reiserfs_bh_state_bits { 2811enum reiserfs_bh_state_bits {
2366 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ 2812 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */
2367 BH_JDirty_wait, 2813 BH_JDirty_wait,
2368 BH_JNew, /* disk block was taken off free list before 2814 /*
2369 * being in a finished transaction, or 2815 * disk block was taken off free list before being in a
2370 * written to disk. Can be reused immed. */ 2816 * finished transaction, or written to disk. Can be reused immed.
2817 */
2818 BH_JNew,
2371 BH_JPrepared, 2819 BH_JPrepared,
2372 BH_JRestore_dirty, 2820 BH_JRestore_dirty,
2373 BH_JTest, // debugging only will go away 2821 BH_JTest, /* debugging only will go away */
2374}; 2822};
2375 2823
2376BUFFER_FNS(JDirty, journaled); 2824BUFFER_FNS(JDirty, journaled);
@@ -2386,27 +2834,36 @@ TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2386BUFFER_FNS(JTest, journal_test); 2834BUFFER_FNS(JTest, journal_test);
2387TAS_BUFFER_FNS(JTest, journal_test); 2835TAS_BUFFER_FNS(JTest, journal_test);
2388 2836
2389/* 2837/* transaction handle which is passed around for all journal calls */
2390** transaction handle which is passed around for all journal calls
2391*/
2392struct reiserfs_transaction_handle { 2838struct reiserfs_transaction_handle {
2393 struct super_block *t_super; /* super for this FS when journal_begin was 2839 /*
2394 called. saves calls to reiserfs_get_super 2840 * super for this FS when journal_begin was called. saves calls to
2395 also used by nested transactions to make 2841 * reiserfs_get_super also used by nested transactions to make
2396 sure they are nesting on the right FS 2842 * sure they are nesting on the right FS _must_ be first
2397 _must_ be first in the handle 2843 * in the handle
2398 */ 2844 */
2845 struct super_block *t_super;
2846
2399 int t_refcount; 2847 int t_refcount;
2400 int t_blocks_logged; /* number of blocks this writer has logged */ 2848 int t_blocks_logged; /* number of blocks this writer has logged */
2401 int t_blocks_allocated; /* number of blocks this writer allocated */ 2849 int t_blocks_allocated; /* number of blocks this writer allocated */
2402 unsigned int t_trans_id; /* sanity check, equals the current trans id */ 2850
2851 /* sanity check, equals the current trans id */
2852 unsigned int t_trans_id;
2853
2403 void *t_handle_save; /* save existing current->journal_info */ 2854 void *t_handle_save; /* save existing current->journal_info */
2404 unsigned displace_new_blocks:1; /* if new block allocation occurres, that block 2855
2405 should be displaced from others */ 2856 /*
2857 * if new block allocation occurs, that block
2858 * should be displaced from others
2859 */
2860 unsigned displace_new_blocks:1;
2861
2406 struct list_head t_list; 2862 struct list_head t_list;
2407}; 2863};
2408 2864
2409/* used to keep track of ordered and tail writes, attached to the buffer 2865/*
2866 * used to keep track of ordered and tail writes, attached to the buffer
2410 * head through b_journal_head. 2867 * head through b_journal_head.
2411 */ 2868 */
2412struct reiserfs_jh { 2869struct reiserfs_jh {
@@ -2419,7 +2876,7 @@ void reiserfs_free_jh(struct buffer_head *bh);
2419int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); 2876int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh);
2420int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); 2877int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh);
2421int journal_mark_dirty(struct reiserfs_transaction_handle *, 2878int journal_mark_dirty(struct reiserfs_transaction_handle *,
2422 struct super_block *, struct buffer_head *bh); 2879 struct buffer_head *bh);
2423 2880
2424static inline int reiserfs_file_data_log(struct inode *inode) 2881static inline int reiserfs_file_data_log(struct inode *inode)
2425{ 2882{
@@ -2469,10 +2926,8 @@ int journal_init(struct super_block *, const char *j_dev_name, int old_format,
2469int journal_release(struct reiserfs_transaction_handle *, struct super_block *); 2926int journal_release(struct reiserfs_transaction_handle *, struct super_block *);
2470int journal_release_error(struct reiserfs_transaction_handle *, 2927int journal_release_error(struct reiserfs_transaction_handle *,
2471 struct super_block *); 2928 struct super_block *);
2472int journal_end(struct reiserfs_transaction_handle *, struct super_block *, 2929int journal_end(struct reiserfs_transaction_handle *);
2473 unsigned long); 2930int journal_end_sync(struct reiserfs_transaction_handle *);
2474int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
2475 unsigned long);
2476int journal_mark_freed(struct reiserfs_transaction_handle *, 2931int journal_mark_freed(struct reiserfs_transaction_handle *,
2477 struct super_block *, b_blocknr_t blocknr); 2932 struct super_block *, b_blocknr_t blocknr);
2478int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); 2933int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
@@ -2481,7 +2936,7 @@ int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr,
2481int journal_begin(struct reiserfs_transaction_handle *, 2936int journal_begin(struct reiserfs_transaction_handle *,
2482 struct super_block *sb, unsigned long); 2937 struct super_block *sb, unsigned long);
2483int journal_join_abort(struct reiserfs_transaction_handle *, 2938int journal_join_abort(struct reiserfs_transaction_handle *,
2484 struct super_block *sb, unsigned long); 2939 struct super_block *sb);
2485void reiserfs_abort_journal(struct super_block *sb, int errno); 2940void reiserfs_abort_journal(struct super_block *sb, int errno);
2486void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); 2941void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
2487int reiserfs_allocate_list_bitmaps(struct super_block *s, 2942int reiserfs_allocate_list_bitmaps(struct super_block *s,
@@ -2503,20 +2958,18 @@ int B_IS_IN_TREE(const struct buffer_head *);
2503extern void copy_item_head(struct item_head *to, 2958extern void copy_item_head(struct item_head *to,
2504 const struct item_head *from); 2959 const struct item_head *from);
2505 2960
2506// first key is in cpu form, second - le 2961/* first key is in cpu form, second - le */
2507extern int comp_short_keys(const struct reiserfs_key *le_key, 2962extern int comp_short_keys(const struct reiserfs_key *le_key,
2508 const struct cpu_key *cpu_key); 2963 const struct cpu_key *cpu_key);
2509extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); 2964extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
2510 2965
2511// both are in le form 2966/* both are in le form */
2512extern int comp_le_keys(const struct reiserfs_key *, 2967extern int comp_le_keys(const struct reiserfs_key *,
2513 const struct reiserfs_key *); 2968 const struct reiserfs_key *);
2514extern int comp_short_le_keys(const struct reiserfs_key *, 2969extern int comp_short_le_keys(const struct reiserfs_key *,
2515 const struct reiserfs_key *); 2970 const struct reiserfs_key *);
2516 2971
2517// 2972/* get key version from on disk key - kludge */
2518// get key version from on disk key - kludge
2519//
2520static inline int le_key_version(const struct reiserfs_key *key) 2973static inline int le_key_version(const struct reiserfs_key *key)
2521{ 2974{
2522 int type; 2975 int type;
@@ -2593,12 +3046,12 @@ void padd_item(char *item, int total_length, int length);
2593 3046
2594/* inode.c */ 3047/* inode.c */
2595/* args for the create parameter of reiserfs_get_block */ 3048/* args for the create parameter of reiserfs_get_block */
2596#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ 3049#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
2597#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ 3050#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
2598#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ 3051#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
2599#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ 3052#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
2600#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ 3053#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */
2601#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ 3054#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
2602 3055
2603void reiserfs_read_locked_inode(struct inode *inode, 3056void reiserfs_read_locked_inode(struct inode *inode,
2604 struct reiserfs_iget_args *args); 3057 struct reiserfs_iget_args *args);
@@ -2797,25 +3250,49 @@ struct buffer_head *get_FEB(struct tree_balance *);
2797 3250
2798/* bitmap.c */ 3251/* bitmap.c */
2799 3252
2800/* structure contains hints for block allocator, and it is a container for 3253/*
2801 * arguments, such as node, search path, transaction_handle, etc. */ 3254 * structure contains hints for block allocator, and it is a container for
3255 * arguments, such as node, search path, transaction_handle, etc.
3256 */
2802struct __reiserfs_blocknr_hint { 3257struct __reiserfs_blocknr_hint {
2803 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ 3258 /* inode passed to allocator, if we allocate unf. nodes */
3259 struct inode *inode;
3260
2804 sector_t block; /* file offset, in blocks */ 3261 sector_t block; /* file offset, in blocks */
2805 struct in_core_key key; 3262 struct in_core_key key;
2806 struct treepath *path; /* search path, used by allocator to deternine search_start by 3263
2807 * various ways */ 3264 /*
2808 struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and 3265 * search path, used by allocator to deternine search_start by
2809 * bitmap blocks changes */ 3266 * various ways
3267 */
3268 struct treepath *path;
3269
3270 /*
3271 * transaction handle is needed to log super blocks
3272 * and bitmap blocks changes
3273 */
3274 struct reiserfs_transaction_handle *th;
3275
2810 b_blocknr_t beg, end; 3276 b_blocknr_t beg, end;
2811 b_blocknr_t search_start; /* a field used to transfer search start value (block number) 3277
2812 * between different block allocator procedures 3278 /*
2813 * (determine_search_start() and others) */ 3279 * a field used to transfer search start value (block number)
2814 int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed 3280 * between different block allocator procedures
2815 * function that do actual allocation */ 3281 * (determine_search_start() and others)
2816 3282 */
2817 unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for 3283 b_blocknr_t search_start;
2818 * formatted/unformatted blocks with/without preallocation */ 3284
3285 /*
3286 * is set in determine_prealloc_size() function,
3287 * used by underlayed function that do actual allocation
3288 */
3289 int prealloc_size;
3290
3291 /*
3292 * the allocator uses different polices for getting disk
3293 * space for formatted/unformatted blocks with/without preallocation
3294 */
3295 unsigned formatted_node:1;
2819 unsigned preallocate:1; 3296 unsigned preallocate:1;
2820}; 3297};
2821 3298
@@ -2909,13 +3386,15 @@ __u32 r5_hash(const signed char *msg, int len);
2909#define reiserfs_test_le_bit test_bit_le 3386#define reiserfs_test_le_bit test_bit_le
2910#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le 3387#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le
2911 3388
2912/* sometimes reiserfs_truncate may require to allocate few new blocks 3389/*
2913 to perform indirect2direct conversion. People probably used to 3390 * sometimes reiserfs_truncate may require to allocate few new blocks
2914 think, that truncate should work without problems on a filesystem 3391 * to perform indirect2direct conversion. People probably used to
2915 without free disk space. They may complain that they can not 3392 * think, that truncate should work without problems on a filesystem
2916 truncate due to lack of free disk space. This spare space allows us 3393 * without free disk space. They may complain that they can not
2917 to not worry about it. 500 is probably too much, but it should be 3394 * truncate due to lack of free disk space. This spare space allows us
2918 absolutely safe */ 3395 * to not worry about it. 500 is probably too much, but it should be
3396 * absolutely safe
3397 */
2919#define SPARE_SPACE 500 3398#define SPARE_SPACE 500
2920 3399
2921/* prototypes from ioctl.c */ 3400/* prototypes from ioctl.c */
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index a4ef5cd606eb..6052d323bc9a 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -53,8 +53,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
53 } 53 }
54 bforget(bh); 54 bforget(bh);
55 55
56 /* old disk layout detection; those partitions can be mounted, but 56 /*
57 * cannot be resized */ 57 * old disk layout detection; those partitions can be mounted, but
58 * cannot be resized
59 */
58 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size 60 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
59 != REISERFS_DISK_OFFSET_IN_BYTES) { 61 != REISERFS_DISK_OFFSET_IN_BYTES) {
60 printk 62 printk
@@ -86,12 +88,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
86 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 88 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
87 return -ENOMEM; 89 return -ENOMEM;
88 } 90 }
89 /* the new journal bitmaps are zero filled, now we copy in the bitmap 91 /*
90 ** node pointers from the old journal bitmap structs, and then 92 * the new journal bitmaps are zero filled, now we copy in
91 ** transfer the new data structures into the journal struct. 93 * the bitmap node pointers from the old journal bitmap
92 ** 94 * structs, and then transfer the new data structures
93 ** using the copy_size var below allows this code to work for 95 * into the journal struct.
94 ** both shrinking and expanding the FS. 96 *
97 * using the copy_size var below allows this code to work for
98 * both shrinking and expanding the FS.
95 */ 99 */
96 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; 100 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
97 copy_size = 101 copy_size =
@@ -101,36 +105,45 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
101 jb = SB_JOURNAL(s)->j_list_bitmap + i; 105 jb = SB_JOURNAL(s)->j_list_bitmap + i;
102 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); 106 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
103 107
104 /* just in case vfree schedules on us, copy the new 108 /*
105 ** pointer into the journal struct before freeing the 109 * just in case vfree schedules on us, copy the new
106 ** old one 110 * pointer into the journal struct before freeing the
111 * old one
107 */ 112 */
108 node_tmp = jb->bitmaps; 113 node_tmp = jb->bitmaps;
109 jb->bitmaps = jbitmap[i].bitmaps; 114 jb->bitmaps = jbitmap[i].bitmaps;
110 vfree(node_tmp); 115 vfree(node_tmp);
111 } 116 }
112 117
113 /* allocate additional bitmap blocks, reallocate array of bitmap 118 /*
114 * block pointers */ 119 * allocate additional bitmap blocks, reallocate
120 * array of bitmap block pointers
121 */
115 bitmap = 122 bitmap =
116 vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); 123 vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
117 if (!bitmap) { 124 if (!bitmap) {
118 /* Journal bitmaps are still supersized, but the memory isn't 125 /*
119 * leaked, so I guess it's ok */ 126 * Journal bitmaps are still supersized, but the
127 * memory isn't leaked, so I guess it's ok
128 */
120 printk("reiserfs_resize: unable to allocate memory.\n"); 129 printk("reiserfs_resize: unable to allocate memory.\n");
121 return -ENOMEM; 130 return -ENOMEM;
122 } 131 }
123 for (i = 0; i < bmap_nr; i++) 132 for (i = 0; i < bmap_nr; i++)
124 bitmap[i] = old_bitmap[i]; 133 bitmap[i] = old_bitmap[i];
125 134
126 /* This doesn't go through the journal, but it doesn't have to. 135 /*
127 * The changes are still atomic: We're synced up when the journal 136 * This doesn't go through the journal, but it doesn't have to.
128 * transaction begins, and the new bitmaps don't matter if the 137 * The changes are still atomic: We're synced up when the
129 * transaction fails. */ 138 * journal transaction begins, and the new bitmaps don't
139 * matter if the transaction fails.
140 */
130 for (i = bmap_nr; i < bmap_nr_new; i++) { 141 for (i = bmap_nr; i < bmap_nr_new; i++) {
131 int depth; 142 int depth;
132 /* don't use read_bitmap_block since it will cache 143 /*
133 * the uninitialized bitmap */ 144 * don't use read_bitmap_block since it will cache
145 * the uninitialized bitmap
146 */
134 depth = reiserfs_write_unlock_nested(s); 147 depth = reiserfs_write_unlock_nested(s);
135 bh = sb_bread(s, i * s->s_blocksize * 8); 148 bh = sb_bread(s, i * s->s_blocksize * 8);
136 reiserfs_write_lock_nested(s, depth); 149 reiserfs_write_lock_nested(s, depth);
@@ -147,7 +160,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
147 depth = reiserfs_write_unlock_nested(s); 160 depth = reiserfs_write_unlock_nested(s);
148 sync_dirty_buffer(bh); 161 sync_dirty_buffer(bh);
149 reiserfs_write_lock_nested(s, depth); 162 reiserfs_write_lock_nested(s, depth);
150 // update bitmap_info stuff 163 /* update bitmap_info stuff */
151 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 164 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
152 brelse(bh); 165 brelse(bh);
153 } 166 }
@@ -156,9 +169,11 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
156 vfree(old_bitmap); 169 vfree(old_bitmap);
157 } 170 }
158 171
159 /* begin transaction, if there was an error, it's fine. Yes, we have 172 /*
173 * begin transaction, if there was an error, it's fine. Yes, we have
160 * incorrect bitmaps now, but none of it is ever going to touch the 174 * incorrect bitmaps now, but none of it is ever going to touch the
161 * disk anyway. */ 175 * disk anyway.
176 */
162 err = journal_begin(&th, s, 10); 177 err = journal_begin(&th, s, 10);
163 if (err) 178 if (err)
164 return err; 179 return err;
@@ -167,7 +182,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
167 info = SB_AP_BITMAP(s) + bmap_nr - 1; 182 info = SB_AP_BITMAP(s) + bmap_nr - 1;
168 bh = reiserfs_read_bitmap_block(s, bmap_nr - 1); 183 bh = reiserfs_read_bitmap_block(s, bmap_nr - 1);
169 if (!bh) { 184 if (!bh) {
170 int jerr = journal_end(&th, s, 10); 185 int jerr = journal_end(&th);
171 if (jerr) 186 if (jerr)
172 return jerr; 187 return jerr;
173 return -EIO; 188 return -EIO;
@@ -178,14 +193,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
178 reiserfs_clear_le_bit(i, bh->b_data); 193 reiserfs_clear_le_bit(i, bh->b_data);
179 info->free_count += s->s_blocksize * 8 - block_r; 194 info->free_count += s->s_blocksize * 8 - block_r;
180 195
181 journal_mark_dirty(&th, s, bh); 196 journal_mark_dirty(&th, bh);
182 brelse(bh); 197 brelse(bh);
183 198
184 /* Correct new last bitmap block - It may not be full */ 199 /* Correct new last bitmap block - It may not be full */
185 info = SB_AP_BITMAP(s) + bmap_nr_new - 1; 200 info = SB_AP_BITMAP(s) + bmap_nr_new - 1;
186 bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1); 201 bh = reiserfs_read_bitmap_block(s, bmap_nr_new - 1);
187 if (!bh) { 202 if (!bh) {
188 int jerr = journal_end(&th, s, 10); 203 int jerr = journal_end(&th);
189 if (jerr) 204 if (jerr)
190 return jerr; 205 return jerr;
191 return -EIO; 206 return -EIO;
@@ -194,7 +209,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
194 reiserfs_prepare_for_journal(s, bh, 1); 209 reiserfs_prepare_for_journal(s, bh, 1);
195 for (i = block_r_new; i < s->s_blocksize * 8; i++) 210 for (i = block_r_new; i < s->s_blocksize * 8; i++)
196 reiserfs_set_le_bit(i, bh->b_data); 211 reiserfs_set_le_bit(i, bh->b_data);
197 journal_mark_dirty(&th, s, bh); 212 journal_mark_dirty(&th, bh);
198 brelse(bh); 213 brelse(bh);
199 214
200 info->free_count -= s->s_blocksize * 8 - block_r_new; 215 info->free_count -= s->s_blocksize * 8 - block_r_new;
@@ -207,8 +222,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
207 PUT_SB_BLOCK_COUNT(s, block_count_new); 222 PUT_SB_BLOCK_COUNT(s, block_count_new);
208 PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); 223 PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
209 224
210 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 225 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
211 226
212 SB_JOURNAL(s)->j_must_wait = 1; 227 SB_JOURNAL(s)->j_must_wait = 1;
213 return journal_end(&th, s, 10); 228 return journal_end(&th);
214} 229}
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 615cd9ab7940..dd44468edc2b 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -8,46 +8,6 @@
8 * Pereslavl-Zalessky Russia 8 * Pereslavl-Zalessky Russia
9 */ 9 */
10 10
11/*
12 * This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * reiserfs_check_path
27 * pathrelse_and_restore
28 * pathrelse
29 * search_by_key_reada
30 * search_by_key
31 * search_for_position_by_key
32 * comp_items
33 * prepare_for_direct_item
34 * prepare_for_direntry_item
35 * prepare_for_delete_or_cut
36 * calc_deleted_bytes_number
37 * init_tb_struct
38 * padd_item
39 * reiserfs_delete_item
40 * reiserfs_delete_solid_item
41 * reiserfs_delete_object
42 * maybe_indirect_to_direct
43 * indirect_to_direct_roll_back
44 * reiserfs_cut_from_item
45 * truncate_directory
46 * reiserfs_do_truncate
47 * reiserfs_paste_into_item
48 * reiserfs_insert_item
49 */
50
51#include <linux/time.h> 11#include <linux/time.h>
52#include <linux/string.h> 12#include <linux/string.h>
53#include <linux/pagemap.h> 13#include <linux/pagemap.h>
@@ -65,21 +25,21 @@ inline int B_IS_IN_TREE(const struct buffer_head *bh)
65 return (B_LEVEL(bh) != FREE_LEVEL); 25 return (B_LEVEL(bh) != FREE_LEVEL);
66} 26}
67 27
68// 28/* to get item head in le form */
69// to gets item head in le form
70//
71inline void copy_item_head(struct item_head *to, 29inline void copy_item_head(struct item_head *to,
72 const struct item_head *from) 30 const struct item_head *from)
73{ 31{
74 memcpy(to, from, IH_SIZE); 32 memcpy(to, from, IH_SIZE);
75} 33}
76 34
77/* k1 is pointer to on-disk structure which is stored in little-endian 35/*
78 form. k2 is pointer to cpu variable. For key of items of the same 36 * k1 is pointer to on-disk structure which is stored in little-endian
79 object this returns 0. 37 * form. k2 is pointer to cpu variable. For key of items of the same
80 Returns: -1 if key1 < key2 38 * object this returns 0.
81 0 if key1 == key2 39 * Returns: -1 if key1 < key2
82 1 if key1 > key2 */ 40 * 0 if key1 == key2
41 * 1 if key1 > key2
42 */
83inline int comp_short_keys(const struct reiserfs_key *le_key, 43inline int comp_short_keys(const struct reiserfs_key *le_key,
84 const struct cpu_key *cpu_key) 44 const struct cpu_key *cpu_key)
85{ 45{
@@ -97,11 +57,13 @@ inline int comp_short_keys(const struct reiserfs_key *le_key,
97 return 0; 57 return 0;
98} 58}
99 59
100/* k1 is pointer to on-disk structure which is stored in little-endian 60/*
101 form. k2 is pointer to cpu variable. 61 * k1 is pointer to on-disk structure which is stored in little-endian
102 Compare keys using all 4 key fields. 62 * form. k2 is pointer to cpu variable.
103 Returns: -1 if key1 < key2 0 63 * Compare keys using all 4 key fields.
104 if key1 = key2 1 if key1 > key2 */ 64 * Returns: -1 if key1 < key2 0
65 * if key1 = key2 1 if key1 > key2
66 */
105static inline int comp_keys(const struct reiserfs_key *le_key, 67static inline int comp_keys(const struct reiserfs_key *le_key,
106 const struct cpu_key *cpu_key) 68 const struct cpu_key *cpu_key)
107{ 69{
@@ -155,15 +117,17 @@ inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
155 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); 117 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); 118 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157 119
158 // find out version of the key 120 /* find out version of the key */
159 version = le_key_version(from); 121 version = le_key_version(from);
160 to->version = version; 122 to->version = version;
161 to->on_disk_key.k_offset = le_key_k_offset(version, from); 123 to->on_disk_key.k_offset = le_key_k_offset(version, from);
162 to->on_disk_key.k_type = le_key_k_type(version, from); 124 to->on_disk_key.k_type = le_key_k_type(version, from);
163} 125}
164 126
165// this does not say which one is bigger, it only returns 1 if keys 127/*
166// are not equal, 0 otherwise 128 * this does not say which one is bigger, it only returns 1 if keys
129 * are not equal, 0 otherwise
130 */
167inline int comp_le_keys(const struct reiserfs_key *k1, 131inline int comp_le_keys(const struct reiserfs_key *k1,
168 const struct reiserfs_key *k2) 132 const struct reiserfs_key *k2)
169{ 133{
@@ -177,24 +141,27 @@ inline int comp_le_keys(const struct reiserfs_key *k1,
177 * *pos = number of the searched element if found, else the * 141 * *pos = number of the searched element if found, else the *
178 * number of the first element that is larger than key. * 142 * number of the first element that is larger than key. *
179 **************************************************************************/ 143 **************************************************************************/
180/* For those not familiar with binary search: lbound is the leftmost item that it 144/*
181 could be, rbound the rightmost item that it could be. We examine the item 145 * For those not familiar with binary search: lbound is the leftmost item
182 halfway between lbound and rbound, and that tells us either that we can increase 146 * that it could be, rbound the rightmost item that it could be. We examine
183 lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that 147 * the item halfway between lbound and rbound, and that tells us either
184 there are no possible items, and we have not found it. With each examination we 148 * that we can increase lbound, or decrease rbound, or that we have found it,
185 cut the number of possible items it could be by one more than half rounded down, 149 * or if lbound <= rbound that there are no possible items, and we have not
186 or we find it. */ 150 * found it. With each examination we cut the number of possible items it
151 * could be by one more than half rounded down, or we find it.
152 */
187static inline int bin_search(const void *key, /* Key to search for. */ 153static inline int bin_search(const void *key, /* Key to search for. */
188 const void *base, /* First item in the array. */ 154 const void *base, /* First item in the array. */
189 int num, /* Number of items in the array. */ 155 int num, /* Number of items in the array. */
190 int width, /* Item size in the array. 156 /*
191 searched. Lest the reader be 157 * Item size in the array. searched. Lest the
192 confused, note that this is crafted 158 * reader be confused, note that this is crafted
193 as a general function, and when it 159 * as a general function, and when it is applied
194 is applied specifically to the array 160 * specifically to the array of item headers in a
195 of item headers in a node, width 161 * node, width is actually the item header size
196 is actually the item header size not 162 * not the item size.
197 the item size. */ 163 */
164 int width,
198 int *pos /* Number of the searched for element. */ 165 int *pos /* Number of the searched for element. */
199 ) 166 )
200{ 167{
@@ -216,8 +183,10 @@ static inline int bin_search(const void *key, /* Key to search for. */
216 return ITEM_FOUND; /* Key found in the array. */ 183 return ITEM_FOUND; /* Key found in the array. */
217 } 184 }
218 185
219 /* bin_search did not find given key, it returns position of key, 186 /*
220 that is minimal and greater than the given one. */ 187 * bin_search did not find given key, it returns position of key,
188 * that is minimal and greater than the given one.
189 */
221 *pos = lbound; 190 *pos = lbound;
222 return ITEM_NOT_FOUND; 191 return ITEM_NOT_FOUND;
223} 192}
@@ -234,10 +203,14 @@ static const struct reiserfs_key MAX_KEY = {
234 cpu_to_le32(0xffffffff)},} 203 cpu_to_le32(0xffffffff)},}
235}; 204};
236 205
237/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom 206/*
238 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 207 * Get delimiting key of the buffer by looking for it in the buffers in the
239 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 208 * path, starting from the bottom of the path, and going upwards. We must
240 case we return a special key, either MIN_KEY or MAX_KEY. */ 209 * check the path's validity at each step. If the key is not in the path,
210 * there is no delimiting key in the tree (buffer is first or last buffer
211 * in tree), and in this case we return a special key, either MIN_KEY or
212 * MAX_KEY.
213 */
241static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, 214static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242 const struct super_block *sb) 215 const struct super_block *sb)
243{ 216{
@@ -270,9 +243,12 @@ static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_pat
270 PATH_OFFSET_PBUFFER(chk_path, 243 PATH_OFFSET_PBUFFER(chk_path,
271 path_offset + 1)->b_blocknr) 244 path_offset + 1)->b_blocknr)
272 return &MAX_KEY; 245 return &MAX_KEY;
273 /* Return delimiting key if position in the parent is not equal to zero. */ 246 /*
247 * Return delimiting key if position in the parent
248 * is not equal to zero.
249 */
274 if (position) 250 if (position)
275 return B_N_PDELIM_KEY(parent, position - 1); 251 return internal_key(parent, position - 1);
276 } 252 }
277 /* Return MIN_KEY if we are in the root of the buffer tree. */ 253 /* Return MIN_KEY if we are in the root of the buffer tree. */
278 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 254 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
@@ -308,15 +284,23 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
308 path_offset)) > 284 path_offset)) >
309 B_NR_ITEMS(parent)) 285 B_NR_ITEMS(parent))
310 return &MIN_KEY; 286 return &MIN_KEY;
311 /* Check whether parent at the path really points to the child. */ 287 /*
288 * Check whether parent at the path really points
289 * to the child.
290 */
312 if (B_N_CHILD_NUM(parent, position) != 291 if (B_N_CHILD_NUM(parent, position) !=
313 PATH_OFFSET_PBUFFER(chk_path, 292 PATH_OFFSET_PBUFFER(chk_path,
314 path_offset + 1)->b_blocknr) 293 path_offset + 1)->b_blocknr)
315 return &MIN_KEY; 294 return &MIN_KEY;
316 /* Return delimiting key if position in the parent is not the last one. */ 295
296 /*
297 * Return delimiting key if position in the parent
298 * is not the last one.
299 */
317 if (position != B_NR_ITEMS(parent)) 300 if (position != B_NR_ITEMS(parent))
318 return B_N_PDELIM_KEY(parent, position); 301 return internal_key(parent, position);
319 } 302 }
303
320 /* Return MAX_KEY if we are in the root of the buffer tree. */ 304 /* Return MAX_KEY if we are in the root of the buffer tree. */
321 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 305 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
322 b_blocknr == SB_ROOT_BLOCK(sb)) 306 b_blocknr == SB_ROOT_BLOCK(sb))
@@ -324,13 +308,20 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
324 return &MIN_KEY; 308 return &MIN_KEY;
325} 309}
326 310
327/* Check whether a key is contained in the tree rooted from a buffer at a path. */ 311/*
328/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in 312 * Check whether a key is contained in the tree rooted from a buffer at a path.
329 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 313 * This works by looking at the left and right delimiting keys for the buffer
330 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 314 * in the last path_element in the path. These delimiting keys are stored
331 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 315 * at least one level above that buffer in the tree. If the buffer is the
332static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ 316 * first or last node in the tree order then one of the delimiting keys may
333 const struct cpu_key *key, /* Key which should be checked. */ 317 * be absent, and in this case get_lkey and get_rkey return a special key
318 * which is MIN_KEY or MAX_KEY.
319 */
320static inline int key_in_buffer(
321 /* Path which should be checked. */
322 struct treepath *chk_path,
323 /* Key which should be checked. */
324 const struct cpu_key *key,
334 struct super_block *sb 325 struct super_block *sb
335 ) 326 )
336{ 327{
@@ -359,9 +350,11 @@ int reiserfs_check_path(struct treepath *p)
359 return 0; 350 return 0;
360} 351}
361 352
362/* Drop the reference to each buffer in a path and restore 353/*
354 * Drop the reference to each buffer in a path and restore
363 * dirty bits clean when preparing the buffer for the log. 355 * dirty bits clean when preparing the buffer for the log.
364 * This version should only be called from fix_nodes() */ 356 * This version should only be called from fix_nodes()
357 */
365void pathrelse_and_restore(struct super_block *sb, 358void pathrelse_and_restore(struct super_block *sb,
366 struct treepath *search_path) 359 struct treepath *search_path)
367{ 360{
@@ -418,14 +411,17 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
418 } 411 }
419 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 412 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); 413 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
414
415 /* free space does not match to calculated amount of use space */
421 if (used_space != blocksize - blkh_free_space(blkh)) { 416 if (used_space != blocksize - blkh_free_space(blkh)) {
422 /* free space does not match to calculated amount of use space */
423 reiserfs_warning(NULL, "reiserfs-5082", 417 reiserfs_warning(NULL, "reiserfs-5082",
424 "free space seems wrong: %z", bh); 418 "free space seems wrong: %z", bh);
425 return 0; 419 return 0;
426 } 420 }
427 // FIXME: it is_leaf will hit performance too much - we may have 421 /*
428 // return 1 here 422 * FIXME: it is_leaf will hit performance too much - we may have
423 * return 1 here
424 */
429 425
430 /* check tables of item heads */ 426 /* check tables of item heads */
431 ih = (struct item_head *)(buf + BLKH_SIZE); 427 ih = (struct item_head *)(buf + BLKH_SIZE);
@@ -460,7 +456,7 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
460 prev_location = ih_location(ih); 456 prev_location = ih_location(ih);
461 } 457 }
462 458
463 // one may imagine much more checks 459 /* one may imagine many more checks */
464 return 1; 460 return 1;
465} 461}
466 462
@@ -481,8 +477,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
481 } 477 }
482 478
483 nr = blkh_nr_item(blkh); 479 nr = blkh_nr_item(blkh);
480 /* for internal which is not root we might check min number of keys */
484 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { 481 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
485 /* for internal which is not root we might check min number of keys */
486 reiserfs_warning(NULL, "reiserfs-5088", 482 reiserfs_warning(NULL, "reiserfs-5088",
487 "number of key seems wrong: %z", bh); 483 "number of key seems wrong: %z", bh);
488 return 0; 484 return 0;
@@ -494,12 +490,15 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
494 "free space seems wrong: %z", bh); 490 "free space seems wrong: %z", bh);
495 return 0; 491 return 0;
496 } 492 }
497 // one may imagine much more checks 493
494 /* one may imagine many more checks */
498 return 1; 495 return 1;
499} 496}
500 497
501// make sure that bh contains formatted node of reiserfs tree of 498/*
502// 'level'-th level 499 * make sure that bh contains formatted node of reiserfs tree of
500 * 'level'-th level
501 */
503static int is_tree_node(struct buffer_head *bh, int level) 502static int is_tree_node(struct buffer_head *bh, int level)
504{ 503{
505 if (B_LEVEL(bh) != level) { 504 if (B_LEVEL(bh) != level) {
@@ -546,7 +545,8 @@ static int search_by_key_reada(struct super_block *s,
546 for (j = 0; j < i; j++) { 545 for (j = 0; j < i; j++) {
547 /* 546 /*
548 * note, this needs attention if we are getting rid of the BKL 547 * note, this needs attention if we are getting rid of the BKL
549 * you have to make sure the prepared bit isn't set on this buffer 548 * you have to make sure the prepared bit isn't set on this
549 * buffer
550 */ 550 */
551 if (!buffer_uptodate(bh[j])) { 551 if (!buffer_uptodate(bh[j])) {
552 if (depth == -1) 552 if (depth == -1)
@@ -558,39 +558,34 @@ static int search_by_key_reada(struct super_block *s,
558 return depth; 558 return depth;
559} 559}
560 560
561/************************************************************************** 561/*
562 * Algorithm SearchByKey * 562 * This function fills up the path from the root to the leaf as it
563 * look for item in the Disk S+Tree by its key * 563 * descends the tree looking for the key. It uses reiserfs_bread to
564 * Input: sb - super block * 564 * try to find buffers in the cache given their block number. If it
565 * key - pointer to the key to search * 565 * does not find them in the cache it reads them from disk. For each
566 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * 566 * node search_by_key finds using reiserfs_bread it then uses
567 * search_path - path from the root to the needed leaf * 567 * bin_search to look through that node. bin_search will find the
568 **************************************************************************/ 568 * position of the block_number of the next node if it is looking
569 569 * through an internal node. If it is looking through a leaf node
570/* This function fills up the path from the root to the leaf as it 570 * bin_search will find the position of the item which has key either
571 descends the tree looking for the key. It uses reiserfs_bread to 571 * equal to given key, or which is the maximal key less than the given
572 try to find buffers in the cache given their block number. If it 572 * key. search_by_key returns a path that must be checked for the
573 does not find them in the cache it reads them from disk. For each 573 * correctness of the top of the path but need not be checked for the
574 node search_by_key finds using reiserfs_bread it then uses 574 * correctness of the bottom of the path
575 bin_search to look through that node. bin_search will find the 575 */
576 position of the block_number of the next node if it is looking 576/*
577 through an internal node. If it is looking through a leaf node 577 * search_by_key - search for key (and item) in stree
578 bin_search will find the position of the item which has key either 578 * @sb: superblock
579 equal to given key, or which is the maximal key less than the given 579 * @key: pointer to key to search for
580 key. search_by_key returns a path that must be checked for the 580 * @search_path: Allocated and initialized struct treepath; Returned filled
581 correctness of the top of the path but need not be checked for the 581 * on success.
582 correctness of the bottom of the path */ 582 * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to
583/* The function is NOT SCHEDULE-SAFE! */ 583 * stop at leaf level.
584int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ 584 *
585 struct treepath *search_path,/* This structure was 585 * The function is NOT SCHEDULE-SAFE!
586 allocated and initialized 586 */
587 by the calling 587int search_by_key(struct super_block *sb, const struct cpu_key *key,
588 function. It is filled up 588 struct treepath *search_path, int stop_level)
589 by this function. */
590 int stop_level /* How far down the tree to search. To
591 stop at leaf level - set to
592 DISK_LEAF_NODE_LEVEL */
593 )
594{ 589{
595 b_blocknr_t block_number; 590 b_blocknr_t block_number;
596 int expected_level; 591 int expected_level;
@@ -609,17 +604,22 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
609 604
610 PROC_INFO_INC(sb, search_by_key); 605 PROC_INFO_INC(sb, search_by_key);
611 606
612 /* As we add each node to a path we increase its count. This means that 607 /*
613 we must be careful to release all nodes in a path before we either 608 * As we add each node to a path we increase its count. This means
614 discard the path struct or re-use the path struct, as we do here. */ 609 * that we must be careful to release all nodes in a path before we
610 * either discard the path struct or re-use the path struct, as we
611 * do here.
612 */
615 613
616 pathrelse(search_path); 614 pathrelse(search_path);
617 615
618 right_neighbor_of_leaf_node = 0; 616 right_neighbor_of_leaf_node = 0;
619 617
620 /* With each iteration of this loop we search through the items in the 618 /*
621 current node, and calculate the next current node(next path element) 619 * With each iteration of this loop we search through the items in the
622 for the next iteration of this loop.. */ 620 * current node, and calculate the next current node(next path element)
621 * for the next iteration of this loop..
622 */
623 block_number = SB_ROOT_BLOCK(sb); 623 block_number = SB_ROOT_BLOCK(sb);
624 expected_level = -1; 624 expected_level = -1;
625 while (1) { 625 while (1) {
@@ -639,8 +639,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
639 ++search_path->path_length); 639 ++search_path->path_length);
640 fs_gen = get_generation(sb); 640 fs_gen = get_generation(sb);
641 641
642 /* Read the next tree node, and set the last element in the path to 642 /*
643 have a pointer to it. */ 643 * Read the next tree node, and set the last element
644 * in the path to have a pointer to it.
645 */
644 if ((bh = last_element->pe_buffer = 646 if ((bh = last_element->pe_buffer =
645 sb_getblk(sb, block_number))) { 647 sb_getblk(sb, block_number))) {
646 648
@@ -666,7 +668,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
666 if (!buffer_uptodate(bh)) 668 if (!buffer_uptodate(bh))
667 goto io_error; 669 goto io_error;
668 } else { 670 } else {
669 io_error: 671io_error:
670 search_path->path_length--; 672 search_path->path_length--;
671 pathrelse(search_path); 673 pathrelse(search_path);
672 return IO_ERROR; 674 return IO_ERROR;
@@ -676,9 +678,12 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
676 expected_level = SB_TREE_HEIGHT(sb); 678 expected_level = SB_TREE_HEIGHT(sb);
677 expected_level--; 679 expected_level--;
678 680
679 /* It is possible that schedule occurred. We must check whether the key 681 /*
680 to search is still in the tree rooted from the current buffer. If 682 * It is possible that schedule occurred. We must check
681 not then repeat search from the root. */ 683 * whether the key to search is still in the tree rooted
684 * from the current buffer. If not then repeat search
685 * from the root.
686 */
682 if (fs_changed(fs_gen, sb) && 687 if (fs_changed(fs_gen, sb) &&
683 (!B_IS_IN_TREE(bh) || 688 (!B_IS_IN_TREE(bh) ||
684 B_LEVEL(bh) != expected_level || 689 B_LEVEL(bh) != expected_level ||
@@ -689,8 +694,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
689 sbk_restarted[expected_level - 1]); 694 sbk_restarted[expected_level - 1]);
690 pathrelse(search_path); 695 pathrelse(search_path);
691 696
692 /* Get the root block number so that we can repeat the search 697 /*
693 starting from the root. */ 698 * Get the root block number so that we can
699 * repeat the search starting from the root.
700 */
694 block_number = SB_ROOT_BLOCK(sb); 701 block_number = SB_ROOT_BLOCK(sb);
695 expected_level = -1; 702 expected_level = -1;
696 right_neighbor_of_leaf_node = 0; 703 right_neighbor_of_leaf_node = 0;
@@ -699,9 +706,11 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
699 continue; 706 continue;
700 } 707 }
701 708
702 /* only check that the key is in the buffer if key is not 709 /*
703 equal to the MAX_KEY. Latter case is only possible in 710 * only check that the key is in the buffer if key is not
704 "finish_unfinished()" processing during mount. */ 711 * equal to the MAX_KEY. Latter case is only possible in
712 * "finish_unfinished()" processing during mount.
713 */
705 RFALSE(comp_keys(&MAX_KEY, key) && 714 RFALSE(comp_keys(&MAX_KEY, key) &&
706 !key_in_buffer(search_path, key, sb), 715 !key_in_buffer(search_path, key, sb),
707 "PAP-5130: key is not in the buffer"); 716 "PAP-5130: key is not in the buffer");
@@ -713,8 +722,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
713 } 722 }
714#endif 723#endif
715 724
716 // make sure, that the node contents look like a node of 725 /*
717 // certain level 726 * make sure, that the node contents look like a node of
727 * certain level
728 */
718 if (!is_tree_node(bh, expected_level)) { 729 if (!is_tree_node(bh, expected_level)) {
719 reiserfs_error(sb, "vs-5150", 730 reiserfs_error(sb, "vs-5150",
720 "invalid format found in block %ld. " 731 "invalid format found in block %ld. "
@@ -732,32 +743,42 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
732 "vs-5152: tree level (%d) is less than stop level (%d)", 743 "vs-5152: tree level (%d) is less than stop level (%d)",
733 node_level, stop_level); 744 node_level, stop_level);
734 745
735 retval = bin_search(key, B_N_PITEM_HEAD(bh, 0), 746 retval = bin_search(key, item_head(bh, 0),
736 B_NR_ITEMS(bh), 747 B_NR_ITEMS(bh),
737 (node_level == 748 (node_level ==
738 DISK_LEAF_NODE_LEVEL) ? IH_SIZE : 749 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
739 KEY_SIZE, 750 KEY_SIZE,
740 &(last_element->pe_position)); 751 &last_element->pe_position);
741 if (node_level == stop_level) { 752 if (node_level == stop_level) {
742 return retval; 753 return retval;
743 } 754 }
744 755
745 /* we are not in the stop level */ 756 /* we are not in the stop level */
757 /*
758 * item has been found, so we choose the pointer which
759 * is to the right of the found one
760 */
746 if (retval == ITEM_FOUND) 761 if (retval == ITEM_FOUND)
747 /* item has been found, so we choose the pointer which is to the right of the found one */
748 last_element->pe_position++; 762 last_element->pe_position++;
749 763
750 /* if item was not found we choose the position which is to 764 /*
751 the left of the found item. This requires no code, 765 * if item was not found we choose the position which is to
752 bin_search did it already. */ 766 * the left of the found item. This requires no code,
767 * bin_search did it already.
768 */
753 769
754 /* So we have chosen a position in the current node which is 770 /*
755 an internal node. Now we calculate child block number by 771 * So we have chosen a position in the current node which is
756 position in the node. */ 772 * an internal node. Now we calculate child block number by
773 * position in the node.
774 */
757 block_number = 775 block_number =
758 B_N_CHILD_NUM(bh, last_element->pe_position); 776 B_N_CHILD_NUM(bh, last_element->pe_position);
759 777
760 /* if we are going to read leaf nodes, try for read ahead as well */ 778 /*
779 * if we are going to read leaf nodes, try for read
780 * ahead as well
781 */
761 if ((search_path->reada & PATH_READA) && 782 if ((search_path->reada & PATH_READA) &&
762 node_level == DISK_LEAF_NODE_LEVEL + 1) { 783 node_level == DISK_LEAF_NODE_LEVEL + 1) {
763 int pos = last_element->pe_position; 784 int pos = last_element->pe_position;
@@ -779,7 +800,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
779 /* 800 /*
780 * check to make sure we're in the same object 801 * check to make sure we're in the same object
781 */ 802 */
782 le_key = B_N_PDELIM_KEY(bh, pos); 803 le_key = internal_key(bh, pos);
783 if (le32_to_cpu(le_key->k_objectid) != 804 if (le32_to_cpu(le_key->k_objectid) !=
784 key->on_disk_key.k_objectid) { 805 key->on_disk_key.k_objectid) {
785 break; 806 break;
@@ -789,26 +810,28 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
789 } 810 }
790} 811}
791 812
792/* Form the path to an item and position in this item which contains 813/*
793 file byte defined by key. If there is no such item 814 * Form the path to an item and position in this item which contains
794 corresponding to the key, we point the path to the item with 815 * file byte defined by key. If there is no such item
795 maximal key less than key, and *pos_in_item is set to one 816 * corresponding to the key, we point the path to the item with
796 past the last entry/byte in the item. If searching for entry in a 817 * maximal key less than key, and *pos_in_item is set to one
797 directory item, and it is not found, *pos_in_item is set to one 818 * past the last entry/byte in the item. If searching for entry in a
798 entry more than the entry with maximal key which is less than the 819 * directory item, and it is not found, *pos_in_item is set to one
799 sought key. 820 * entry more than the entry with maximal key which is less than the
800 821 * sought key.
801 Note that if there is no entry in this same node which is one more, 822 *
802 then we point to an imaginary entry. for direct items, the 823 * Note that if there is no entry in this same node which is one more,
803 position is in units of bytes, for indirect items the position is 824 * then we point to an imaginary entry. for direct items, the
804 in units of blocknr entries, for directory items the position is in 825 * position is in units of bytes, for indirect items the position is
805 units of directory entries. */ 826 * in units of blocknr entries, for directory items the position is in
806 827 * units of directory entries.
828 */
807/* The function is NOT SCHEDULE-SAFE! */ 829/* The function is NOT SCHEDULE-SAFE! */
808int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ 830int search_for_position_by_key(struct super_block *sb,
809 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ 831 /* Key to search (cpu variable) */
810 struct treepath *search_path /* Filled up by this function. */ 832 const struct cpu_key *p_cpu_key,
811 ) 833 /* Filled up by this function. */
834 struct treepath *search_path)
812{ 835{
813 struct item_head *p_le_ih; /* pointer to on-disk structure */ 836 struct item_head *p_le_ih; /* pointer to on-disk structure */
814 int blk_size; 837 int blk_size;
@@ -830,7 +853,7 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b
830 if (retval == ITEM_FOUND) { 853 if (retval == ITEM_FOUND) {
831 854
832 RFALSE(!ih_item_len 855 RFALSE(!ih_item_len
833 (B_N_PITEM_HEAD 856 (item_head
834 (PATH_PLAST_BUFFER(search_path), 857 (PATH_PLAST_BUFFER(search_path),
835 PATH_LAST_POSITION(search_path))), 858 PATH_LAST_POSITION(search_path))),
836 "PAP-5165: item length equals zero"); 859 "PAP-5165: item length equals zero");
@@ -844,14 +867,14 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b
844 867
845 /* Item is not found. Set path to the previous item. */ 868 /* Item is not found. Set path to the previous item. */
846 p_le_ih = 869 p_le_ih =
847 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path), 870 item_head(PATH_PLAST_BUFFER(search_path),
848 --PATH_LAST_POSITION(search_path)); 871 --PATH_LAST_POSITION(search_path));
849 blk_size = sb->s_blocksize; 872 blk_size = sb->s_blocksize;
850 873
851 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { 874 if (comp_short_keys(&p_le_ih->ih_key, p_cpu_key))
852 return FILE_NOT_FOUND; 875 return FILE_NOT_FOUND;
853 } 876
854 // FIXME: quite ugly this far 877 /* FIXME: quite ugly this far */
855 878
856 item_offset = le_ih_k_offset(p_le_ih); 879 item_offset = le_ih_k_offset(p_le_ih);
857 offset = cpu_key_k_offset(p_cpu_key); 880 offset = cpu_key_k_offset(p_cpu_key);
@@ -866,8 +889,10 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b
866 return POSITION_FOUND; 889 return POSITION_FOUND;
867 } 890 }
868 891
869 /* Needed byte is not contained in the item pointed to by the 892 /*
870 path. Set pos_in_item out of the item. */ 893 * Needed byte is not contained in the item pointed to by the
894 * path. Set pos_in_item out of the item.
895 */
871 if (is_indirect_le_ih(p_le_ih)) 896 if (is_indirect_le_ih(p_le_ih))
872 pos_in_item(search_path) = 897 pos_in_item(search_path) =
873 ih_item_len(p_le_ih) / UNFM_P_SIZE; 898 ih_item_len(p_le_ih) / UNFM_P_SIZE;
@@ -892,19 +917,17 @@ int comp_items(const struct item_head *stored_ih, const struct treepath *path)
892 return 1; 917 return 1;
893 918
894 /* we need only to know, whether it is the same item */ 919 /* we need only to know, whether it is the same item */
895 ih = get_ih(path); 920 ih = tp_item_head(path);
896 return memcmp(stored_ih, ih, IH_SIZE); 921 return memcmp(stored_ih, ih, IH_SIZE);
897} 922}
898 923
899/* unformatted nodes are not logged anymore, ever. This is safe 924/* unformatted nodes are not logged anymore, ever. This is safe now */
900** now
901*/
902#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) 925#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
903 926
904// block can not be forgotten as it is in I/O or held by someone 927/* block can not be forgotten as it is in I/O or held by someone */
905#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) 928#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
906 929
907// prepare for delete or cut of direct item 930/* prepare for delete or cut of direct item */
908static inline int prepare_for_direct_item(struct treepath *path, 931static inline int prepare_for_direct_item(struct treepath *path,
909 struct item_head *le_ih, 932 struct item_head *le_ih,
910 struct inode *inode, 933 struct inode *inode,
@@ -917,9 +940,8 @@ static inline int prepare_for_direct_item(struct treepath *path,
917 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 940 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
918 return M_DELETE; 941 return M_DELETE;
919 } 942 }
920 // new file gets truncated 943 /* new file gets truncated */
921 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { 944 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
922 //
923 round_len = ROUND_UP(new_file_length); 945 round_len = ROUND_UP(new_file_length);
924 /* this was new_file_length < le_ih ... */ 946 /* this was new_file_length < le_ih ... */
925 if (round_len < le_ih_k_offset(le_ih)) { 947 if (round_len < le_ih_k_offset(le_ih)) {
@@ -933,12 +955,13 @@ static inline int prepare_for_direct_item(struct treepath *path,
933 return M_CUT; /* Cut from this item. */ 955 return M_CUT; /* Cut from this item. */
934 } 956 }
935 957
936 // old file: items may have any length 958 /* old file: items may have any length */
937 959
938 if (new_file_length < le_ih_k_offset(le_ih)) { 960 if (new_file_length < le_ih_k_offset(le_ih)) {
939 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 961 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
940 return M_DELETE; /* Delete this item. */ 962 return M_DELETE; /* Delete this item. */
941 } 963 }
964
942 /* Calculate first position and size for cutting from item. */ 965 /* Calculate first position and size for cutting from item. */
943 *cut_size = -(ih_item_len(le_ih) - 966 *cut_size = -(ih_item_len(le_ih) -
944 (pos_in_item(path) = 967 (pos_in_item(path) =
@@ -957,12 +980,15 @@ static inline int prepare_for_direntry_item(struct treepath *path,
957 RFALSE(ih_entry_count(le_ih) != 2, 980 RFALSE(ih_entry_count(le_ih) != 2,
958 "PAP-5220: incorrect empty directory item (%h)", le_ih); 981 "PAP-5220: incorrect empty directory item (%h)", le_ih);
959 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 982 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
960 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ 983 /* Delete the directory item containing "." and ".." entry. */
984 return M_DELETE;
961 } 985 }
962 986
963 if (ih_entry_count(le_ih) == 1) { 987 if (ih_entry_count(le_ih) == 1) {
964 /* Delete the directory item such as there is one record only 988 /*
965 in this item */ 989 * Delete the directory item such as there is one record only
990 * in this item
991 */
966 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 992 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
967 return M_DELETE; 993 return M_DELETE;
968 } 994 }
@@ -976,18 +1002,34 @@ static inline int prepare_for_direntry_item(struct treepath *path,
976 1002
977#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) 1003#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
978 1004
979/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. 1005/*
980 If the path points to an indirect item, remove some number of its unformatted nodes. 1006 * If the path points to a directory or direct item, calculate mode
981 In case of file truncate calculate whether this item must be deleted/truncated or last 1007 * and the size cut, for balance.
982 unformatted node of this item will be converted to a direct item. 1008 * If the path points to an indirect item, remove some number of its
983 This function returns a determination of what balance mode the calling function should employ. */ 1009 * unformatted nodes.
984static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed 1010 * In case of file truncate calculate whether this item must be
985 from end of the file. */ 1011 * deleted/truncated or last unformatted node of this item will be
986 int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */ 1012 * converted to a direct item.
1013 * This function returns a determination of what balance mode the
1014 * calling function should employ.
1015 */
1016static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1017 struct inode *inode,
1018 struct treepath *path,
1019 const struct cpu_key *item_key,
1020 /*
1021 * Number of unformatted nodes
1022 * which were removed from end
1023 * of the file.
1024 */
1025 int *removed,
1026 int *cut_size,
1027 /* MAX_KEY_OFFSET in case of delete. */
1028 unsigned long long new_file_length
987 ) 1029 )
988{ 1030{
989 struct super_block *sb = inode->i_sb; 1031 struct super_block *sb = inode->i_sb;
990 struct item_head *p_le_ih = PATH_PITEM_HEAD(path); 1032 struct item_head *p_le_ih = tp_item_head(path);
991 struct buffer_head *bh = PATH_PLAST_BUFFER(path); 1033 struct buffer_head *bh = PATH_PLAST_BUFFER(path);
992 1034
993 BUG_ON(!th->t_trans_id); 1035 BUG_ON(!th->t_trans_id);
@@ -1023,8 +1065,10 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1023 int pos = 0; 1065 int pos = 0;
1024 1066
1025 if ( new_file_length == max_reiserfs_offset (inode) ) { 1067 if ( new_file_length == max_reiserfs_offset (inode) ) {
1026 /* prepare_for_delete_or_cut() is called by 1068 /*
1027 * reiserfs_delete_item() */ 1069 * prepare_for_delete_or_cut() is called by
1070 * reiserfs_delete_item()
1071 */
1028 new_file_length = 0; 1072 new_file_length = 0;
1029 delete = 1; 1073 delete = 1;
1030 } 1074 }
@@ -1033,27 +1077,30 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1033 need_re_search = 0; 1077 need_re_search = 0;
1034 *cut_size = 0; 1078 *cut_size = 0;
1035 bh = PATH_PLAST_BUFFER(path); 1079 bh = PATH_PLAST_BUFFER(path);
1036 copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); 1080 copy_item_head(&s_ih, tp_item_head(path));
1037 pos = I_UNFM_NUM(&s_ih); 1081 pos = I_UNFM_NUM(&s_ih);
1038 1082
1039 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) { 1083 while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1040 __le32 *unfm; 1084 __le32 *unfm;
1041 __u32 block; 1085 __u32 block;
1042 1086
1043 /* Each unformatted block deletion may involve one additional 1087 /*
1044 * bitmap block into the transaction, thereby the initial 1088 * Each unformatted block deletion may involve
1045 * journal space reservation might not be enough. */ 1089 * one additional bitmap block into the transaction,
1090 * thereby the initial journal space reservation
1091 * might not be enough.
1092 */
1046 if (!delete && (*cut_size) != 0 && 1093 if (!delete && (*cut_size) != 0 &&
1047 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) 1094 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1048 break; 1095 break;
1049 1096
1050 unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1; 1097 unfm = (__le32 *)ih_item_body(bh, &s_ih) + pos - 1;
1051 block = get_block_num(unfm, 0); 1098 block = get_block_num(unfm, 0);
1052 1099
1053 if (block != 0) { 1100 if (block != 0) {
1054 reiserfs_prepare_for_journal(sb, bh, 1); 1101 reiserfs_prepare_for_journal(sb, bh, 1);
1055 put_block_num(unfm, 0, 0); 1102 put_block_num(unfm, 0, 0);
1056 journal_mark_dirty(th, sb, bh); 1103 journal_mark_dirty(th, bh);
1057 reiserfs_free_block(th, inode, block, 1); 1104 reiserfs_free_block(th, inode, block, 1);
1058 } 1105 }
1059 1106
@@ -1074,17 +1121,21 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1074 break; 1121 break;
1075 } 1122 }
1076 } 1123 }
1077 /* a trick. If the buffer has been logged, this will do nothing. If 1124 /*
1078 ** we've broken the loop without logging it, it will restore the 1125 * a trick. If the buffer has been logged, this will
1079 ** buffer */ 1126 * do nothing. If we've broken the loop without logging
1127 * it, it will restore the buffer
1128 */
1080 reiserfs_restore_prepared_buffer(sb, bh); 1129 reiserfs_restore_prepared_buffer(sb, bh);
1081 } while (need_re_search && 1130 } while (need_re_search &&
1082 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); 1131 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1083 pos_in_item(path) = pos * UNFM_P_SIZE; 1132 pos_in_item(path) = pos * UNFM_P_SIZE;
1084 1133
1085 if (*cut_size == 0) { 1134 if (*cut_size == 0) {
1086 /* Nothing were cut. maybe convert last unformatted node to the 1135 /*
1087 * direct item? */ 1136 * Nothing was cut. maybe convert last unformatted node to the
1137 * direct item?
1138 */
1088 result = M_CONVERT; 1139 result = M_CONVERT;
1089 } 1140 }
1090 return result; 1141 return result;
@@ -1095,7 +1146,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1095static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) 1146static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1096{ 1147{
1097 int del_size; 1148 int del_size;
1098 struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path); 1149 struct item_head *p_le_ih = tp_item_head(tb->tb_path);
1099 1150
1100 if (is_statdata_le_ih(p_le_ih)) 1151 if (is_statdata_le_ih(p_le_ih))
1101 return 0; 1152 return 0;
@@ -1104,9 +1155,11 @@ static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1104 (mode == 1155 (mode ==
1105 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; 1156 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1106 if (is_direntry_le_ih(p_le_ih)) { 1157 if (is_direntry_le_ih(p_le_ih)) {
1107 /* return EMPTY_DIR_SIZE; We delete emty directoris only. 1158 /*
1108 * we can't use EMPTY_DIR_SIZE, as old format dirs have a different 1159 * return EMPTY_DIR_SIZE; We delete emty directories only.
1109 * empty size. ick. FIXME, is this right? */ 1160 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1161 * different empty size. ick. FIXME, is this right?
1162 */
1110 return del_size; 1163 return del_size;
1111 } 1164 }
1112 1165
@@ -1169,7 +1222,8 @@ char head2type(struct item_head *ih)
1169} 1222}
1170#endif 1223#endif
1171 1224
1172/* Delete object item. 1225/*
1226 * Delete object item.
1173 * th - active transaction handle 1227 * th - active transaction handle
1174 * path - path to the deleted item 1228 * path - path to the deleted item
1175 * item_key - key to search for the deleted item 1229 * item_key - key to search for the deleted item
@@ -1212,7 +1266,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1212 1266
1213 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1267 RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1214 1268
1215 copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); 1269 copy_item_head(&s_ih, tp_item_head(path));
1216 s_del_balance.insert_size[0] = del_size; 1270 s_del_balance.insert_size[0] = del_size;
1217 1271
1218 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1272 ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
@@ -1221,7 +1275,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1221 1275
1222 PROC_INFO_INC(sb, delete_item_restarted); 1276 PROC_INFO_INC(sb, delete_item_restarted);
1223 1277
1224 // file system changed, repeat search 1278 /* file system changed, repeat search */
1225 ret_value = 1279 ret_value =
1226 search_for_position_by_key(sb, item_key, path); 1280 search_for_position_by_key(sb, item_key, path);
1227 if (ret_value == IO_ERROR) 1281 if (ret_value == IO_ERROR)
@@ -1238,16 +1292,18 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1238 unfix_nodes(&s_del_balance); 1292 unfix_nodes(&s_del_balance);
1239 return 0; 1293 return 0;
1240 } 1294 }
1241 // reiserfs_delete_item returns item length when success 1295
1296 /* reiserfs_delete_item returns item length when success */
1242 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1297 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1243 q_ih = get_ih(path); 1298 q_ih = tp_item_head(path);
1244 quota_cut_bytes = ih_item_len(q_ih); 1299 quota_cut_bytes = ih_item_len(q_ih);
1245 1300
1246 /* hack so the quota code doesn't have to guess if the file 1301 /*
1247 ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 1302 * hack so the quota code doesn't have to guess if the file has a
1248 ** We test the offset because the tail might have been 1303 * tail. On tail insert, we allocate quota for 1 unformatted node.
1249 ** split into multiple items, and we only want to decrement for 1304 * We test the offset because the tail might have been
1250 ** the unfm node once 1305 * split into multiple items, and we only want to decrement for
1306 * the unfm node once
1251 */ 1307 */
1252 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { 1308 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1253 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { 1309 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
@@ -1261,33 +1317,38 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1261 int off; 1317 int off;
1262 char *data; 1318 char *data;
1263 1319
1264 /* We are in direct2indirect conversion, so move tail contents 1320 /*
1265 to the unformatted node */ 1321 * We are in direct2indirect conversion, so move tail contents
1266 /* note, we do the copy before preparing the buffer because we 1322 * to the unformatted node
1267 ** don't care about the contents of the unformatted node yet. 1323 */
1268 ** the only thing we really care about is the direct item's data 1324 /*
1269 ** is in the unformatted node. 1325 * note, we do the copy before preparing the buffer because we
1270 ** 1326 * don't care about the contents of the unformatted node yet.
1271 ** Otherwise, we would have to call reiserfs_prepare_for_journal on 1327 * the only thing we really care about is the direct item's
1272 ** the unformatted node, which might schedule, meaning we'd have to 1328 * data is in the unformatted node.
1273 ** loop all the way back up to the start of the while loop. 1329 *
1274 ** 1330 * Otherwise, we would have to call
1275 ** The unformatted node must be dirtied later on. We can't be 1331 * reiserfs_prepare_for_journal on the unformatted node,
1276 ** sure here if the entire tail has been deleted yet. 1332 * which might schedule, meaning we'd have to loop all the
1277 ** 1333 * way back up to the start of the while loop.
1278 ** un_bh is from the page cache (all unformatted nodes are 1334 *
1279 ** from the page cache) and might be a highmem page. So, we 1335 * The unformatted node must be dirtied later on. We can't be
1280 ** can't use un_bh->b_data. 1336 * sure here if the entire tail has been deleted yet.
1281 ** -clm 1337 *
1338 * un_bh is from the page cache (all unformatted nodes are
1339 * from the page cache) and might be a highmem page. So, we
1340 * can't use un_bh->b_data.
1341 * -clm
1282 */ 1342 */
1283 1343
1284 data = kmap_atomic(un_bh->b_page); 1344 data = kmap_atomic(un_bh->b_page);
1285 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); 1345 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1286 memcpy(data + off, 1346 memcpy(data + off,
1287 B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih), 1347 ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
1288 ret_value); 1348 ret_value);
1289 kunmap_atomic(data); 1349 kunmap_atomic(data);
1290 } 1350 }
1351
1291 /* Perform balancing after all resources have been collected at once. */ 1352 /* Perform balancing after all resources have been collected at once. */
1292 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1353 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1293 1354
@@ -1304,20 +1365,21 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1304 return ret_value; 1365 return ret_value;
1305} 1366}
1306 1367
1307/* Summary Of Mechanisms For Handling Collisions Between Processes: 1368/*
1308 1369 * Summary Of Mechanisms For Handling Collisions Between Processes:
1309 deletion of the body of the object is performed by iput(), with the 1370 *
1310 result that if multiple processes are operating on a file, the 1371 * deletion of the body of the object is performed by iput(), with the
1311 deletion of the body of the file is deferred until the last process 1372 * result that if multiple processes are operating on a file, the
1312 that has an open inode performs its iput(). 1373 * deletion of the body of the file is deferred until the last process
1313 1374 * that has an open inode performs its iput().
1314 writes and truncates are protected from collisions by use of 1375 *
1315 semaphores. 1376 * writes and truncates are protected from collisions by use of
1316 1377 * semaphores.
1317 creates, linking, and mknod are protected from collisions with other 1378 *
1318 processes by making the reiserfs_add_entry() the last step in the 1379 * creates, linking, and mknod are protected from collisions with other
1319 creation, and then rolling back all changes if there was a collision. 1380 * processes by making the reiserfs_add_entry() the last step in the
1320 - Hans 1381 * creation, and then rolling back all changes if there was a collision.
1382 * - Hans
1321*/ 1383*/
1322 1384
1323/* this deletes item which never gets split */ 1385/* this deletes item which never gets split */
@@ -1347,7 +1409,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1347 } 1409 }
1348 if (retval != ITEM_FOUND) { 1410 if (retval != ITEM_FOUND) {
1349 pathrelse(&path); 1411 pathrelse(&path);
1350 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir 1412 /*
1413 * No need for a warning, if there is just no free
1414 * space to insert '..' item into the
1415 * newly-created subdir
1416 */
1351 if (! 1417 if (!
1352 ((unsigned long long) 1418 ((unsigned long long)
1353 GET_HASH_VALUE(le_key_k_offset 1419 GET_HASH_VALUE(le_key_k_offset
@@ -1362,11 +1428,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1362 } 1428 }
1363 if (!tb_init) { 1429 if (!tb_init) {
1364 tb_init = 1; 1430 tb_init = 1;
1365 item_len = ih_item_len(PATH_PITEM_HEAD(&path)); 1431 item_len = ih_item_len(tp_item_head(&path));
1366 init_tb_struct(th, &tb, th->t_super, &path, 1432 init_tb_struct(th, &tb, th->t_super, &path,
1367 -(IH_SIZE + item_len)); 1433 -(IH_SIZE + item_len));
1368 } 1434 }
1369 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)); 1435 quota_cut_bytes = ih_item_len(tp_item_head(&path));
1370 1436
1371 retval = fix_nodes(M_DELETE, &tb, NULL, NULL); 1437 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1372 if (retval == REPEAT_SEARCH) { 1438 if (retval == REPEAT_SEARCH) {
@@ -1376,7 +1442,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1376 1442
1377 if (retval == CARRY_ON) { 1443 if (retval == CARRY_ON) {
1378 do_balance(&tb, NULL, NULL, M_DELETE); 1444 do_balance(&tb, NULL, NULL, M_DELETE);
1379 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ 1445 /*
1446 * Should we count quota for item? (we don't
1447 * count quotas for save-links)
1448 */
1449 if (inode) {
1380 int depth; 1450 int depth;
1381#ifdef REISERQUOTA_DEBUG 1451#ifdef REISERQUOTA_DEBUG
1382 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 1452 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
@@ -1391,7 +1461,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1391 } 1461 }
1392 break; 1462 break;
1393 } 1463 }
1394 // IO_ERROR, NO_DISK_SPACE, etc 1464
1465 /* IO_ERROR, NO_DISK_SPACE, etc */
1395 reiserfs_warning(th->t_super, "vs-5360", 1466 reiserfs_warning(th->t_super, "vs-5360",
1396 "could not delete %K due to fix_nodes failure", 1467 "could not delete %K due to fix_nodes failure",
1397 &cpu_key); 1468 &cpu_key);
@@ -1447,11 +1518,13 @@ static void unmap_buffers(struct page *page, loff_t pos)
1447 do { 1518 do {
1448 next = bh->b_this_page; 1519 next = bh->b_this_page;
1449 1520
1450 /* we want to unmap the buffers that contain the tail, and 1521 /*
1451 ** all the buffers after it (since the tail must be at the 1522 * we want to unmap the buffers that contain
1452 ** end of the file). We don't want to unmap file data 1523 * the tail, and all the buffers after it
1453 ** before the tail, since it might be dirty and waiting to 1524 * (since the tail must be at the end of the
1454 ** reach disk 1525 * file). We don't want to unmap file data
1526 * before the tail, since it might be dirty
1527 * and waiting to reach disk
1455 */ 1528 */
1456 cur_index += bh->b_size; 1529 cur_index += bh->b_size;
1457 if (cur_index > tail_index) { 1530 if (cur_index > tail_index) {
@@ -1476,9 +1549,10 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1476 BUG_ON(!th->t_trans_id); 1549 BUG_ON(!th->t_trans_id);
1477 BUG_ON(new_file_size != inode->i_size); 1550 BUG_ON(new_file_size != inode->i_size);
1478 1551
1479 /* the page being sent in could be NULL if there was an i/o error 1552 /*
1480 ** reading in the last block. The user will hit problems trying to 1553 * the page being sent in could be NULL if there was an i/o error
1481 ** read the file, but for now we just skip the indirect2direct 1554 * reading in the last block. The user will hit problems trying to
1555 * read the file, but for now we just skip the indirect2direct
1482 */ 1556 */
1483 if (atomic_read(&inode->i_count) > 1 || 1557 if (atomic_read(&inode->i_count) > 1 ||
1484 !tail_has_to_be_packed(inode) || 1558 !tail_has_to_be_packed(inode) ||
@@ -1490,17 +1564,18 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1490 pathrelse(path); 1564 pathrelse(path);
1491 return cut_bytes; 1565 return cut_bytes;
1492 } 1566 }
1567
1493 /* Perform the conversion to a direct_item. */ 1568 /* Perform the conversion to a direct_item. */
1494 /* return indirect_to_direct(inode, path, item_key,
1495 new_file_size, mode); */
1496 return indirect2direct(th, inode, page, path, item_key, 1569 return indirect2direct(th, inode, page, path, item_key,
1497 new_file_size, mode); 1570 new_file_size, mode);
1498} 1571}
1499 1572
1500/* we did indirect_to_direct conversion. And we have inserted direct 1573/*
1501 item successesfully, but there were no disk space to cut unfm 1574 * we did indirect_to_direct conversion. And we have inserted direct
1502 pointer being converted. Therefore we have to delete inserted 1575 * item successesfully, but there were no disk space to cut unfm
1503 direct item(s) */ 1576 * pointer being converted. Therefore we have to delete inserted
1577 * direct item(s)
1578 */
1504static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, 1579static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1505 struct inode *inode, struct treepath *path) 1580 struct inode *inode, struct treepath *path)
1506{ 1581{
@@ -1509,7 +1584,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1509 int removed; 1584 int removed;
1510 BUG_ON(!th->t_trans_id); 1585 BUG_ON(!th->t_trans_id);
1511 1586
1512 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! 1587 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1513 tail_key.key_length = 4; 1588 tail_key.key_length = 4;
1514 1589
1515 tail_len = 1590 tail_len =
@@ -1521,7 +1596,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1521 reiserfs_panic(inode->i_sb, "vs-5615", 1596 reiserfs_panic(inode->i_sb, "vs-5615",
1522 "found invalid item"); 1597 "found invalid item");
1523 RFALSE(path->pos_in_item != 1598 RFALSE(path->pos_in_item !=
1524 ih_item_len(PATH_PITEM_HEAD(path)) - 1, 1599 ih_item_len(tp_item_head(path)) - 1,
1525 "vs-5616: appended bytes found"); 1600 "vs-5616: appended bytes found");
1526 PATH_LAST_POSITION(path)--; 1601 PATH_LAST_POSITION(path)--;
1527 1602
@@ -1539,7 +1614,6 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1539 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " 1614 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1540 "conversion has been rolled back due to " 1615 "conversion has been rolled back due to "
1541 "lack of disk space"); 1616 "lack of disk space");
1542 //mark_file_without_tail (inode);
1543 mark_inode_dirty(inode); 1617 mark_inode_dirty(inode);
1544} 1618}
1545 1619
@@ -1551,15 +1625,18 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1551 struct page *page, loff_t new_file_size) 1625 struct page *page, loff_t new_file_size)
1552{ 1626{
1553 struct super_block *sb = inode->i_sb; 1627 struct super_block *sb = inode->i_sb;
1554 /* Every function which is going to call do_balance must first 1628 /*
1555 create a tree_balance structure. Then it must fill up this 1629 * Every function which is going to call do_balance must first
1556 structure by using the init_tb_struct and fix_nodes functions. 1630 * create a tree_balance structure. Then it must fill up this
1557 After that we can make tree balancing. */ 1631 * structure by using the init_tb_struct and fix_nodes functions.
1632 * After that we can make tree balancing.
1633 */
1558 struct tree_balance s_cut_balance; 1634 struct tree_balance s_cut_balance;
1559 struct item_head *p_le_ih; 1635 struct item_head *p_le_ih;
1560 int cut_size = 0, /* Amount to be cut. */ 1636 int cut_size = 0; /* Amount to be cut. */
1561 ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ 1637 int ret_value = CARRY_ON;
1562 is_inode_locked = 0; 1638 int removed = 0; /* Number of the removed unformatted nodes. */
1639 int is_inode_locked = 0;
1563 char mode; /* Mode of the balance. */ 1640 char mode; /* Mode of the balance. */
1564 int retval2 = -1; 1641 int retval2 = -1;
1565 int quota_cut_bytes; 1642 int quota_cut_bytes;
@@ -1571,21 +1648,27 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1571 init_tb_struct(th, &s_cut_balance, inode->i_sb, path, 1648 init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1572 cut_size); 1649 cut_size);
1573 1650
1574 /* Repeat this loop until we either cut the item without needing 1651 /*
1575 to balance, or we fix_nodes without schedule occurring */ 1652 * Repeat this loop until we either cut the item without needing
1653 * to balance, or we fix_nodes without schedule occurring
1654 */
1576 while (1) { 1655 while (1) {
1577 /* Determine the balance mode, position of the first byte to 1656 /*
1578 be cut, and size to be cut. In case of the indirect item 1657 * Determine the balance mode, position of the first byte to
1579 free unformatted nodes which are pointed to by the cut 1658 * be cut, and size to be cut. In case of the indirect item
1580 pointers. */ 1659 * free unformatted nodes which are pointed to by the cut
1660 * pointers.
1661 */
1581 1662
1582 mode = 1663 mode =
1583 prepare_for_delete_or_cut(th, inode, path, 1664 prepare_for_delete_or_cut(th, inode, path,
1584 item_key, &removed, 1665 item_key, &removed,
1585 &cut_size, new_file_size); 1666 &cut_size, new_file_size);
1586 if (mode == M_CONVERT) { 1667 if (mode == M_CONVERT) {
1587 /* convert last unformatted node to direct item or leave 1668 /*
1588 tail in the unformatted node */ 1669 * convert last unformatted node to direct item or
1670 * leave tail in the unformatted node
1671 */
1589 RFALSE(ret_value != CARRY_ON, 1672 RFALSE(ret_value != CARRY_ON,
1590 "PAP-5570: can not convert twice"); 1673 "PAP-5570: can not convert twice");
1591 1674
@@ -1599,15 +1682,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1599 1682
1600 is_inode_locked = 1; 1683 is_inode_locked = 1;
1601 1684
1602 /* removing of last unformatted node will change value we 1685 /*
1603 have to return to truncate. Save it */ 1686 * removing of last unformatted node will
1687 * change value we have to return to truncate.
1688 * Save it
1689 */
1604 retval2 = ret_value; 1690 retval2 = ret_value;
1605 /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1606 1691
1607 /* So, we have performed the first part of the conversion: 1692 /*
1608 inserting the new direct item. Now we are removing the 1693 * So, we have performed the first part of the
1609 last unformatted node pointer. Set key to search for 1694 * conversion:
1610 it. */ 1695 * inserting the new direct item. Now we are
1696 * removing the last unformatted node pointer.
1697 * Set key to search for it.
1698 */
1611 set_cpu_key_k_type(item_key, TYPE_INDIRECT); 1699 set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1612 item_key->key_length = 4; 1700 item_key->key_length = 4;
1613 new_file_size -= 1701 new_file_size -=
@@ -1650,11 +1738,13 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1650 return (ret_value == IO_ERROR) ? -EIO : -ENOENT; 1738 return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1651 } /* while */ 1739 } /* while */
1652 1740
1653 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1741 /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1654 if (ret_value != CARRY_ON) { 1742 if (ret_value != CARRY_ON) {
1655 if (is_inode_locked) { 1743 if (is_inode_locked) {
1656 // FIXME: this seems to be not needed: we are always able 1744 /*
1657 // to cut item 1745 * FIXME: this seems to be not needed: we are always
1746 * able to cut item
1747 */
1658 indirect_to_direct_roll_back(th, inode, path); 1748 indirect_to_direct_roll_back(th, inode, path);
1659 } 1749 }
1660 if (ret_value == NO_DISK_SPACE) 1750 if (ret_value == NO_DISK_SPACE)
@@ -1671,22 +1761,23 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1671 /* Calculate number of bytes that need to be cut from the item. */ 1761 /* Calculate number of bytes that need to be cut from the item. */
1672 quota_cut_bytes = 1762 quota_cut_bytes =
1673 (mode == 1763 (mode ==
1674 M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance. 1764 M_DELETE) ? ih_item_len(tp_item_head(path)) : -s_cut_balance.
1675 insert_size[0]; 1765 insert_size[0];
1676 if (retval2 == -1) 1766 if (retval2 == -1)
1677 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode); 1767 ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1678 else 1768 else
1679 ret_value = retval2; 1769 ret_value = retval2;
1680 1770
1681 /* For direct items, we only change the quota when deleting the last 1771 /*
1682 ** item. 1772 * For direct items, we only change the quota when deleting the last
1773 * item.
1683 */ 1774 */
1684 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path); 1775 p_le_ih = tp_item_head(s_cut_balance.tb_path);
1685 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1776 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1686 if (mode == M_DELETE && 1777 if (mode == M_DELETE &&
1687 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == 1778 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1688 1) { 1779 1) {
1689 // FIXME: this is to keep 3.5 happy 1780 /* FIXME: this is to keep 3.5 happy */
1690 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 1781 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1691 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; 1782 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1692 } else { 1783 } else {
@@ -1696,10 +1787,12 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1696#ifdef CONFIG_REISERFS_CHECK 1787#ifdef CONFIG_REISERFS_CHECK
1697 if (is_inode_locked) { 1788 if (is_inode_locked) {
1698 struct item_head *le_ih = 1789 struct item_head *le_ih =
1699 PATH_PITEM_HEAD(s_cut_balance.tb_path); 1790 tp_item_head(s_cut_balance.tb_path);
1700 /* we are going to complete indirect2direct conversion. Make 1791 /*
1701 sure, that we exactly remove last unformatted node pointer 1792 * we are going to complete indirect2direct conversion. Make
1702 of the item */ 1793 * sure, that we exactly remove last unformatted node pointer
1794 * of the item
1795 */
1703 if (!is_indirect_le_ih(le_ih)) 1796 if (!is_indirect_le_ih(le_ih))
1704 reiserfs_panic(sb, "vs-5652", 1797 reiserfs_panic(sb, "vs-5652",
1705 "item must be indirect %h", le_ih); 1798 "item must be indirect %h", le_ih);
@@ -1717,17 +1810,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1717 "(CUT, insert_size==%d)", 1810 "(CUT, insert_size==%d)",
1718 le_ih, s_cut_balance.insert_size[0]); 1811 le_ih, s_cut_balance.insert_size[0]);
1719 } 1812 }
1720 /* it would be useful to make sure, that right neighboring 1813 /*
1721 item is direct item of this file */ 1814 * it would be useful to make sure, that right neighboring
1815 * item is direct item of this file
1816 */
1722 } 1817 }
1723#endif 1818#endif
1724 1819
1725 do_balance(&s_cut_balance, NULL, NULL, mode); 1820 do_balance(&s_cut_balance, NULL, NULL, mode);
1726 if (is_inode_locked) { 1821 if (is_inode_locked) {
1727 /* we've done an indirect->direct conversion. when the data block 1822 /*
1728 ** was freed, it was removed from the list of blocks that must 1823 * we've done an indirect->direct conversion. when the
1729 ** be flushed before the transaction commits, make sure to 1824 * data block was freed, it was removed from the list of
1730 ** unmap and invalidate it 1825 * blocks that must be flushed before the transaction
1826 * commits, make sure to unmap and invalidate it
1731 */ 1827 */
1732 unmap_buffers(page, tail_pos); 1828 unmap_buffers(page, tail_pos);
1733 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 1829 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
@@ -1758,20 +1854,25 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1758 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); 1854 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1759} 1855}
1760 1856
1761/* Truncate file to the new size. Note, this must be called with a transaction 1857/*
1762 already started */ 1858 * Truncate file to the new size. Note, this must be called with a
1859 * transaction already started
1860 */
1763int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, 1861int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1764 struct inode *inode, /* ->i_size contains new size */ 1862 struct inode *inode, /* ->i_size contains new size */
1765 struct page *page, /* up to date for last block */ 1863 struct page *page, /* up to date for last block */
1766 int update_timestamps /* when it is called by 1864 /*
1767 file_release to convert 1865 * when it is called by file_release to convert
1768 the tail - no timestamps 1866 * the tail - no timestamps should be updated
1769 should be updated */ 1867 */
1868 int update_timestamps
1770 ) 1869 )
1771{ 1870{
1772 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ 1871 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1773 struct item_head *p_le_ih; /* Pointer to an item header. */ 1872 struct item_head *p_le_ih; /* Pointer to an item header. */
1774 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1873
1874 /* Key to search for a previous file item. */
1875 struct cpu_key s_item_key;
1775 loff_t file_size, /* Old file size. */ 1876 loff_t file_size, /* Old file size. */
1776 new_file_size; /* New file size. */ 1877 new_file_size; /* New file size. */
1777 int deleted; /* Number of deleted or truncated bytes. */ 1878 int deleted; /* Number of deleted or truncated bytes. */
@@ -1784,8 +1885,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1784 || S_ISLNK(inode->i_mode))) 1885 || S_ISLNK(inode->i_mode)))
1785 return 0; 1886 return 0;
1786 1887
1888 /* deletion of directory - no need to update timestamps */
1787 if (S_ISDIR(inode->i_mode)) { 1889 if (S_ISDIR(inode->i_mode)) {
1788 // deletion of directory - no need to update timestamps
1789 truncate_directory(th, inode); 1890 truncate_directory(th, inode);
1790 return 0; 1891 return 0;
1791 } 1892 }
@@ -1793,7 +1894,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1793 /* Get new file size. */ 1894 /* Get new file size. */
1794 new_file_size = inode->i_size; 1895 new_file_size = inode->i_size;
1795 1896
1796 // FIXME: note, that key type is unimportant here 1897 /* FIXME: note, that key type is unimportant here */
1797 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), 1898 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1798 TYPE_DIRECT, 3); 1899 TYPE_DIRECT, 3);
1799 1900
@@ -1819,7 +1920,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1819 s_search_path.pos_in_item--; 1920 s_search_path.pos_in_item--;
1820 1921
1821 /* Get real file size (total length of all file items) */ 1922 /* Get real file size (total length of all file items) */
1822 p_le_ih = PATH_PITEM_HEAD(&s_search_path); 1923 p_le_ih = tp_item_head(&s_search_path);
1823 if (is_statdata_le_ih(p_le_ih)) 1924 if (is_statdata_le_ih(p_le_ih))
1824 file_size = 0; 1925 file_size = 0;
1825 else { 1926 else {
@@ -1827,9 +1928,11 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1827 int bytes = 1928 int bytes =
1828 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); 1929 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1829 1930
1830 /* this may mismatch with real file size: if last direct item 1931 /*
1831 had no padding zeros and last unformatted node had no free 1932 * this may mismatch with real file size: if last direct item
1832 space, this file would have this file size */ 1933 * had no padding zeros and last unformatted node had no free
1934 * space, this file would have this file size
1935 */
1833 file_size = offset + bytes - 1; 1936 file_size = offset + bytes - 1;
1834 } 1937 }
1835 /* 1938 /*
@@ -1867,18 +1970,20 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1867 1970
1868 set_cpu_key_k_offset(&s_item_key, file_size); 1971 set_cpu_key_k_offset(&s_item_key, file_size);
1869 1972
1870 /* While there are bytes to truncate and previous file item is presented in the tree. */ 1973 /*
1974 * While there are bytes to truncate and previous
1975 * file item is presented in the tree.
1976 */
1871 1977
1872 /* 1978 /*
1873 ** This loop could take a really long time, and could log 1979 * This loop could take a really long time, and could log
1874 ** many more blocks than a transaction can hold. So, we do a polite 1980 * many more blocks than a transaction can hold. So, we do
1875 ** journal end here, and if the transaction needs ending, we make 1981 * a polite journal end here, and if the transaction needs
1876 ** sure the file is consistent before ending the current trans 1982 * ending, we make sure the file is consistent before ending
1877 ** and starting a new one 1983 * the current trans and starting a new one
1878 */ 1984 */
1879 if (journal_transaction_should_end(th, 0) || 1985 if (journal_transaction_should_end(th, 0) ||
1880 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1986 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1881 int orig_len_alloc = th->t_blocks_allocated;
1882 pathrelse(&s_search_path); 1987 pathrelse(&s_search_path);
1883 1988
1884 if (update_timestamps) { 1989 if (update_timestamps) {
@@ -1887,7 +1992,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1887 } 1992 }
1888 reiserfs_update_sd(th, inode); 1993 reiserfs_update_sd(th, inode);
1889 1994
1890 err = journal_end(th, inode->i_sb, orig_len_alloc); 1995 err = journal_end(th);
1891 if (err) 1996 if (err)
1892 goto out; 1997 goto out;
1893 err = journal_begin(th, inode->i_sb, 1998 err = journal_begin(th, inode->i_sb,
@@ -1904,25 +2009,25 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1904 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d", 2009 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1905 new_file_size, file_size, s_item_key.on_disk_key.k_objectid); 2010 new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1906 2011
1907 update_and_out: 2012update_and_out:
1908 if (update_timestamps) { 2013 if (update_timestamps) {
1909 // this is truncate, not file closing 2014 /* this is truncate, not file closing */
1910 inode->i_mtime = CURRENT_TIME_SEC; 2015 inode->i_mtime = CURRENT_TIME_SEC;
1911 inode->i_ctime = CURRENT_TIME_SEC; 2016 inode->i_ctime = CURRENT_TIME_SEC;
1912 } 2017 }
1913 reiserfs_update_sd(th, inode); 2018 reiserfs_update_sd(th, inode);
1914 2019
1915 out: 2020out:
1916 pathrelse(&s_search_path); 2021 pathrelse(&s_search_path);
1917 return err; 2022 return err;
1918} 2023}
1919 2024
1920#ifdef CONFIG_REISERFS_CHECK 2025#ifdef CONFIG_REISERFS_CHECK
1921// this makes sure, that we __append__, not overwrite or add holes 2026/* this makes sure, that we __append__, not overwrite or add holes */
1922static void check_research_for_paste(struct treepath *path, 2027static void check_research_for_paste(struct treepath *path,
1923 const struct cpu_key *key) 2028 const struct cpu_key *key)
1924{ 2029{
1925 struct item_head *found_ih = get_ih(path); 2030 struct item_head *found_ih = tp_item_head(path);
1926 2031
1927 if (is_direct_le_ih(found_ih)) { 2032 if (is_direct_le_ih(found_ih)) {
1928 if (le_ih_k_offset(found_ih) + 2033 if (le_ih_k_offset(found_ih) +
@@ -1952,13 +2057,22 @@ static void check_research_for_paste(struct treepath *path,
1952} 2057}
1953#endif /* config reiserfs check */ 2058#endif /* config reiserfs check */
1954 2059
1955/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 2060/*
1956int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ 2061 * Paste bytes to the existing item.
1957 const struct cpu_key *key, /* Key to search for the needed item. */ 2062 * Returns bytes number pasted into the item.
1958 struct inode *inode, /* Inode item belongs to */ 2063 */
1959 const char *body, /* Pointer to the bytes to paste. */ 2064int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2065 /* Path to the pasted item. */
2066 struct treepath *search_path,
2067 /* Key to search for the needed item. */
2068 const struct cpu_key *key,
2069 /* Inode item belongs to */
2070 struct inode *inode,
2071 /* Pointer to the bytes to paste. */
2072 const char *body,
2073 /* Size of pasted bytes. */
1960 int pasted_size) 2074 int pasted_size)
1961{ /* Size of pasted bytes. */ 2075{
1962 struct super_block *sb = inode->i_sb; 2076 struct super_block *sb = inode->i_sb;
1963 struct tree_balance s_paste_balance; 2077 struct tree_balance s_paste_balance;
1964 int retval; 2078 int retval;
@@ -1973,7 +2087,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1973 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2087 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1974 "reiserquota paste_into_item(): allocating %u id=%u type=%c", 2088 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1975 pasted_size, inode->i_uid, 2089 pasted_size, inode->i_uid,
1976 key2type(&(key->on_disk_key))); 2090 key2type(&key->on_disk_key));
1977#endif 2091#endif
1978 2092
1979 depth = reiserfs_write_unlock_nested(sb); 2093 depth = reiserfs_write_unlock_nested(sb);
@@ -1997,7 +2111,7 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
1997 while ((retval = 2111 while ((retval =
1998 fix_nodes(M_PASTE, &s_paste_balance, NULL, 2112 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1999 body)) == REPEAT_SEARCH) { 2113 body)) == REPEAT_SEARCH) {
2000 search_again: 2114search_again:
2001 /* file system changed while we were in the fix_nodes */ 2115 /* file system changed while we were in the fix_nodes */
2002 PROC_INFO_INC(th->t_super, paste_into_item_restarted); 2116 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2003 retval = 2117 retval =
@@ -2019,21 +2133,23 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2019#endif 2133#endif
2020 } 2134 }
2021 2135
2022 /* Perform balancing after all resources are collected by fix_nodes, and 2136 /*
2023 accessing them will not risk triggering schedule. */ 2137 * Perform balancing after all resources are collected by fix_nodes,
2138 * and accessing them will not risk triggering schedule.
2139 */
2024 if (retval == CARRY_ON) { 2140 if (retval == CARRY_ON) {
2025 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); 2141 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2026 return 0; 2142 return 0;
2027 } 2143 }
2028 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2144 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2029 error_out: 2145error_out:
2030 /* this also releases the path */ 2146 /* this also releases the path */
2031 unfix_nodes(&s_paste_balance); 2147 unfix_nodes(&s_paste_balance);
2032#ifdef REISERQUOTA_DEBUG 2148#ifdef REISERQUOTA_DEBUG
2033 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE, 2149 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2034 "reiserquota paste_into_item(): freeing %u id=%u type=%c", 2150 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2035 pasted_size, inode->i_uid, 2151 pasted_size, inode->i_uid,
2036 key2type(&(key->on_disk_key))); 2152 key2type(&key->on_disk_key));
2037#endif 2153#endif
2038 depth = reiserfs_write_unlock_nested(sb); 2154 depth = reiserfs_write_unlock_nested(sb);
2039 dquot_free_space_nodirty(inode, pasted_size); 2155 dquot_free_space_nodirty(inode, pasted_size);
@@ -2041,7 +2157,8 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2041 return retval; 2157 return retval;
2042} 2158}
2043 2159
2044/* Insert new item into the buffer at the path. 2160/*
2161 * Insert new item into the buffer at the path.
2045 * th - active transaction handle 2162 * th - active transaction handle
2046 * path - path to the inserted item 2163 * path - path to the inserted item
2047 * ih - pointer to the item header to insert 2164 * ih - pointer to the item header to insert
@@ -2064,8 +2181,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2064 fs_gen = get_generation(inode->i_sb); 2181 fs_gen = get_generation(inode->i_sb);
2065 quota_bytes = ih_item_len(ih); 2182 quota_bytes = ih_item_len(ih);
2066 2183
2067 /* hack so the quota code doesn't have to guess if the file has 2184 /*
2068 ** a tail, links are always tails, so there's no guessing needed 2185 * hack so the quota code doesn't have to guess
2186 * if the file has a tail, links are always tails,
2187 * so there's no guessing needed
2069 */ 2188 */
2070 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) 2189 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2071 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; 2190 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
@@ -2074,8 +2193,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2074 "reiserquota insert_item(): allocating %u id=%u type=%c", 2193 "reiserquota insert_item(): allocating %u id=%u type=%c",
2075 quota_bytes, inode->i_uid, head2type(ih)); 2194 quota_bytes, inode->i_uid, head2type(ih));
2076#endif 2195#endif
2077 /* We can't dirty inode here. It would be immediately written but 2196 /*
2078 * appropriate stat item isn't inserted yet... */ 2197 * We can't dirty inode here. It would be immediately
2198 * written but appropriate stat item isn't inserted yet...
2199 */
2079 depth = reiserfs_write_unlock_nested(inode->i_sb); 2200 depth = reiserfs_write_unlock_nested(inode->i_sb);
2080 retval = dquot_alloc_space_nodirty(inode, quota_bytes); 2201 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2081 reiserfs_write_lock_nested(inode->i_sb, depth); 2202 reiserfs_write_lock_nested(inode->i_sb, depth);
@@ -2089,7 +2210,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2089#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2210#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2090 s_ins_balance.key = key->on_disk_key; 2211 s_ins_balance.key = key->on_disk_key;
2091#endif 2212#endif
2092 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ 2213 /*
2214 * DQUOT_* can schedule, must check to be sure calling
2215 * fix_nodes is safe
2216 */
2093 if (inode && fs_changed(fs_gen, inode->i_sb)) { 2217 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2094 goto search_again; 2218 goto search_again;
2095 } 2219 }
@@ -2097,7 +2221,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2097 while ((retval = 2221 while ((retval =
2098 fix_nodes(M_INSERT, &s_ins_balance, ih, 2222 fix_nodes(M_INSERT, &s_ins_balance, ih,
2099 body)) == REPEAT_SEARCH) { 2223 body)) == REPEAT_SEARCH) {
2100 search_again: 2224search_again:
2101 /* file system changed while we were in the fix_nodes */ 2225 /* file system changed while we were in the fix_nodes */
2102 PROC_INFO_INC(th->t_super, insert_item_restarted); 2226 PROC_INFO_INC(th->t_super, insert_item_restarted);
2103 retval = search_item(th->t_super, key, path); 2227 retval = search_item(th->t_super, key, path);
@@ -2121,7 +2245,7 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2121 } 2245 }
2122 2246
2123 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2247 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2124 error_out: 2248error_out:
2125 /* also releases the path */ 2249 /* also releases the path */
2126 unfix_nodes(&s_ins_balance); 2250 unfix_nodes(&s_ins_balance);
2127#ifdef REISERQUOTA_DEBUG 2251#ifdef REISERQUOTA_DEBUG
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9fb20426005e..a392cef6acc6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -74,7 +74,7 @@ static int reiserfs_sync_fs(struct super_block *s, int wait)
74 dquot_writeback_dquots(s, -1); 74 dquot_writeback_dquots(s, -1);
75 reiserfs_write_lock(s); 75 reiserfs_write_lock(s);
76 if (!journal_begin(&th, s, 1)) 76 if (!journal_begin(&th, s, 1))
77 if (!journal_end_sync(&th, s, 1)) 77 if (!journal_end_sync(&th))
78 reiserfs_flush_old_commits(s); 78 reiserfs_flush_old_commits(s);
79 reiserfs_write_unlock(s); 79 reiserfs_write_unlock(s);
80 return 0; 80 return 0;
@@ -136,9 +136,9 @@ static int reiserfs_freeze(struct super_block *s)
136 } else { 136 } else {
137 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 137 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
138 1); 138 1);
139 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 139 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
140 reiserfs_block_writes(&th); 140 reiserfs_block_writes(&th);
141 journal_end_sync(&th, s, 1); 141 journal_end_sync(&th);
142 } 142 }
143 } 143 }
144 reiserfs_write_unlock(s); 144 reiserfs_write_unlock(s);
@@ -153,13 +153,15 @@ static int reiserfs_unfreeze(struct super_block *s)
153 153
154extern const struct in_core_key MAX_IN_CORE_KEY; 154extern const struct in_core_key MAX_IN_CORE_KEY;
155 155
156/* this is used to delete "save link" when there are no items of a 156/*
157 file it points to. It can either happen if unlink is completed but 157 * this is used to delete "save link" when there are no items of a
158 "save unlink" removal, or if file has both unlink and truncate 158 * file it points to. It can either happen if unlink is completed but
159 pending and as unlink completes first (because key of "save link" 159 * "save unlink" removal, or if file has both unlink and truncate
160 protecting unlink is bigger that a key lf "save link" which 160 * pending and as unlink completes first (because key of "save link"
161 protects truncate), so there left no items to make truncate 161 * protecting unlink is bigger that a key lf "save link" which
162 completion on */ 162 * protects truncate), so there left no items to make truncate
163 * completion on
164 */
163static int remove_save_link_only(struct super_block *s, 165static int remove_save_link_only(struct super_block *s,
164 struct reiserfs_key *key, int oid_free) 166 struct reiserfs_key *key, int oid_free)
165{ 167{
@@ -176,7 +178,7 @@ static int remove_save_link_only(struct super_block *s,
176 /* removals are protected by direct items */ 178 /* removals are protected by direct items */
177 reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid)); 179 reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid));
178 180
179 return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT); 181 return journal_end(&th);
180} 182}
181 183
182#ifdef CONFIG_QUOTA 184#ifdef CONFIG_QUOTA
@@ -258,7 +260,7 @@ static int finish_unfinished(struct super_block *s)
258 break; 260 break;
259 } 261 }
260 item_pos--; 262 item_pos--;
261 ih = B_N_PITEM_HEAD(bh, item_pos); 263 ih = item_head(bh, item_pos);
262 264
263 if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) 265 if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID)
264 /* there are no "save" links anymore */ 266 /* there are no "save" links anymore */
@@ -271,7 +273,7 @@ static int finish_unfinished(struct super_block *s)
271 truncate = 0; 273 truncate = 0;
272 274
273 /* reiserfs_iget needs k_dirid and k_objectid only */ 275 /* reiserfs_iget needs k_dirid and k_objectid only */
274 item = B_I_PITEM(bh, ih); 276 item = ih_item_body(bh, ih);
275 obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item); 277 obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item);
276 obj_key.on_disk_key.k_objectid = 278 obj_key.on_disk_key.k_objectid =
277 le32_to_cpu(ih->ih_key.k_objectid); 279 le32_to_cpu(ih->ih_key.k_objectid);
@@ -282,8 +284,10 @@ static int finish_unfinished(struct super_block *s)
282 284
283 inode = reiserfs_iget(s, &obj_key); 285 inode = reiserfs_iget(s, &obj_key);
284 if (!inode) { 286 if (!inode) {
285 /* the unlink almost completed, it just did not manage to remove 287 /*
286 "save" link and release objectid */ 288 * the unlink almost completed, it just did not
289 * manage to remove "save" link and release objectid
290 */
287 reiserfs_warning(s, "vs-2180", "iget failed for %K", 291 reiserfs_warning(s, "vs-2180", "iget failed for %K",
288 &obj_key); 292 &obj_key);
289 retval = remove_save_link_only(s, &save_link_key, 1); 293 retval = remove_save_link_only(s, &save_link_key, 1);
@@ -303,10 +307,13 @@ static int finish_unfinished(struct super_block *s)
303 reiserfs_write_lock_nested(inode->i_sb, depth); 307 reiserfs_write_lock_nested(inode->i_sb, depth);
304 308
305 if (truncate && S_ISDIR(inode->i_mode)) { 309 if (truncate && S_ISDIR(inode->i_mode)) {
306 /* We got a truncate request for a dir which is impossible. 310 /*
307 The only imaginable way is to execute unfinished truncate request 311 * We got a truncate request for a dir which
308 then boot into old kernel, remove the file and create dir with 312 * is impossible. The only imaginable way is to
309 the same key. */ 313 * execute unfinished truncate request then boot
314 * into old kernel, remove the file and create dir
315 * with the same key.
316 */
310 reiserfs_warning(s, "green-2101", 317 reiserfs_warning(s, "green-2101",
311 "impossible truncate on a " 318 "impossible truncate on a "
312 "directory %k. Please report", 319 "directory %k. Please report",
@@ -320,14 +327,16 @@ static int finish_unfinished(struct super_block *s)
320 if (truncate) { 327 if (truncate) {
321 REISERFS_I(inode)->i_flags |= 328 REISERFS_I(inode)->i_flags |=
322 i_link_saved_truncate_mask; 329 i_link_saved_truncate_mask;
323 /* not completed truncate found. New size was committed together 330 /*
324 with "save" link */ 331 * not completed truncate found. New size was
332 * committed together with "save" link
333 */
325 reiserfs_info(s, "Truncating %k to %Ld ..", 334 reiserfs_info(s, "Truncating %k to %Ld ..",
326 INODE_PKEY(inode), inode->i_size); 335 INODE_PKEY(inode), inode->i_size);
327 reiserfs_truncate_file(inode, 336
328 0 337 /* don't update modification time */
329 /*don't update modification time */ 338 reiserfs_truncate_file(inode, 0);
330 ); 339
331 retval = remove_save_link(inode, truncate); 340 retval = remove_save_link(inode, truncate);
332 } else { 341 } else {
333 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; 342 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
@@ -373,10 +382,12 @@ static int finish_unfinished(struct super_block *s)
373 return retval; 382 return retval;
374} 383}
375 384
376/* to protect file being unlinked from getting lost we "safe" link files 385/*
377 being unlinked. This link will be deleted in the same transaction with last 386 * to protect file being unlinked from getting lost we "safe" link files
378 item of file. mounting the filesystem we scan all these links and remove 387 * being unlinked. This link will be deleted in the same transaction with last
379 files which almost got lost */ 388 * item of file. mounting the filesystem we scan all these links and remove
389 * files which almost got lost
390 */
380void add_save_link(struct reiserfs_transaction_handle *th, 391void add_save_link(struct reiserfs_transaction_handle *th,
381 struct inode *inode, int truncate) 392 struct inode *inode, int truncate)
382{ 393{
@@ -495,7 +506,7 @@ int remove_save_link(struct inode *inode, int truncate)
495 } else 506 } else
496 REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask; 507 REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask;
497 508
498 return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); 509 return journal_end(&th);
499} 510}
500 511
501static void reiserfs_kill_sb(struct super_block *s) 512static void reiserfs_kill_sb(struct super_block *s)
@@ -530,19 +541,23 @@ static void reiserfs_put_super(struct super_block *s)
530 541
531 reiserfs_write_lock(s); 542 reiserfs_write_lock(s);
532 543
533 /* change file system state to current state if it was mounted with read-write permissions */ 544 /*
545 * change file system state to current state if it was mounted
546 * with read-write permissions
547 */
534 if (!(s->s_flags & MS_RDONLY)) { 548 if (!(s->s_flags & MS_RDONLY)) {
535 if (!journal_begin(&th, s, 10)) { 549 if (!journal_begin(&th, s, 10)) {
536 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 550 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
537 1); 551 1);
538 set_sb_umount_state(SB_DISK_SUPER_BLOCK(s), 552 set_sb_umount_state(SB_DISK_SUPER_BLOCK(s),
539 REISERFS_SB(s)->s_mount_state); 553 REISERFS_SB(s)->s_mount_state);
540 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 554 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
541 } 555 }
542 } 556 }
543 557
544 /* note, journal_release checks for readonly mount, and can decide not 558 /*
545 ** to do a journal_end 559 * note, journal_release checks for readonly mount, and can
560 * decide not to do a journal_end
546 */ 561 */
547 journal_release(&th, s); 562 journal_release(&th, s);
548 563
@@ -559,6 +574,7 @@ static void reiserfs_put_super(struct super_block *s)
559 574
560 reiserfs_write_unlock(s); 575 reiserfs_write_unlock(s);
561 mutex_destroy(&REISERFS_SB(s)->lock); 576 mutex_destroy(&REISERFS_SB(s)->lock);
577 destroy_workqueue(REISERFS_SB(s)->commit_wq);
562 kfree(s->s_fs_info); 578 kfree(s->s_fs_info);
563 s->s_fs_info = NULL; 579 s->s_fs_info = NULL;
564} 580}
@@ -634,15 +650,16 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
634 } 650 }
635 reiserfs_write_lock(inode->i_sb); 651 reiserfs_write_lock(inode->i_sb);
636 652
637 /* this is really only used for atime updates, so they don't have 653 /*
638 ** to be included in O_SYNC or fsync 654 * this is really only used for atime updates, so they don't have
655 * to be included in O_SYNC or fsync
639 */ 656 */
640 err = journal_begin(&th, inode->i_sb, 1); 657 err = journal_begin(&th, inode->i_sb, 1);
641 if (err) 658 if (err)
642 goto out; 659 goto out;
643 660
644 reiserfs_update_sd(&th, inode); 661 reiserfs_update_sd(&th, inode);
645 journal_end(&th, inode->i_sb, 1); 662 journal_end(&th);
646 663
647out: 664out:
648 reiserfs_write_unlock(inode->i_sb); 665 reiserfs_write_unlock(inode->i_sb);
@@ -788,31 +805,53 @@ static const struct export_operations reiserfs_export_ops = {
788 .get_parent = reiserfs_get_parent, 805 .get_parent = reiserfs_get_parent,
789}; 806};
790 807
791/* this struct is used in reiserfs_getopt () for containing the value for those 808/*
792 mount options that have values rather than being toggles. */ 809 * this struct is used in reiserfs_getopt () for containing the value for
810 * those mount options that have values rather than being toggles.
811 */
793typedef struct { 812typedef struct {
794 char *value; 813 char *value;
795 int setmask; /* bitmask which is to set on mount_options bitmask when this 814 /*
796 value is found, 0 is no bits are to be changed. */ 815 * bitmask which is to set on mount_options bitmask
797 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 816 * when this value is found, 0 is no bits are to be changed.
798 value is found, 0 is no bits are to be changed. This is 817 */
799 applied BEFORE setmask */ 818 int setmask;
819 /*
820 * bitmask which is to clear on mount_options bitmask
821 * when this value is found, 0 is no bits are to be changed.
822 * This is applied BEFORE setmask
823 */
824 int clrmask;
800} arg_desc_t; 825} arg_desc_t;
801 826
802/* Set this bit in arg_required to allow empty arguments */ 827/* Set this bit in arg_required to allow empty arguments */
803#define REISERFS_OPT_ALLOWEMPTY 31 828#define REISERFS_OPT_ALLOWEMPTY 31
804 829
805/* this struct is used in reiserfs_getopt() for describing the set of reiserfs 830/*
806 mount options */ 831 * this struct is used in reiserfs_getopt() for describing the
832 * set of reiserfs mount options
833 */
807typedef struct { 834typedef struct {
808 char *option_name; 835 char *option_name;
809 int arg_required; /* 0 if argument is not required, not 0 otherwise */ 836
810 const arg_desc_t *values; /* list of values accepted by an option */ 837 /* 0 if argument is not required, not 0 otherwise */
811 int setmask; /* bitmask which is to set on mount_options bitmask when this 838 int arg_required;
812 value is found, 0 is no bits are to be changed. */ 839
813 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 840 /* list of values accepted by an option */
814 value is found, 0 is no bits are to be changed. This is 841 const arg_desc_t *values;
815 applied BEFORE setmask */ 842
843 /*
844 * bitmask which is to set on mount_options bitmask
845 * when this value is found, 0 is no bits are to be changed.
846 */
847 int setmask;
848
849 /*
850 * bitmask which is to clear on mount_options bitmask
851 * when this value is found, 0 is no bits are to be changed.
852 * This is applied BEFORE setmask
853 */
854 int clrmask;
816} opt_desc_t; 855} opt_desc_t;
817 856
818/* possible values for -o data= */ 857/* possible values for -o data= */
@@ -833,8 +872,10 @@ static const arg_desc_t barrier_mode[] = {
833 {.value = NULL} 872 {.value = NULL}
834}; 873};
835 874
836/* possible values for "-o block-allocator=" and bits which are to be set in 875/*
837 s_mount_opt of reiserfs specific part of in-core super block */ 876 * possible values for "-o block-allocator=" and bits which are to be set in
877 * s_mount_opt of reiserfs specific part of in-core super block
878 */
838static const arg_desc_t balloc[] = { 879static const arg_desc_t balloc[] = {
839 {"noborder", 1 << REISERFS_NO_BORDER, 0}, 880 {"noborder", 1 << REISERFS_NO_BORDER, 0},
840 {"border", 0, 1 << REISERFS_NO_BORDER}, 881 {"border", 0, 1 << REISERFS_NO_BORDER},
@@ -864,21 +905,25 @@ static const arg_desc_t error_actions[] = {
864 {NULL, 0, 0}, 905 {NULL, 0, 0},
865}; 906};
866 907
867/* proceed only one option from a list *cur - string containing of mount options 908/*
868 opts - array of options which are accepted 909 * proceed only one option from a list *cur - string containing of mount
869 opt_arg - if option is found and requires an argument and if it is specifed 910 * options
870 in the input - pointer to the argument is stored here 911 * opts - array of options which are accepted
871 bit_flags - if option requires to set a certain bit - it is set here 912 * opt_arg - if option is found and requires an argument and if it is specifed
872 return -1 if unknown option is found, opt->arg_required otherwise */ 913 * in the input - pointer to the argument is stored here
914 * bit_flags - if option requires to set a certain bit - it is set here
915 * return -1 if unknown option is found, opt->arg_required otherwise
916 */
873static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, 917static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
874 char **opt_arg, unsigned long *bit_flags) 918 char **opt_arg, unsigned long *bit_flags)
875{ 919{
876 char *p; 920 char *p;
877 /* foo=bar, 921 /*
878 ^ ^ ^ 922 * foo=bar,
879 | | +-- option_end 923 * ^ ^ ^
880 | +-- arg_start 924 * | | +-- option_end
881 +-- option_start 925 * | +-- arg_start
926 * +-- option_start
882 */ 927 */
883 const opt_desc_t *opt; 928 const opt_desc_t *opt;
884 const arg_desc_t *arg; 929 const arg_desc_t *arg;
@@ -893,9 +938,12 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
893 } 938 }
894 939
895 if (!strncmp(p, "alloc=", 6)) { 940 if (!strncmp(p, "alloc=", 6)) {
896 /* Ugly special case, probably we should redo options parser so that 941 /*
897 it can understand several arguments for some options, also so that 942 * Ugly special case, probably we should redo options
898 it can fill several bitfields with option values. */ 943 * parser so that it can understand several arguments for
944 * some options, also so that it can fill several bitfields
945 * with option values.
946 */
899 if (reiserfs_parse_alloc_options(s, p + 6)) { 947 if (reiserfs_parse_alloc_options(s, p + 6)) {
900 return -1; 948 return -1;
901 } else { 949 } else {
@@ -958,7 +1006,10 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
958 return -1; 1006 return -1;
959 } 1007 }
960 1008
961 /* move to the argument, or to next option if argument is not required */ 1009 /*
1010 * move to the argument, or to next option if argument is not
1011 * required
1012 */
962 p++; 1013 p++;
963 1014
964 if (opt->arg_required 1015 if (opt->arg_required
@@ -995,12 +1046,20 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
995} 1046}
996 1047
997/* returns 0 if something is wrong in option string, 1 - otherwise */ 1048/* returns 0 if something is wrong in option string, 1 - otherwise */
998static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ 1049static int reiserfs_parse_options(struct super_block *s,
1050
1051 /* string given via mount's -o */
1052 char *options,
1053
1054 /*
1055 * after the parsing phase, contains the
1056 * collection of bitflags defining what
1057 * mount options were selected.
1058 */
999 unsigned long *mount_options, 1059 unsigned long *mount_options,
1000 /* after the parsing phase, contains the 1060
1001 collection of bitflags defining what 1061 /* strtol-ed from NNN of resize=NNN */
1002 mount options were selected. */ 1062 unsigned long *blocks,
1003 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
1004 char **jdev_name, 1063 char **jdev_name,
1005 unsigned int *commit_max_age, 1064 unsigned int *commit_max_age,
1006 char **qf_names, 1065 char **qf_names,
@@ -1010,7 +1069,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1010 char *arg = NULL; 1069 char *arg = NULL;
1011 char *pos; 1070 char *pos;
1012 opt_desc_t opts[] = { 1071 opt_desc_t opts[] = {
1013 /* Compatibility stuff, so that -o notail for old setups still work */ 1072 /*
1073 * Compatibility stuff, so that -o notail for old
1074 * setups still work
1075 */
1014 {"tails",.arg_required = 't',.values = tails}, 1076 {"tails",.arg_required = 't',.values = tails},
1015 {"notail",.clrmask = 1077 {"notail",.clrmask =
1016 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, 1078 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
@@ -1055,8 +1117,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1055 1117
1056 *blocks = 0; 1118 *blocks = 0;
1057 if (!options || !*options) 1119 if (!options || !*options)
1058 /* use default configuration: create tails, journaling on, no 1120 /*
1059 conversion to newest format */ 1121 * use default configuration: create tails, journaling on, no
1122 * conversion to newest format
1123 */
1060 return 1; 1124 return 1;
1061 1125
1062 for (pos = options; pos;) { 1126 for (pos = options; pos;) {
@@ -1109,7 +1173,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1109 1173
1110 if (c == 'j') { 1174 if (c == 'j') {
1111 if (arg && *arg && jdev_name) { 1175 if (arg && *arg && jdev_name) {
1112 if (*jdev_name) { //Hm, already assigned? 1176 /* Hm, already assigned? */
1177 if (*jdev_name) {
1113 reiserfs_warning(s, "super-6510", 1178 reiserfs_warning(s, "super-6510",
1114 "journal device was " 1179 "journal device was "
1115 "already specified to " 1180 "already specified to "
@@ -1362,8 +1427,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1362 safe_mask |= 1 << REISERFS_USRQUOTA; 1427 safe_mask |= 1 << REISERFS_USRQUOTA;
1363 safe_mask |= 1 << REISERFS_GRPQUOTA; 1428 safe_mask |= 1 << REISERFS_GRPQUOTA;
1364 1429
1365 /* Update the bitmask, taking care to keep 1430 /*
1366 * the bits we're not allowed to change here */ 1431 * Update the bitmask, taking care to keep
1432 * the bits we're not allowed to change here
1433 */
1367 REISERFS_SB(s)->s_mount_opt = 1434 REISERFS_SB(s)->s_mount_opt =
1368 (REISERFS_SB(s)-> 1435 (REISERFS_SB(s)->
1369 s_mount_opt & ~safe_mask) | (mount_options & safe_mask); 1436 s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
@@ -1410,7 +1477,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1410 /* Mounting a rw partition read-only. */ 1477 /* Mounting a rw partition read-only. */
1411 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 1478 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1412 set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state); 1479 set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state);
1413 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1480 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
1414 } else { 1481 } else {
1415 /* remount read-write */ 1482 /* remount read-write */
1416 if (!(s->s_flags & MS_RDONLY)) { 1483 if (!(s->s_flags & MS_RDONLY)) {
@@ -1427,7 +1494,9 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1427 handle_data_mode(s, mount_options); 1494 handle_data_mode(s, mount_options);
1428 handle_barrier_mode(s, mount_options); 1495 handle_barrier_mode(s, mount_options);
1429 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1496 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1430 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ 1497
1498 /* now it is safe to call journal_begin */
1499 s->s_flags &= ~MS_RDONLY;
1431 err = journal_begin(&th, s, 10); 1500 err = journal_begin(&th, s, 10);
1432 if (err) 1501 if (err)
1433 goto out_err_unlock; 1502 goto out_err_unlock;
@@ -1440,12 +1509,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1440 if (!old_format_only(s)) 1509 if (!old_format_only(s))
1441 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); 1510 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
1442 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ 1511 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
1443 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 1512 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
1444 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS; 1513 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
1445 } 1514 }
1446 /* this will force a full flush of all journal lists */ 1515 /* this will force a full flush of all journal lists */
1447 SB_JOURNAL(s)->j_must_wait = 1; 1516 SB_JOURNAL(s)->j_must_wait = 1;
1448 err = journal_end(&th, s, 10); 1517 err = journal_end(&th);
1449 if (err) 1518 if (err)
1450 goto out_err_unlock; 1519 goto out_err_unlock;
1451 1520
@@ -1489,9 +1558,9 @@ static int read_super_block(struct super_block *s, int offset)
1489 brelse(bh); 1558 brelse(bh);
1490 return 1; 1559 return 1;
1491 } 1560 }
1492 // 1561 /*
1493 // ok, reiserfs signature (old or new) found in at the given offset 1562 * ok, reiserfs signature (old or new) found in at the given offset
1494 // 1563 */
1495 fs_blocksize = sb_blocksize(rs); 1564 fs_blocksize = sb_blocksize(rs);
1496 brelse(bh); 1565 brelse(bh);
1497 sb_set_blocksize(s, fs_blocksize); 1566 sb_set_blocksize(s, fs_blocksize);
@@ -1529,9 +1598,11 @@ static int read_super_block(struct super_block *s, int offset)
1529 SB_BUFFER_WITH_SB(s) = bh; 1598 SB_BUFFER_WITH_SB(s) = bh;
1530 SB_DISK_SUPER_BLOCK(s) = rs; 1599 SB_DISK_SUPER_BLOCK(s) = rs;
1531 1600
1601 /*
1602 * magic is of non-standard journal filesystem, look at s_version to
1603 * find which format is in use
1604 */
1532 if (is_reiserfs_jr(rs)) { 1605 if (is_reiserfs_jr(rs)) {
1533 /* magic is of non-standard journal filesystem, look at s_version to
1534 find which format is in use */
1535 if (sb_version(rs) == REISERFS_VERSION_2) 1606 if (sb_version(rs) == REISERFS_VERSION_2)
1536 reiserfs_info(s, "found reiserfs format \"3.6\"" 1607 reiserfs_info(s, "found reiserfs format \"3.6\""
1537 " with non-standard journal\n"); 1608 " with non-standard journal\n");
@@ -1545,8 +1616,10 @@ static int read_super_block(struct super_block *s, int offset)
1545 return 1; 1616 return 1;
1546 } 1617 }
1547 } else 1618 } else
1548 /* s_version of standard format may contain incorrect information, 1619 /*
1549 so we just look at the magic string */ 1620 * s_version of standard format may contain incorrect
1621 * information, so we just look at the magic string
1622 */
1550 reiserfs_info(s, 1623 reiserfs_info(s,
1551 "found reiserfs format \"%s\" with standard journal\n", 1624 "found reiserfs format \"%s\" with standard journal\n",
1552 is_reiserfs_3_5(rs) ? "3.5" : "3.6"); 1625 is_reiserfs_3_5(rs) ? "3.5" : "3.6");
@@ -1558,8 +1631,9 @@ static int read_super_block(struct super_block *s, int offset)
1558 s->dq_op = &reiserfs_quota_operations; 1631 s->dq_op = &reiserfs_quota_operations;
1559#endif 1632#endif
1560 1633
1561 /* new format is limited by the 32 bit wide i_blocks field, want to 1634 /*
1562 ** be one full block below that. 1635 * new format is limited by the 32 bit wide i_blocks field, want to
1636 * be one full block below that.
1563 */ 1637 */
1564 s->s_maxbytes = (512LL << 32) - s->s_blocksize; 1638 s->s_maxbytes = (512LL << 32) - s->s_blocksize;
1565 return 0; 1639 return 0;
@@ -1568,7 +1642,7 @@ static int read_super_block(struct super_block *s, int offset)
1568/* after journal replay, reread all bitmap and super blocks */ 1642/* after journal replay, reread all bitmap and super blocks */
1569static int reread_meta_blocks(struct super_block *s) 1643static int reread_meta_blocks(struct super_block *s)
1570{ 1644{
1571 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))); 1645 ll_rw_block(READ, 1, &SB_BUFFER_WITH_SB(s));
1572 wait_on_buffer(SB_BUFFER_WITH_SB(s)); 1646 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1573 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1647 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1574 reiserfs_warning(s, "reiserfs-2504", "error reading the super"); 1648 reiserfs_warning(s, "reiserfs-2504", "error reading the super");
@@ -1578,14 +1652,15 @@ static int reread_meta_blocks(struct super_block *s)
1578 return 0; 1652 return 0;
1579} 1653}
1580 1654
1581///////////////////////////////////////////////////// 1655/* hash detection stuff */
1582// hash detection stuff
1583 1656
1584// if root directory is empty - we set default - Yura's - hash and 1657/*
1585// warn about it 1658 * if root directory is empty - we set default - Yura's - hash and
1586// FIXME: we look for only one name in a directory. If tea and yura 1659 * warn about it
1587// bith have the same value - we ask user to send report to the 1660 * FIXME: we look for only one name in a directory. If tea and yura
1588// mailing list 1661 * both have the same value - we ask user to send report to the
1662 * mailing list
1663 */
1589static __u32 find_hash_out(struct super_block *s) 1664static __u32 find_hash_out(struct super_block *s)
1590{ 1665{
1591 int retval; 1666 int retval;
@@ -1593,92 +1668,83 @@ static __u32 find_hash_out(struct super_block *s)
1593 struct cpu_key key; 1668 struct cpu_key key;
1594 INITIALIZE_PATH(path); 1669 INITIALIZE_PATH(path);
1595 struct reiserfs_dir_entry de; 1670 struct reiserfs_dir_entry de;
1671 struct reiserfs_de_head *deh;
1596 __u32 hash = DEFAULT_HASH; 1672 __u32 hash = DEFAULT_HASH;
1673 __u32 deh_hashval, teahash, r5hash, yurahash;
1597 1674
1598 inode = s->s_root->d_inode; 1675 inode = s->s_root->d_inode;
1599 1676
1600 do { // Some serious "goto"-hater was there ;) 1677 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
1601 u32 teahash, r5hash, yurahash; 1678 retval = search_by_entry_key(s, &key, &path, &de);
1679 if (retval == IO_ERROR) {
1680 pathrelse(&path);
1681 return UNSET_HASH;
1682 }
1683 if (retval == NAME_NOT_FOUND)
1684 de.de_entry_num--;
1685
1686 set_de_name_and_namelen(&de);
1687 deh = de.de_deh + de.de_entry_num;
1602 1688
1603 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); 1689 if (deh_offset(deh) == DOT_DOT_OFFSET) {
1604 retval = search_by_entry_key(s, &key, &path, &de); 1690 /* allow override in this case */
1605 if (retval == IO_ERROR) { 1691 if (reiserfs_rupasov_hash(s))
1606 pathrelse(&path);
1607 return UNSET_HASH;
1608 }
1609 if (retval == NAME_NOT_FOUND)
1610 de.de_entry_num--;
1611 set_de_name_and_namelen(&de);
1612 if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) {
1613 /* allow override in this case */
1614 if (reiserfs_rupasov_hash(s)) {
1615 hash = YURA_HASH;
1616 }
1617 reiserfs_info(s, "FS seems to be empty, autodetect "
1618 "is using the default hash\n");
1619 break;
1620 }
1621 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
1622 teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
1623 yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
1624 if (((teahash == r5hash)
1625 &&
1626 (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])))
1627 == r5hash)) || ((teahash == yurahash)
1628 && (yurahash ==
1629 GET_HASH_VALUE(deh_offset
1630 (&
1631 (de.
1632 de_deh[de.
1633 de_entry_num])))))
1634 || ((r5hash == yurahash)
1635 && (yurahash ==
1636 GET_HASH_VALUE(deh_offset
1637 (&(de.de_deh[de.de_entry_num])))))) {
1638 reiserfs_warning(s, "reiserfs-2506", "Unable to "
1639 "automatically detect hash function. "
1640 "Please mount with -o "
1641 "hash={tea,rupasov,r5}");
1642 hash = UNSET_HASH;
1643 break;
1644 }
1645 if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) ==
1646 yurahash)
1647 hash = YURA_HASH; 1692 hash = YURA_HASH;
1648 else if (GET_HASH_VALUE 1693 reiserfs_info(s, "FS seems to be empty, autodetect is using the default hash\n");
1649 (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash) 1694 goto out;
1650 hash = TEA_HASH; 1695 }
1651 else if (GET_HASH_VALUE 1696
1652 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) 1697 deh_hashval = GET_HASH_VALUE(deh_offset(deh));
1653 hash = R5_HASH; 1698 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
1654 else { 1699 teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
1655 reiserfs_warning(s, "reiserfs-2506", 1700 yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
1656 "Unrecognised hash function"); 1701
1657 hash = UNSET_HASH; 1702 if ((teahash == r5hash && deh_hashval == r5hash) ||
1658 } 1703 (teahash == yurahash && deh_hashval == yurahash) ||
1659 } while (0); 1704 (r5hash == yurahash && deh_hashval == yurahash)) {
1705 reiserfs_warning(s, "reiserfs-2506",
1706 "Unable to automatically detect hash "
1707 "function. Please mount with -o "
1708 "hash={tea,rupasov,r5}");
1709 hash = UNSET_HASH;
1710 goto out;
1711 }
1660 1712
1713 if (deh_hashval == yurahash)
1714 hash = YURA_HASH;
1715 else if (deh_hashval == teahash)
1716 hash = TEA_HASH;
1717 else if (deh_hashval == r5hash)
1718 hash = R5_HASH;
1719 else {
1720 reiserfs_warning(s, "reiserfs-2506",
1721 "Unrecognised hash function");
1722 hash = UNSET_HASH;
1723 }
1724out:
1661 pathrelse(&path); 1725 pathrelse(&path);
1662 return hash; 1726 return hash;
1663} 1727}
1664 1728
1665// finds out which hash names are sorted with 1729/* finds out which hash names are sorted with */
1666static int what_hash(struct super_block *s) 1730static int what_hash(struct super_block *s)
1667{ 1731{
1668 __u32 code; 1732 __u32 code;
1669 1733
1670 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); 1734 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
1671 1735
1672 /* reiserfs_hash_detect() == true if any of the hash mount options 1736 /*
1673 ** were used. We must check them to make sure the user isn't 1737 * reiserfs_hash_detect() == true if any of the hash mount options
1674 ** using a bad hash value 1738 * were used. We must check them to make sure the user isn't
1739 * using a bad hash value
1675 */ 1740 */
1676 if (code == UNSET_HASH || reiserfs_hash_detect(s)) 1741 if (code == UNSET_HASH || reiserfs_hash_detect(s))
1677 code = find_hash_out(s); 1742 code = find_hash_out(s);
1678 1743
1679 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1744 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1680 /* detection has found the hash, and we must check against the 1745 /*
1681 ** mount options 1746 * detection has found the hash, and we must check against the
1747 * mount options
1682 */ 1748 */
1683 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1749 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1684 reiserfs_warning(s, "reiserfs-2507", 1750 reiserfs_warning(s, "reiserfs-2507",
@@ -1700,7 +1766,10 @@ static int what_hash(struct super_block *s)
1700 code = UNSET_HASH; 1766 code = UNSET_HASH;
1701 } 1767 }
1702 } else { 1768 } else {
1703 /* find_hash_out was not called or could not determine the hash */ 1769 /*
1770 * find_hash_out was not called or
1771 * could not determine the hash
1772 */
1704 if (reiserfs_rupasov_hash(s)) { 1773 if (reiserfs_rupasov_hash(s)) {
1705 code = YURA_HASH; 1774 code = YURA_HASH;
1706 } else if (reiserfs_tea_hash(s)) { 1775 } else if (reiserfs_tea_hash(s)) {
@@ -1710,8 +1779,9 @@ static int what_hash(struct super_block *s)
1710 } 1779 }
1711 } 1780 }
1712 1781
1713 /* if we are mounted RW, and we have a new valid hash code, update 1782 /*
1714 ** the super 1783 * if we are mounted RW, and we have a new valid hash code, update
1784 * the super
1715 */ 1785 */
1716 if (code != UNSET_HASH && 1786 if (code != UNSET_HASH &&
1717 !(s->s_flags & MS_RDONLY) && 1787 !(s->s_flags & MS_RDONLY) &&
@@ -1721,7 +1791,7 @@ static int what_hash(struct super_block *s)
1721 return code; 1791 return code;
1722} 1792}
1723 1793
1724// return pointer to appropriate function 1794/* return pointer to appropriate function */
1725static hashf_t hash_function(struct super_block *s) 1795static hashf_t hash_function(struct super_block *s)
1726{ 1796{
1727 switch (what_hash(s)) { 1797 switch (what_hash(s)) {
@@ -1738,7 +1808,7 @@ static hashf_t hash_function(struct super_block *s)
1738 return NULL; 1808 return NULL;
1739} 1809}
1740 1810
1741// this is used to set up correct value for old partitions 1811/* this is used to set up correct value for old partitions */
1742static int function2code(hashf_t func) 1812static int function2code(hashf_t func)
1743{ 1813{
1744 if (func == keyed_hash) 1814 if (func == keyed_hash)
@@ -1748,7 +1818,7 @@ static int function2code(hashf_t func)
1748 if (func == r5_hash) 1818 if (func == r5_hash)
1749 return R5_HASH; 1819 return R5_HASH;
1750 1820
1751 BUG(); // should never happen 1821 BUG(); /* should never happen */
1752 1822
1753 return 0; 1823 return 0;
1754} 1824}
@@ -1783,8 +1853,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1783 sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1853 sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1784 sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1854 sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1785 sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); 1855 sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
1786 /* no preallocation minimum, be smart in 1856 /* no preallocation minimum, be smart in reiserfs_file_write instead */
1787 reiserfs_file_write instead */
1788 sbi->s_alloc_options.preallocmin = 0; 1857 sbi->s_alloc_options.preallocmin = 0;
1789 /* Preallocate by 16 blocks (17-1) at once */ 1858 /* Preallocate by 16 blocks (17-1) at once */
1790 sbi->s_alloc_options.preallocsize = 17; 1859 sbi->s_alloc_options.preallocsize = 17;
@@ -1796,9 +1865,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1796 mutex_init(&sbi->lock); 1865 mutex_init(&sbi->lock);
1797 sbi->lock_depth = -1; 1866 sbi->lock_depth = -1;
1798 1867
1868 sbi->commit_wq = alloc_workqueue("reiserfs/%s", WQ_MEM_RECLAIM, 0,
1869 s->s_id);
1870 if (!sbi->commit_wq) {
1871 SWARN(silent, s, "", "Cannot allocate commit workqueue");
1872 errval = -ENOMEM;
1873 goto error_unlocked;
1874 }
1875
1799 jdev_name = NULL; 1876 jdev_name = NULL;
1800 if (reiserfs_parse_options 1877 if (reiserfs_parse_options
1801 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1878 (s, (char *)data, &sbi->s_mount_opt, &blocks, &jdev_name,
1802 &commit_max_age, qf_names, &qfmt) == 0) { 1879 &commit_max_age, qf_names, &qfmt) == 0) {
1803 goto error_unlocked; 1880 goto error_unlocked;
1804 } 1881 }
@@ -1819,10 +1896,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1819 goto error_unlocked; 1896 goto error_unlocked;
1820 } 1897 }
1821 1898
1822 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1899 /*
1900 * try old format (undistributed bitmap, super block in 8-th 1k
1901 * block of a device)
1902 */
1823 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) 1903 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
1824 old_format = 1; 1904 old_format = 1;
1825 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1905
1906 /*
1907 * try new format (64-th 1k block), which can contain reiserfs
1908 * super block
1909 */
1826 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1910 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1827 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", 1911 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1828 s->s_id); 1912 s->s_id);
@@ -1830,9 +1914,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1830 } 1914 }
1831 1915
1832 rs = SB_DISK_SUPER_BLOCK(s); 1916 rs = SB_DISK_SUPER_BLOCK(s);
1833 /* Let's do basic sanity check to verify that underlying device is not 1917 /*
1834 smaller than the filesystem. If the check fails then abort and scream, 1918 * Let's do basic sanity check to verify that underlying device is not
1835 because bad stuff will happen otherwise. */ 1919 * smaller than the filesystem. If the check fails then abort and
1920 * scream, because bad stuff will happen otherwise.
1921 */
1836 if (s->s_bdev && s->s_bdev->bd_inode 1922 if (s->s_bdev && s->s_bdev->bd_inode
1837 && i_size_read(s->s_bdev->bd_inode) < 1923 && i_size_read(s->s_bdev->bd_inode) <
1838 sb_block_count(rs) * sb_blocksize(rs)) { 1924 sb_block_count(rs) * sb_blocksize(rs)) {
@@ -1876,15 +1962,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1876 printk("reiserfs: using flush barriers\n"); 1962 printk("reiserfs: using flush barriers\n");
1877 } 1963 }
1878 1964
1879 // set_device_ro(s->s_dev, 1) ;
1880 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1965 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1881 SWARN(silent, s, "sh-2022", 1966 SWARN(silent, s, "sh-2022",
1882 "unable to initialize journal space"); 1967 "unable to initialize journal space");
1883 goto error_unlocked; 1968 goto error_unlocked;
1884 } else { 1969 } else {
1885 jinit_done = 1; /* once this is set, journal_release must be called 1970 /*
1886 ** if we error out of the mount 1971 * once this is set, journal_release must be called
1887 */ 1972 * if we error out of the mount
1973 */
1974 jinit_done = 1;
1888 } 1975 }
1889 1976
1890 if (reread_meta_blocks(s)) { 1977 if (reread_meta_blocks(s)) {
@@ -1905,7 +1992,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1905 args.dirid = REISERFS_ROOT_PARENT_OBJECTID; 1992 args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
1906 root_inode = 1993 root_inode =
1907 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, 1994 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
1908 reiserfs_init_locked_inode, (void *)(&args)); 1995 reiserfs_init_locked_inode, (void *)&args);
1909 if (!root_inode) { 1996 if (!root_inode) {
1910 SWARN(silent, s, "jmacd-10", "get root inode failed"); 1997 SWARN(silent, s, "jmacd-10", "get root inode failed");
1911 goto error_unlocked; 1998 goto error_unlocked;
@@ -1929,7 +2016,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1929 s->s_root = d_make_root(root_inode); 2016 s->s_root = d_make_root(root_inode);
1930 if (!s->s_root) 2017 if (!s->s_root)
1931 goto error; 2018 goto error;
1932 // define and initialize hash function 2019 /* define and initialize hash function */
1933 sbi->s_hash_function = hash_function(s); 2020 sbi->s_hash_function = hash_function(s);
1934 if (sbi->s_hash_function == NULL) { 2021 if (sbi->s_hash_function == NULL) {
1935 dput(s->s_root); 2022 dput(s->s_root);
@@ -1939,11 +2026,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1939 2026
1940 if (is_reiserfs_3_5(rs) 2027 if (is_reiserfs_3_5(rs)
1941 || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1)) 2028 || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1))
1942 set_bit(REISERFS_3_5, &(sbi->s_properties)); 2029 set_bit(REISERFS_3_5, &sbi->s_properties);
1943 else if (old_format) 2030 else if (old_format)
1944 set_bit(REISERFS_OLD_FORMAT, &(sbi->s_properties)); 2031 set_bit(REISERFS_OLD_FORMAT, &sbi->s_properties);
1945 else 2032 else
1946 set_bit(REISERFS_3_6, &(sbi->s_properties)); 2033 set_bit(REISERFS_3_6, &sbi->s_properties);
1947 2034
1948 if (!(s->s_flags & MS_RDONLY)) { 2035 if (!(s->s_flags & MS_RDONLY)) {
1949 2036
@@ -1958,10 +2045,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1958 set_sb_umount_state(rs, REISERFS_ERROR_FS); 2045 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1959 set_sb_fs_state(rs, 0); 2046 set_sb_fs_state(rs, 0);
1960 2047
1961 /* Clear out s_bmap_nr if it would wrap. We can handle this 2048 /*
2049 * Clear out s_bmap_nr if it would wrap. We can handle this
1962 * case, but older revisions can't. This will cause the 2050 * case, but older revisions can't. This will cause the
1963 * file system to fail mount on those older implementations, 2051 * file system to fail mount on those older implementations,
1964 * avoiding corruption. -jeffm */ 2052 * avoiding corruption. -jeffm
2053 */
1965 if (bmap_would_wrap(reiserfs_bmap_count(s)) && 2054 if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
1966 sb_bmap_nr(rs) != 0) { 2055 sb_bmap_nr(rs) != 0) {
1967 reiserfs_warning(s, "super-2030", "This file system " 2056 reiserfs_warning(s, "super-2030", "This file system "
@@ -1974,8 +2063,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1974 } 2063 }
1975 2064
1976 if (old_format_only(s)) { 2065 if (old_format_only(s)) {
1977 /* filesystem of format 3.5 either with standard or non-standard 2066 /*
1978 journal */ 2067 * filesystem of format 3.5 either with standard
2068 * or non-standard journal
2069 */
1979 if (convert_reiserfs(s)) { 2070 if (convert_reiserfs(s)) {
1980 /* and -o conv is given */ 2071 /* and -o conv is given */
1981 if (!silent) 2072 if (!silent)
@@ -1983,8 +2074,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1983 "converting 3.5 filesystem to the 3.6 format"); 2074 "converting 3.5 filesystem to the 3.6 format");
1984 2075
1985 if (is_reiserfs_3_5(rs)) 2076 if (is_reiserfs_3_5(rs))
1986 /* put magic string of 3.6 format. 2.2 will not be able to 2077 /*
1987 mount this filesystem anymore */ 2078 * put magic string of 3.6 format.
2079 * 2.2 will not be able to
2080 * mount this filesystem anymore
2081 */
1988 memcpy(rs->s_v1.s_magic, 2082 memcpy(rs->s_v1.s_magic,
1989 reiserfs_3_6_magic_string, 2083 reiserfs_3_6_magic_string,
1990 sizeof 2084 sizeof
@@ -1992,8 +2086,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1992 2086
1993 set_sb_version(rs, REISERFS_VERSION_2); 2087 set_sb_version(rs, REISERFS_VERSION_2);
1994 reiserfs_convert_objectid_map_v1(s); 2088 reiserfs_convert_objectid_map_v1(s);
1995 set_bit(REISERFS_3_6, &(sbi->s_properties)); 2089 set_bit(REISERFS_3_6, &sbi->s_properties);
1996 clear_bit(REISERFS_3_5, &(sbi->s_properties)); 2090 clear_bit(REISERFS_3_5, &sbi->s_properties);
1997 } else if (!silent) { 2091 } else if (!silent) {
1998 reiserfs_info(s, "using 3.5.x disk format\n"); 2092 reiserfs_info(s, "using 3.5.x disk format\n");
1999 } 2093 }
@@ -2001,8 +2095,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
2001 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1); 2095 set_sb_mnt_count(rs, sb_mnt_count(rs) + 1);
2002 2096
2003 2097
2004 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 2098 journal_mark_dirty(&th, SB_BUFFER_WITH_SB(s));
2005 errval = journal_end(&th, s, 1); 2099 errval = journal_end(&th);
2006 if (errval) { 2100 if (errval) {
2007 dput(s->s_root); 2101 dput(s->s_root);
2008 s->s_root = NULL; 2102 s->s_root = NULL;
@@ -2018,7 +2112,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
2018 } 2112 }
2019 reiserfs_write_lock(s); 2113 reiserfs_write_lock(s);
2020 2114
2021 /* look for files which were to be removed in previous session */ 2115 /*
2116 * look for files which were to be removed in previous session
2117 */
2022 finish_unfinished(s); 2118 finish_unfinished(s);
2023 } else { 2119 } else {
2024 if (old_format_only(s) && !silent) { 2120 if (old_format_only(s) && !silent) {
@@ -2034,7 +2130,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
2034 } 2130 }
2035 reiserfs_write_lock(s); 2131 reiserfs_write_lock(s);
2036 } 2132 }
2037 // mark hash in super block: it could be unset. overwrite should be ok 2133 /*
2134 * mark hash in super block: it could be unset. overwrite should be ok
2135 */
2038 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); 2136 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
2039 2137
2040 handle_attrs(s); 2138 handle_attrs(s);
@@ -2111,9 +2209,7 @@ static int reiserfs_write_dquot(struct dquot *dquot)
2111 depth = reiserfs_write_unlock_nested(dquot->dq_sb); 2209 depth = reiserfs_write_unlock_nested(dquot->dq_sb);
2112 ret = dquot_commit(dquot); 2210 ret = dquot_commit(dquot);
2113 reiserfs_write_lock_nested(dquot->dq_sb, depth); 2211 reiserfs_write_lock_nested(dquot->dq_sb, depth);
2114 err = 2212 err = journal_end(&th);
2115 journal_end(&th, dquot->dq_sb,
2116 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2117 if (!ret && err) 2213 if (!ret && err)
2118 ret = err; 2214 ret = err;
2119out: 2215out:
@@ -2136,9 +2232,7 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
2136 depth = reiserfs_write_unlock_nested(dquot->dq_sb); 2232 depth = reiserfs_write_unlock_nested(dquot->dq_sb);
2137 ret = dquot_acquire(dquot); 2233 ret = dquot_acquire(dquot);
2138 reiserfs_write_lock_nested(dquot->dq_sb, depth); 2234 reiserfs_write_lock_nested(dquot->dq_sb, depth);
2139 err = 2235 err = journal_end(&th);
2140 journal_end(&th, dquot->dq_sb,
2141 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2142 if (!ret && err) 2236 if (!ret && err)
2143 ret = err; 2237 ret = err;
2144out: 2238out:
@@ -2163,9 +2257,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
2163 } 2257 }
2164 ret = dquot_release(dquot); 2258 ret = dquot_release(dquot);
2165 reiserfs_write_lock(dquot->dq_sb); 2259 reiserfs_write_lock(dquot->dq_sb);
2166 err = 2260 err = journal_end(&th);
2167 journal_end(&th, dquot->dq_sb,
2168 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2169 if (!ret && err) 2261 if (!ret && err)
2170 ret = err; 2262 ret = err;
2171 reiserfs_write_unlock(dquot->dq_sb); 2263 reiserfs_write_unlock(dquot->dq_sb);
@@ -2198,7 +2290,7 @@ static int reiserfs_write_info(struct super_block *sb, int type)
2198 depth = reiserfs_write_unlock_nested(sb); 2290 depth = reiserfs_write_unlock_nested(sb);
2199 ret = dquot_commit_info(sb, type); 2291 ret = dquot_commit_info(sb, type);
2200 reiserfs_write_lock_nested(sb, depth); 2292 reiserfs_write_lock_nested(sb, depth);
2201 err = journal_end(&th, sb, 2); 2293 err = journal_end(&th);
2202 if (!ret && err) 2294 if (!ret && err)
2203 ret = err; 2295 ret = err;
2204out: 2296out:
@@ -2238,7 +2330,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2238 goto out; 2330 goto out;
2239 } 2331 }
2240 inode = path->dentry->d_inode; 2332 inode = path->dentry->d_inode;
2241 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2333 /*
2334 * We must not pack tails for quota files on reiserfs for quota
2335 * IO to work
2336 */
2242 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2337 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2243 err = reiserfs_unpack(inode, NULL); 2338 err = reiserfs_unpack(inode, NULL);
2244 if (err) { 2339 if (err) {
@@ -2268,7 +2363,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2268 err = journal_begin(&th, sb, 1); 2363 err = journal_begin(&th, sb, 1);
2269 if (err) 2364 if (err)
2270 goto out; 2365 goto out;
2271 err = journal_end_sync(&th, sb, 1); 2366 err = journal_end_sync(&th);
2272 if (err) 2367 if (err)
2273 goto out; 2368 goto out;
2274 } 2369 }
@@ -2279,10 +2374,12 @@ out:
2279 return err; 2374 return err;
2280} 2375}
2281 2376
2282/* Read data from quotafile - avoid pagecache and such because we cannot afford 2377/*
2378 * Read data from quotafile - avoid pagecache and such because we cannot afford
2283 * acquiring the locks... As quota files are never truncated and quota code 2379 * acquiring the locks... As quota files are never truncated and quota code
2284 * itself serializes the operations (and no one else should touch the files) 2380 * itself serializes the operations (and no one else should touch the files)
2285 * we don't have to be afraid of races */ 2381 * we don't have to be afraid of races
2382 */
2286static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2383static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2287 size_t len, loff_t off) 2384 size_t len, loff_t off)
2288{ 2385{
@@ -2303,7 +2400,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2303 sb->s_blocksize - offset < 2400 sb->s_blocksize - offset <
2304 toread ? sb->s_blocksize - offset : toread; 2401 toread ? sb->s_blocksize - offset : toread;
2305 tmp_bh.b_state = 0; 2402 tmp_bh.b_state = 0;
2306 /* Quota files are without tails so we can safely use this function */ 2403 /*
2404 * Quota files are without tails so we can safely
2405 * use this function
2406 */
2307 reiserfs_write_lock(sb); 2407 reiserfs_write_lock(sb);
2308 err = reiserfs_get_block(inode, blk, &tmp_bh, 0); 2408 err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
2309 reiserfs_write_unlock(sb); 2409 reiserfs_write_unlock(sb);
@@ -2326,8 +2426,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2326 return len; 2426 return len;
2327} 2427}
2328 2428
2329/* Write to quotafile (we know the transaction is already started and has 2429/*
2330 * enough credits) */ 2430 * Write to quotafile (we know the transaction is already started and has
2431 * enough credits)
2432 */
2331static ssize_t reiserfs_quota_write(struct super_block *sb, int type, 2433static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2332 const char *data, size_t len, loff_t off) 2434 const char *data, size_t len, loff_t off)
2333{ 2435{
@@ -2368,7 +2470,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2368 unlock_buffer(bh); 2470 unlock_buffer(bh);
2369 reiserfs_write_lock(sb); 2471 reiserfs_write_lock(sb);
2370 reiserfs_prepare_for_journal(sb, bh, 1); 2472 reiserfs_prepare_for_journal(sb, bh, 1);
2371 journal_mark_dirty(current->journal_info, sb, bh); 2473 journal_mark_dirty(current->journal_info, bh);
2372 if (!journal_quota) 2474 if (!journal_quota)
2373 reiserfs_add_ordered_list(inode, bh); 2475 reiserfs_add_ordered_list(inode, bh);
2374 reiserfs_write_unlock(sb); 2476 reiserfs_write_unlock(sb);
@@ -2402,18 +2504,18 @@ static int __init init_reiserfs_fs(void)
2402{ 2504{
2403 int ret; 2505 int ret;
2404 2506
2405 if ((ret = init_inodecache())) { 2507 ret = init_inodecache();
2508 if (ret)
2406 return ret; 2509 return ret;
2407 }
2408 2510
2409 reiserfs_proc_info_global_init(); 2511 reiserfs_proc_info_global_init();
2410 2512
2411 ret = register_filesystem(&reiserfs_fs_type); 2513 ret = register_filesystem(&reiserfs_fs_type);
2514 if (ret)
2515 goto out;
2412 2516
2413 if (ret == 0) { 2517 return 0;
2414 return 0; 2518out:
2415 }
2416
2417 reiserfs_proc_info_global_done(); 2519 reiserfs_proc_info_global_done();
2418 destroy_inodecache(); 2520 destroy_inodecache();
2419 2521
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 5e2624d12f70..f41e19b4bb42 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details 2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright
3 * details
3 */ 4 */
4 5
5#include <linux/time.h> 6#include <linux/time.h>
@@ -7,29 +8,41 @@
7#include <linux/buffer_head.h> 8#include <linux/buffer_head.h>
8#include "reiserfs.h" 9#include "reiserfs.h"
9 10
10/* access to tail : when one is going to read tail it must make sure, that is not running. 11/*
11 direct2indirect and indirect2direct can not run concurrently */ 12 * access to tail : when one is going to read tail it must make sure, that is
13 * not running. direct2indirect and indirect2direct can not run concurrently
14 */
12 15
13/* Converts direct items to an unformatted node. Panics if file has no 16/*
14 tail. -ENOSPC if no disk space for conversion */ 17 * Converts direct items to an unformatted node. Panics if file has no
15/* path points to first direct item of the file regarless of how many of 18 * tail. -ENOSPC if no disk space for conversion
16 them are there */ 19 */
20/*
21 * path points to first direct item of the file regardless of how many of
22 * them are there
23 */
17int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, 24int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
18 struct treepath *path, struct buffer_head *unbh, 25 struct treepath *path, struct buffer_head *unbh,
19 loff_t tail_offset) 26 loff_t tail_offset)
20{ 27{
21 struct super_block *sb = inode->i_sb; 28 struct super_block *sb = inode->i_sb;
22 struct buffer_head *up_to_date_bh; 29 struct buffer_head *up_to_date_bh;
23 struct item_head *p_le_ih = PATH_PITEM_HEAD(path); 30 struct item_head *p_le_ih = tp_item_head(path);
24 unsigned long total_tail = 0; 31 unsigned long total_tail = 0;
25 struct cpu_key end_key; /* Key to search for the last byte of the 32
26 converted item. */ 33 /* Key to search for the last byte of the converted item. */
27 struct item_head ind_ih; /* new indirect item to be inserted or 34 struct cpu_key end_key;
28 key of unfm pointer to be pasted */ 35
29 int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ 36 /*
30 unp_t unfm_ptr; /* Handle on an unformatted node 37 * new indirect item to be inserted or key
31 that will be inserted in the 38 * of unfm pointer to be pasted
32 tree. */ 39 */
40 struct item_head ind_ih;
41 int blk_size;
42 /* returned value for reiserfs_insert_item and clones */
43 int retval;
44 /* Handle on an unformatted node that will be inserted in the tree. */
45 unp_t unfm_ptr;
33 46
34 BUG_ON(!th->t_trans_id); 47 BUG_ON(!th->t_trans_id);
35 48
@@ -37,8 +50,10 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
37 50
38 blk_size = sb->s_blocksize; 51 blk_size = sb->s_blocksize;
39 52
40 /* and key to search for append or insert pointer to the new 53 /*
41 unformatted node. */ 54 * and key to search for append or insert pointer to the new
55 * unformatted node.
56 */
42 copy_item_head(&ind_ih, p_le_ih); 57 copy_item_head(&ind_ih, p_le_ih);
43 set_le_ih_k_offset(&ind_ih, tail_offset); 58 set_le_ih_k_offset(&ind_ih, tail_offset);
44 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); 59 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
@@ -55,7 +70,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
55 return -EIO; 70 return -EIO;
56 } 71 }
57 72
58 p_le_ih = PATH_PITEM_HEAD(path); 73 p_le_ih = tp_item_head(path);
59 74
60 unfm_ptr = cpu_to_le32(unbh->b_blocknr); 75 unfm_ptr = cpu_to_le32(unbh->b_blocknr);
61 76
@@ -76,36 +91,43 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
76 if (retval) { 91 if (retval) {
77 return retval; 92 return retval;
78 } 93 }
79 // note: from here there are two keys which have matching first 94 /*
80 // three key components. They only differ by the fourth one. 95 * note: from here there are two keys which have matching first
96 * three key components. They only differ by the fourth one.
97 */
81 98
82 /* Set the key to search for the direct items of the file */ 99 /* Set the key to search for the direct items of the file */
83 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, 100 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
84 4); 101 4);
85 102
86 /* Move bytes from the direct items to the new unformatted node 103 /*
87 and delete them. */ 104 * Move bytes from the direct items to the new unformatted node
105 * and delete them.
106 */
88 while (1) { 107 while (1) {
89 int tail_size; 108 int tail_size;
90 109
91 /* end_key.k_offset is set so, that we will always have found 110 /*
92 last item of the file */ 111 * end_key.k_offset is set so, that we will always have found
112 * last item of the file
113 */
93 if (search_for_position_by_key(sb, &end_key, path) == 114 if (search_for_position_by_key(sb, &end_key, path) ==
94 POSITION_FOUND) 115 POSITION_FOUND)
95 reiserfs_panic(sb, "PAP-14050", 116 reiserfs_panic(sb, "PAP-14050",
96 "direct item (%K) not found", &end_key); 117 "direct item (%K) not found", &end_key);
97 p_le_ih = PATH_PITEM_HEAD(path); 118 p_le_ih = tp_item_head(path);
98 RFALSE(!is_direct_le_ih(p_le_ih), 119 RFALSE(!is_direct_le_ih(p_le_ih),
99 "vs-14055: direct item expected(%K), found %h", 120 "vs-14055: direct item expected(%K), found %h",
100 &end_key, p_le_ih); 121 &end_key, p_le_ih);
101 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) 122 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
102 + ih_item_len(p_le_ih) - 1; 123 + ih_item_len(p_le_ih) - 1;
103 124
104 /* we only send the unbh pointer if the buffer is not up to date. 125 /*
105 ** this avoids overwriting good data from writepage() with old data 126 * we only send the unbh pointer if the buffer is not
106 ** from the disk or buffer cache 127 * up to date. this avoids overwriting good data from
107 ** Special case: unbh->b_page will be NULL if we are coming through 128 * writepage() with old data from the disk or buffer cache
108 ** DIRECT_IO handler here. 129 * Special case: unbh->b_page will be NULL if we are coming
130 * through DIRECT_IO handler here.
109 */ 131 */
110 if (!unbh->b_page || buffer_uptodate(unbh) 132 if (!unbh->b_page || buffer_uptodate(unbh)
111 || PageUptodate(unbh->b_page)) { 133 || PageUptodate(unbh->b_page)) {
@@ -117,13 +139,15 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
117 up_to_date_bh); 139 up_to_date_bh);
118 140
119 total_tail += retval; 141 total_tail += retval;
142
143 /* done: file does not have direct items anymore */
120 if (tail_size == retval) 144 if (tail_size == retval)
121 // done: file does not have direct items anymore
122 break; 145 break;
123 146
124 } 147 }
125 /* if we've copied bytes from disk into the page, we need to zero 148 /*
126 ** out the unused part of the block (it was not up to date before) 149 * if we've copied bytes from disk into the page, we need to zero
150 * out the unused part of the block (it was not up to date before)
127 */ 151 */
128 if (up_to_date_bh) { 152 if (up_to_date_bh) {
129 unsigned pgoff = 153 unsigned pgoff =
@@ -146,9 +170,11 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
146 BUG(); 170 BUG();
147 } 171 }
148 clear_buffer_dirty(bh); 172 clear_buffer_dirty(bh);
149 /* Remove the buffer from whatever list it belongs to. We are mostly 173 /*
150 interested in removing it from per-sb j_dirty_buffers list, to avoid 174 * Remove the buffer from whatever list it belongs to. We are mostly
151 BUG() on attempt to write not mapped buffer */ 175 * interested in removing it from per-sb j_dirty_buffers list, to avoid
176 * BUG() on attempt to write not mapped buffer
177 */
152 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { 178 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
153 struct inode *inode = bh->b_page->mapping->host; 179 struct inode *inode = bh->b_page->mapping->host;
154 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 180 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
@@ -164,12 +190,14 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
164 unlock_buffer(bh); 190 unlock_buffer(bh);
165} 191}
166 192
167/* this first locks inode (neither reads nor sync are permitted), 193/*
168 reads tail through page cache, insert direct item. When direct item 194 * this first locks inode (neither reads nor sync are permitted),
169 inserted successfully inode is left locked. Return value is always 195 * reads tail through page cache, insert direct item. When direct item
170 what we expect from it (number of cut bytes). But when tail remains 196 * inserted successfully inode is left locked. Return value is always
171 in the unformatted node, we set mode to SKIP_BALANCING and unlock 197 * what we expect from it (number of cut bytes). But when tail remains
172 inode */ 198 * in the unformatted node, we set mode to SKIP_BALANCING and unlock
199 * inode
200 */
173int indirect2direct(struct reiserfs_transaction_handle *th, 201int indirect2direct(struct reiserfs_transaction_handle *th,
174 struct inode *inode, struct page *page, 202 struct inode *inode, struct page *page,
175 struct treepath *path, /* path to the indirect item. */ 203 struct treepath *path, /* path to the indirect item. */
@@ -194,7 +222,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
194 *mode = M_SKIP_BALANCING; 222 *mode = M_SKIP_BALANCING;
195 223
196 /* store item head path points to. */ 224 /* store item head path points to. */
197 copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); 225 copy_item_head(&s_ih, tp_item_head(path));
198 226
199 tail_len = (n_new_file_size & (block_size - 1)); 227 tail_len = (n_new_file_size & (block_size - 1));
200 if (get_inode_sd_version(inode) == STAT_DATA_V2) 228 if (get_inode_sd_version(inode) == STAT_DATA_V2)
@@ -207,9 +235,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
207 1) * sb->s_blocksize; 235 1) * sb->s_blocksize;
208 pos1 = pos; 236 pos1 = pos;
209 237
210 // we are protected by i_mutex. The tail can not disappear, not 238 /*
211 // append can be done either 239 * we are protected by i_mutex. The tail can not disappear, not
212 // we are in truncate or packing tail in file_release 240 * append can be done either
241 * we are in truncate or packing tail in file_release
242 */
213 243
214 tail = (char *)kmap(page); /* this can schedule */ 244 tail = (char *)kmap(page); /* this can schedule */
215 245
@@ -220,7 +250,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
220 reiserfs_panic(sb, "PAP-5520", 250 reiserfs_panic(sb, "PAP-5520",
221 "item to be converted %K does not exist", 251 "item to be converted %K does not exist",
222 item_key); 252 item_key);
223 copy_item_head(&s_ih, PATH_PITEM_HEAD(path)); 253 copy_item_head(&s_ih, tp_item_head(path));
224#ifdef CONFIG_REISERFS_CHECK 254#ifdef CONFIG_REISERFS_CHECK
225 pos = le_ih_k_offset(&s_ih) - 1 + 255 pos = le_ih_k_offset(&s_ih) - 1 +
226 (ih_item_len(&s_ih) / UNFM_P_SIZE - 256 (ih_item_len(&s_ih) / UNFM_P_SIZE -
@@ -236,9 +266,10 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
236 pos1 + 1, TYPE_DIRECT, round_tail_len, 266 pos1 + 1, TYPE_DIRECT, round_tail_len,
237 0xffff /*ih_free_space */ ); 267 0xffff /*ih_free_space */ );
238 268
239 /* we want a pointer to the first byte of the tail in the page. 269 /*
240 ** the page was locked and this part of the page was up to date when 270 * we want a pointer to the first byte of the tail in the page.
241 ** indirect2direct was called, so we know the bytes are still valid 271 * the page was locked and this part of the page was up to date when
272 * indirect2direct was called, so we know the bytes are still valid
242 */ 273 */
243 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); 274 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
244 275
@@ -250,12 +281,14 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
250 /* Insert tail as new direct item in the tree */ 281 /* Insert tail as new direct item in the tree */
251 if (reiserfs_insert_item(th, path, &key, &s_ih, inode, 282 if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
252 tail ? tail : NULL) < 0) { 283 tail ? tail : NULL) < 0) {
253 /* No disk memory. So we can not convert last unformatted node 284 /*
254 to the direct item. In this case we used to adjust 285 * No disk memory. So we can not convert last unformatted node
255 indirect items's ih_free_space. Now ih_free_space is not 286 * to the direct item. In this case we used to adjust
256 used, it would be ideal to write zeros to corresponding 287 * indirect items's ih_free_space. Now ih_free_space is not
257 unformatted node. For now i_size is considered as guard for 288 * used, it would be ideal to write zeros to corresponding
258 going out of file size */ 289 * unformatted node. For now i_size is considered as guard for
290 * going out of file size
291 */
259 kunmap(page); 292 kunmap(page);
260 return block_size - round_tail_len; 293 return block_size - round_tail_len;
261 } 294 }
@@ -264,12 +297,16 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
264 /* make sure to get the i_blocks changes from reiserfs_insert_item */ 297 /* make sure to get the i_blocks changes from reiserfs_insert_item */
265 reiserfs_update_sd(th, inode); 298 reiserfs_update_sd(th, inode);
266 299
267 // note: we have now the same as in above direct2indirect 300 /*
268 // conversion: there are two keys which have matching first three 301 * note: we have now the same as in above direct2indirect
269 // key components. They only differ by the fouhth one. 302 * conversion: there are two keys which have matching first three
303 * key components. They only differ by the fourth one.
304 */
270 305
271 /* We have inserted new direct item and must remove last 306 /*
272 unformatted node. */ 307 * We have inserted new direct item and must remove last
308 * unformatted node.
309 */
273 *mode = M_CUT; 310 *mode = M_CUT;
274 311
275 /* we store position of first direct item in the in-core inode */ 312 /* we store position of first direct item in the in-core inode */
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5cdfbd638b5c..ca416d099e7d 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -56,9 +56,11 @@
56#define XAROOT_NAME "xattrs" 56#define XAROOT_NAME "xattrs"
57 57
58 58
59/* Helpers for inode ops. We do this so that we don't have all the VFS 59/*
60 * Helpers for inode ops. We do this so that we don't have all the VFS
60 * overhead and also for proper i_mutex annotation. 61 * overhead and also for proper i_mutex annotation.
61 * dir->i_mutex must be held for all of them. */ 62 * dir->i_mutex must be held for all of them.
63 */
62#ifdef CONFIG_REISERFS_FS_XATTR 64#ifdef CONFIG_REISERFS_FS_XATTR
63static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) 65static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
64{ 66{
@@ -73,10 +75,12 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
73 return dir->i_op->mkdir(dir, dentry, mode); 75 return dir->i_op->mkdir(dir, dentry, mode);
74} 76}
75 77
76/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr 78/*
79 * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
77 * mutation ops aren't called during rename or splice, which are the 80 * mutation ops aren't called during rename or splice, which are the
78 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's 81 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
79 * better than allocating another subclass just for this code. */ 82 * better than allocating another subclass just for this code.
83 */
80static int xattr_unlink(struct inode *dir, struct dentry *dentry) 84static int xattr_unlink(struct inode *dir, struct dentry *dentry)
81{ 85{
82 int error; 86 int error;
@@ -166,9 +170,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
166 return xadir; 170 return xadir;
167} 171}
168 172
169/* The following are side effects of other operations that aren't explicitly 173/*
174 * The following are side effects of other operations that aren't explicitly
170 * modifying extended attributes. This includes operations such as permissions 175 * modifying extended attributes. This includes operations such as permissions
171 * or ownership changes, object deletions, etc. */ 176 * or ownership changes, object deletions, etc.
177 */
172struct reiserfs_dentry_buf { 178struct reiserfs_dentry_buf {
173 struct dir_context ctx; 179 struct dir_context ctx;
174 struct dentry *xadir; 180 struct dentry *xadir;
@@ -267,11 +273,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
267 cleanup_dentry_buf(&buf); 273 cleanup_dentry_buf(&buf);
268 274
269 if (!err) { 275 if (!err) {
270 /* We start a transaction here to avoid an ABBA situation 276 /*
277 * We start a transaction here to avoid an ABBA situation
271 * between the xattr root's i_mutex and the journal lock. 278 * between the xattr root's i_mutex and the journal lock.
272 * This doesn't incur much additional overhead since the 279 * This doesn't incur much additional overhead since the
273 * new transaction will just nest inside the 280 * new transaction will just nest inside the
274 * outer transaction. */ 281 * outer transaction.
282 */
275 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 283 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
276 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 284 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
277 struct reiserfs_transaction_handle th; 285 struct reiserfs_transaction_handle th;
@@ -284,7 +292,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
284 I_MUTEX_XATTR); 292 I_MUTEX_XATTR);
285 err = action(dir, data); 293 err = action(dir, data);
286 reiserfs_write_lock(inode->i_sb); 294 reiserfs_write_lock(inode->i_sb);
287 jerror = journal_end(&th, inode->i_sb, blocks); 295 jerror = journal_end(&th);
288 reiserfs_write_unlock(inode->i_sb); 296 reiserfs_write_unlock(inode->i_sb);
289 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 297 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
290 err = jerror ?: err; 298 err = jerror ?: err;
@@ -349,9 +357,11 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
349} 357}
350 358
351#ifdef CONFIG_REISERFS_FS_XATTR 359#ifdef CONFIG_REISERFS_FS_XATTR
352/* Returns a dentry corresponding to a specific extended attribute file 360/*
361 * Returns a dentry corresponding to a specific extended attribute file
353 * for the inode. If flags allow, the file is created. Otherwise, a 362 * for the inode. If flags allow, the file is created. Otherwise, a
354 * valid or negative dentry, or an error is returned. */ 363 * valid or negative dentry, or an error is returned.
364 */
355static struct dentry *xattr_lookup(struct inode *inode, const char *name, 365static struct dentry *xattr_lookup(struct inode *inode, const char *name,
356 int flags) 366 int flags)
357{ 367{
@@ -400,8 +410,10 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
400{ 410{
401 struct address_space *mapping = dir->i_mapping; 411 struct address_space *mapping = dir->i_mapping;
402 struct page *page; 412 struct page *page;
403 /* We can deadlock if we try to free dentries, 413 /*
404 and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ 414 * We can deadlock if we try to free dentries,
415 * and an unlink/rmdir has just occurred - GFP_NOFS avoids this
416 */
405 mapping_set_gfp_mask(mapping, GFP_NOFS); 417 mapping_set_gfp_mask(mapping, GFP_NOFS);
406 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); 418 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
407 if (!IS_ERR(page)) { 419 if (!IS_ERR(page)) {
@@ -411,7 +423,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
411 } 423 }
412 return page; 424 return page;
413 425
414 fail: 426fail:
415 reiserfs_put_page(page); 427 reiserfs_put_page(page);
416 return ERR_PTR(-EIO); 428 return ERR_PTR(-EIO);
417} 429}
@@ -589,7 +601,7 @@ int reiserfs_xattr_set(struct inode *inode, const char *name,
589 buffer, buffer_size, flags); 601 buffer, buffer_size, flags);
590 602
591 reiserfs_write_lock(inode->i_sb); 603 reiserfs_write_lock(inode->i_sb);
592 error2 = journal_end(&th, inode->i_sb, jbegin_count); 604 error2 = journal_end(&th);
593 reiserfs_write_unlock(inode->i_sb); 605 reiserfs_write_unlock(inode->i_sb);
594 if (error == 0) 606 if (error == 0)
595 error = error2; 607 error = error2;
@@ -615,8 +627,10 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
615 if (name == NULL) 627 if (name == NULL)
616 return -EINVAL; 628 return -EINVAL;
617 629
618 /* We can't have xattrs attached to v1 items since they don't have 630 /*
619 * generation numbers */ 631 * We can't have xattrs attached to v1 items since they don't have
632 * generation numbers
633 */
620 if (get_inode_sd_version(inode) == STAT_DATA_V1) 634 if (get_inode_sd_version(inode) == STAT_DATA_V1)
621 return -EOPNOTSUPP; 635 return -EOPNOTSUPP;
622 636
@@ -913,12 +927,16 @@ static const struct xattr_handler *reiserfs_xattr_handlers[] = {
913 927
914static int xattr_mount_check(struct super_block *s) 928static int xattr_mount_check(struct super_block *s)
915{ 929{
916 /* We need generation numbers to ensure that the oid mapping is correct 930 /*
917 * v3.5 filesystems don't have them. */ 931 * We need generation numbers to ensure that the oid mapping is correct
932 * v3.5 filesystems don't have them.
933 */
918 if (old_format_only(s)) { 934 if (old_format_only(s)) {
919 if (reiserfs_xattrs_optional(s)) { 935 if (reiserfs_xattrs_optional(s)) {
920 /* Old format filesystem, but optional xattrs have 936 /*
921 * been enabled. Error out. */ 937 * Old format filesystem, but optional xattrs have
938 * been enabled. Error out.
939 */
922 reiserfs_warning(s, "jdm-2005", 940 reiserfs_warning(s, "jdm-2005",
923 "xattrs/ACLs not supported " 941 "xattrs/ACLs not supported "
924 "on pre-v3.6 format filesystems. " 942 "on pre-v3.6 format filesystems. "
@@ -972,9 +990,11 @@ int reiserfs_lookup_privroot(struct super_block *s)
972 return err; 990 return err;
973} 991}
974 992
975/* We need to take a copy of the mount flags since things like 993/*
994 * We need to take a copy of the mount flags since things like
976 * MS_RDONLY don't get set until *after* we're called. 995 * MS_RDONLY don't get set until *after* we're called.
977 * mount_flags != mount_options */ 996 * mount_flags != mount_options
997 */
978int reiserfs_xattr_init(struct super_block *s, int mount_flags) 998int reiserfs_xattr_init(struct super_block *s, int mount_flags)
979{ 999{
980 int err = 0; 1000 int err = 0;
@@ -1007,8 +1027,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1007 1027
1008error: 1028error:
1009 if (err) { 1029 if (err) {
1010 clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); 1030 clear_bit(REISERFS_XATTRS_USER, &REISERFS_SB(s)->s_mount_opt);
1011 clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); 1031 clear_bit(REISERFS_POSIXACL, &REISERFS_SB(s)->s_mount_opt);
1012 } 1032 }
1013 1033
1014 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ 1034 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index f59626c5d33b..857ec7e3016f 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -61,7 +61,8 @@ static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
61 return ret; 61 return ret;
62} 62}
63 63
64/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. 64/*
65 * We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
65 * Let's try to be smart about it. 66 * Let's try to be smart about it.
66 * xattr root: We cache it. If it's not cached, we may need to create it. 67 * xattr root: We cache it. If it's not cached, we may need to create it.
67 * xattr dir: If anything has been loaded for this inode, we can set a flag 68 * xattr dir: If anything has been loaded for this inode, we can set a flag
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a6ce532402dc..44503e293790 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -25,8 +25,10 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25 int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; 25 int size = acl ? posix_acl_xattr_size(acl->a_count) : 0;
26 26
27 27
28 /* Pessimism: We can't assume that anything from the xattr root up 28 /*
29 * has been created. */ 29 * Pessimism: We can't assume that anything from the xattr root up
30 * has been created.
31 */
30 32
31 jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + 33 jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
32 reiserfs_xattr_nblocks(inode, size) * 2; 34 reiserfs_xattr_nblocks(inode, size) * 2;
@@ -37,7 +39,7 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
37 if (error == 0) { 39 if (error == 0) {
38 error = __reiserfs_set_acl(&th, inode, type, acl); 40 error = __reiserfs_set_acl(&th, inode, type, acl);
39 reiserfs_write_lock(inode->i_sb); 41 reiserfs_write_lock(inode->i_sb);
40 error2 = journal_end(&th, inode->i_sb, jcreate_blocks); 42 error2 = journal_end(&th);
41 reiserfs_write_unlock(inode->i_sb); 43 reiserfs_write_unlock(inode->i_sb);
42 if (error2) 44 if (error2)
43 error = error2; 45 error = error2;
@@ -111,7 +113,7 @@ static struct posix_acl *reiserfs_posix_acl_from_disk(const void *value, size_t
111 goto fail; 113 goto fail;
112 return acl; 114 return acl;
113 115
114 fail: 116fail:
115 posix_acl_release(acl); 117 posix_acl_release(acl);
116 return ERR_PTR(-EINVAL); 118 return ERR_PTR(-EINVAL);
117} 119}
@@ -164,7 +166,7 @@ static void *reiserfs_posix_acl_to_disk(const struct posix_acl *acl, size_t * si
164 } 166 }
165 return (char *)ext_acl; 167 return (char *)ext_acl;
166 168
167 fail: 169fail:
168 kfree(ext_acl); 170 kfree(ext_acl);
169 return ERR_PTR(-EINVAL); 171 return ERR_PTR(-EINVAL);
170} 172}
@@ -208,8 +210,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
208 210
209 retval = reiserfs_xattr_get(inode, name, value, size); 211 retval = reiserfs_xattr_get(inode, name, value, size);
210 if (retval == -ENODATA || retval == -ENOSYS) { 212 if (retval == -ENODATA || retval == -ENOSYS) {
211 /* This shouldn't actually happen as it should have 213 /*
212 been caught above.. but just in case */ 214 * This shouldn't actually happen as it should have
215 * been caught above.. but just in case
216 */
213 acl = NULL; 217 acl = NULL;
214 } else if (retval < 0) { 218 } else if (retval < 0) {
215 acl = ERR_PTR(retval); 219 acl = ERR_PTR(retval);
@@ -290,8 +294,10 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
290 return error; 294 return error;
291} 295}
292 296
293/* dir->i_mutex: locked, 297/*
294 * inode is new and not released into the wild yet */ 298 * dir->i_mutex: locked,
299 * inode is new and not released into the wild yet
300 */
295int 301int
296reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, 302reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
297 struct inode *dir, struct dentry *dentry, 303 struct inode *dir, struct dentry *dentry,
@@ -304,14 +310,18 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
304 if (S_ISLNK(inode->i_mode)) 310 if (S_ISLNK(inode->i_mode))
305 return 0; 311 return 0;
306 312
307 /* ACLs can only be used on "new" objects, so if it's an old object 313 /*
308 * there is nothing to inherit from */ 314 * ACLs can only be used on "new" objects, so if it's an old object
315 * there is nothing to inherit from
316 */
309 if (get_inode_sd_version(dir) == STAT_DATA_V1) 317 if (get_inode_sd_version(dir) == STAT_DATA_V1)
310 goto apply_umask; 318 goto apply_umask;
311 319
312 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This 320 /*
321 * Don't apply ACLs to objects in the .reiserfs_priv tree.. This
313 * would be useless since permissions are ignored, and a pain because 322 * would be useless since permissions are ignored, and a pain because
314 * it introduces locking cycles */ 323 * it introduces locking cycles
324 */
315 if (IS_PRIVATE(dir)) { 325 if (IS_PRIVATE(dir)) {
316 inode->i_flags |= S_PRIVATE; 326 inode->i_flags |= S_PRIVATE;
317 goto apply_umask; 327 goto apply_umask;
@@ -335,7 +345,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
335 345
336 return err; 346 return err;
337 347
338 apply_umask: 348apply_umask:
339 /* no ACL, apply umask */ 349 /* no ACL, apply umask */
340 inode->i_mode &= ~current_umask(); 350 inode->i_mode &= ~current_umask();
341 return err; 351 return err;