author    Jeff Mahoney <jeffm@suse.com>    2014-04-23 10:00:36 -0400
committer Jan Kara <jack@suse.cz>          2014-05-06 16:52:19 -0400
commit    098297b27d23ad9d0fc302e3417474d9342c6c14 (patch)
tree      58f2054cd9933225ef1ae9c7febedc9160041af6 /fs/reiserfs
parent    4cf5f7addf18ecae2ea49b11944976cbd26d5281 (diff)
reiserfs: cleanup, reformat comments to normal kernel style
This patch reformats comments in the reiserfs code to fit in 80 columns and
to follow the style rules. There is no functional change, but it helps make
my eyes bleed less.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
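For reference, the pattern applied throughout is the multi-line comment
convention from the kernel's CodingStyle document; a minimal before/after
sketch (a hypothetical snippet, not taken from the patch itself):

    /* old style: text starts on the opening line and the
     * terminator shares the final line of text */

    /*
     * new style: the opening and closing markers sit on their
     * own lines, and each line stays within 80 columns
     */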
Diffstat (limited to 'fs/reiserfs')
-rw-r--r--   fs/reiserfs/bitmap.c            237
-rw-r--r--   fs/reiserfs/dir.c                77
-rw-r--r--   fs/reiserfs/do_balan.c          276
-rw-r--r--   fs/reiserfs/file.c               62
-rw-r--r--   fs/reiserfs/fix_node.c          967
-rw-r--r--   fs/reiserfs/hashes.c             15
-rw-r--r--   fs/reiserfs/ibalance.c          247
-rw-r--r--   fs/reiserfs/inode.c            1063
-rw-r--r--   fs/reiserfs/ioctl.c              23
-rw-r--r--   fs/reiserfs/item_ops.c          100
-rw-r--r--   fs/reiserfs/journal.c          1127
-rw-r--r--   fs/reiserfs/lbalance.c          349
-rw-r--r--   fs/reiserfs/namei.c             420
-rw-r--r--   fs/reiserfs/objectid.c           95
-rw-r--r--   fs/reiserfs/prints.c            152
-rw-r--r--   fs/reiserfs/reiserfs.h         1740
-rw-r--r--   fs/reiserfs/resize.c             63
-rw-r--r--   fs/reiserfs/stree.c             812
-rw-r--r--   fs/reiserfs/super.c             366
-rw-r--r--   fs/reiserfs/tail_conversion.c   151
-rw-r--r--   fs/reiserfs/xattr.c              60
-rw-r--r--   fs/reiserfs/xattr.h               3
-rw-r--r--   fs/reiserfs/xattr_acl.c          30
23 files changed, 5124 insertions, 3311 deletions
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index c3de6501a5cb..70daba6fa6a5 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -50,8 +50,10 @@ static inline void get_bit_address(struct super_block *s,
                             unsigned int *bmap_nr,
                             unsigned int *offset)
 {
-        /* It is in the bitmap block number equal to the block
-         * number divided by the number of bits in a block. */
+        /*
+         * It is in the bitmap block number equal to the block
+         * number divided by the number of bits in a block.
+         */
         *bmap_nr = block >> (s->s_blocksize_bits + 3);
         /* Within that bitmap block it is located at bit offset *offset. */
         *offset = block & ((s->s_blocksize << 3) - 1);
@@ -71,8 +73,10 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 
         get_bit_address(s, block, &bmap, &offset);
 
-        /* Old format filesystem? Unlikely, but the bitmaps are all up front so
-         * we need to account for it. */
+        /*
+         * Old format filesystem? Unlikely, but the bitmaps are all
+         * up front so we need to account for it.
+         */
         if (unlikely(test_bit(REISERFS_OLD_FORMAT,
                               &(REISERFS_SB(s)->s_properties)))) {
                 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
@@ -108,8 +112,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
         return 1;
 }
 
-/* searches in journal structures for a given block number (bmap, off). If block
-   is found in reiserfs journal it suggests next free block candidate to test. */
+/*
+ * Searches in journal structures for a given block number (bmap, off).
+ * If block is found in reiserfs journal it suggests next free block
+ * candidate to test.
+ */
 static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
                                       int off, int *next)
 {
@@ -120,7 +127,7 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
                 *next = tmp;
                 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
         } else {
-                (*next) = off + 1;      /* inc offset to avoid looping. */
+                (*next) = off + 1;  /* inc offset to avoid looping. */
                 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
         }
         PROC_INFO_INC(s, scan_bitmap.retry);
@@ -129,8 +136,10 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
         return 0;
 }
 
-/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
- * block; */
+/*
+ * Searches for a window of zero bits with given minimum and maximum
+ * lengths in one bitmap block
+ */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
                              unsigned int bmap_n, int *beg, int boundary,
                              int min, int max, int unfm)
@@ -146,10 +155,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
         RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
                "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
         PROC_INFO_INC(s, scan_bitmap.bmap);
-/* this is unclear and lacks comments, explain how journal bitmaps
-   work here for the reader.  Convey a sense of the design here. What
-   is a window? */
-/* - I mean `a window of zero bits' as in description of this function - Zam. */
 
         if (!bi) {
                 reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer "
@@ -165,15 +170,18 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 cont:
         if (bi->free_count < min) {
                 brelse(bh);
-                return 0;       // No free blocks in this bitmap
+                return 0;       /* No free blocks in this bitmap */
         }
 
         /* search for a first zero bit -- beginning of a window */
         *beg = reiserfs_find_next_zero_le_bit
             ((unsigned long *)(bh->b_data), boundary, *beg);
 
-        if (*beg + min > boundary) {    /* search for a zero bit fails or the rest of bitmap block
-                                         * cannot contain a zero window of minimum size */
+        /*
+         * search for a zero bit fails or the rest of bitmap block
+         * cannot contain a zero window of minimum size
+         */
+        if (*beg + min > boundary) {
                 brelse(bh);
                 return 0;
         }
@@ -187,37 +195,63 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
                         next = end;
                         break;
                 }
-                /* finding the other end of zero bit window requires looking into journal structures (in
-                 * case of searching for free blocks for unformatted nodes) */
+
+                /*
+                 * finding the other end of zero bit window requires
+                 * looking into journal structures (in case of
+                 * searching for free blocks for unformatted nodes)
+                 */
                 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
                         break;
         }
 
-        /* now (*beg) points to beginning of zero bits window,
-         * (end) points to one bit after the window end */
-        if (end - *beg >= min) {        /* it seems we have found window of proper size */
+        /*
+         * now (*beg) points to beginning of zero bits window,
+         * (end) points to one bit after the window end
+         */
+
+        /* found window of proper size */
+        if (end - *beg >= min) {
                 int i;
                 reiserfs_prepare_for_journal(s, bh, 1);
-                /* try to set all blocks used checking are they still free */
+                /*
+                 * try to set all blocks used checking are
+                 * they still free
+                 */
                 for (i = *beg; i < end; i++) {
-                        /* It seems that we should not check in journal again. */
+                        /* Don't check in journal again. */
                         if (reiserfs_test_and_set_le_bit
                             (i, bh->b_data)) {
-                                /* bit was set by another process
-                                 * while we slept in prepare_for_journal() */
+                                /*
+                                 * bit was set by another process while
+                                 * we slept in prepare_for_journal()
+                                 */
                                 PROC_INFO_INC(s, scan_bitmap.stolen);
-                                if (i >= *beg + min) {  /* we can continue with smaller set of allocated blocks,
-                                                         * if length of this set is more or equal to `min' */
+
+                                /*
+                                 * we can continue with smaller set
+                                 * of allocated blocks, if length of
+                                 * this set is more or equal to `min'
+                                 */
+                                if (i >= *beg + min) {
                                         end = i;
                                         break;
                                 }
-                                /* otherwise we clear all bit were set ... */
+
+                                /*
+                                 * otherwise we clear all bit
+                                 * were set ...
+                                 */
                                 while (--i >= *beg)
                                         reiserfs_clear_le_bit
                                             (i, bh->b_data);
                                 reiserfs_restore_prepared_buffer(s, bh);
                                 *beg = org;
-                                /* ... and search again in current block from beginning */
+
+                                /*
+                                 * Search again in current block
+                                 * from beginning
+                                 */
                                 goto cont;
                         }
                 }
@@ -268,11 +302,13 @@ static inline int block_group_used(struct super_block *s, u32 id)
         int bm = bmap_hash_id(s, id);
         struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm];
 
-        /* If we don't have cached information on this bitmap block, we're
+        /*
+         * If we don't have cached information on this bitmap block, we're
          * going to have to load it later anyway. Loading it here allows us
          * to make a better decision. This favors long-term performance gain
          * with a better on-disk layout vs. a short term gain of skipping the
-         * read and potentially having a bad placement. */
+         * read and potentially having a bad placement.
+         */
         if (info->free_count == UINT_MAX) {
                 struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
                 brelse(bh);
@@ -305,17 +341,16 @@ __le32 reiserfs_choose_packing(struct inode * dir)
         return packing;
 }
 
-/* Tries to find contiguous zero bit window (given size) in given region of
- * bitmap and place new blocks there. Returns number of allocated blocks. */
+/*
+ * Tries to find contiguous zero bit window (given size) in given region of
+ * bitmap and place new blocks there. Returns number of allocated blocks.
+ */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
                        b_blocknr_t * start, b_blocknr_t finish,
                        int min, int max, int unfm, sector_t file_block)
 {
         int nr_allocated = 0;
         struct super_block *s = th->t_super;
-        /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
-         * - Hans, it is not a block number - Zam. */
-
         unsigned int bm, off;
         unsigned int end_bm, end_off;
         unsigned int off_max = s->s_blocksize << 3;
@@ -323,8 +358,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
         BUG_ON(!th->t_trans_id);
 
         PROC_INFO_INC(s, scan_bitmap.call);
+
+        /* No point in looking for more free blocks */
         if (SB_FREE_BLOCKS(s) <= 0)
-                return 0;       // No point in looking for more free blocks
+                return 0;
 
         get_bit_address(s, *start, &bm, &off);
         get_bit_address(s, finish, &end_bm, &end_off);
@@ -333,7 +370,8 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
         if (end_bm > reiserfs_bmap_count(s))
                 end_bm = reiserfs_bmap_count(s);
 
-        /* When the bitmap is more than 10% free, anyone can allocate.
+        /*
+         * When the bitmap is more than 10% free, anyone can allocate.
          * When it's less than 10% free, only files that already use the
          * bitmap are allowed. Once we pass 80% full, this restriction
          * is lifted.
@@ -532,7 +570,8 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
 {
         char *this_char, *value;
 
-        REISERFS_SB(s)->s_alloc_options.bits = 0;       /* clear default settings */
+        /* clear default settings */
+        REISERFS_SB(s)->s_alloc_options.bits = 0;
 
         while ((this_char = strsep(&options, ":")) != NULL) {
                 if ((value = strchr(this_char, '=')) != NULL)
@@ -733,7 +772,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
                 hash_in = (char *)&hint->key.k_dir_id;
         } else {
                 if (!hint->inode) {
-                        //hint->search_start = hint->beg;
+                        /*hint->search_start = hint->beg;*/
                         hash_in = (char *)&hint->key.k_dir_id;
                 } else
                     if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
@@ -786,7 +825,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
 
         dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
 
-        /* keep the root dir and it's first set of subdirs close to
+        /*
+         * keep the root dir and it's first set of subdirs close to
          * the start of the disk
          */
         if (dirid <= 2)
@@ -800,7 +840,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint)
         }
 }
 
-/* returns 1 if it finds an indirect item and gets valid hint info
+/*
+ * returns 1 if it finds an indirect item and gets valid hint info
  * from it, otherwise 0
  */
 static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
@@ -812,8 +853,11 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
         __le32 *item;
         int ret = 0;
 
-        if (!hint->path)        /* reiserfs code can call this function w/o pointer to path
-                                 * structure supplied; then we rely on supplied search_start */
+        /*
+         * reiserfs code can call this function w/o pointer to path
+         * structure supplied; then we rely on supplied search_start
+         */
+        if (!hint->path)
                 return 0;
 
         path = hint->path;
@@ -825,12 +869,13 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
 
         hint->search_start = bh->b_blocknr;
 
+        /*
+         * for indirect item: go to left and look for the first non-hole entry
+         * in the indirect item
+         */
         if (!hint->formatted_node && is_indirect_le_ih(ih)) {
-                /* for indirect item: go to left and look for the first non-hole entry
-                   in the indirect item */
                 if (pos_in_item == I_UNFM_NUM(ih))
                         pos_in_item--;
-//          pos_in_item = I_UNFM_NUM (ih) - 1;
                 while (pos_in_item >= 0) {
                         int t = get_block_num(item, pos_in_item);
                         if (t) {
@@ -846,10 +891,12 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
         return ret;
 }
 
-/* should be, if formatted node, then try to put on first part of the device
-   specified as number of percent with mount option device, else try to put
-   on last of device.  This is not to say it is good code to do so,
-   but the effect should be measured. */
+/*
+ * should be, if formatted node, then try to put on first part of the device
+ * specified as number of percent with mount option device, else try to put
+ * on last of device.  This is not to say it is good code to do so,
+ * but the effect should be measured.
+ */
 static inline void set_border_in_hint(struct super_block *s,
                                       reiserfs_blocknr_hint_t * hint)
 {
@@ -975,21 +1022,27 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
         set_border_in_hint(s, hint);
 
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-        /* whenever we create a new directory, we displace it.  At first we will
-           hash for location, later we might look for a moderately empty place for
-           it */
+        /*
+         * whenever we create a new directory, we displace it.  At first
+         * we will hash for location, later we might look for a moderately
+         * empty place for it
+         */
         if (displacing_new_packing_localities(s)
             && hint->th->displace_new_blocks) {
                 displace_new_packing_locality(hint);
 
-                /* we do not continue determine_search_start,
-                 * if new packing locality is being displaced */
+                /*
+                 * we do not continue determine_search_start,
+                 * if new packing locality is being displaced
+                 */
                 return;
         }
 #endif
 
-        /* all persons should feel encouraged to add more special cases here and
-         * test them */
+        /*
+         * all persons should feel encouraged to add more special cases
+         * here and test them
+         */
 
         if (displacing_large_files(s) && !hint->formatted_node
             && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
@@ -997,8 +1050,10 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
                 return;
         }
 
-        /* if none of our special cases is relevant, use the left neighbor in the
-           tree order of the new node we are allocating for */
+        /*
+         * if none of our special cases is relevant, use the left
+         * neighbor in the tree order of the new node we are allocating for
+         */
         if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
                 hash_formatted_node(hint);
                 return;
@@ -1006,10 +1061,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
 
         unfm_hint = get_left_neighbor(hint);
 
-        /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
-           new blocks are displaced based on directory ID. Also, if suggested search_start
-           is less than last preallocated block, we start searching from it, assuming that
-           HDD dataflow is faster in forward direction */
+        /*
+         * Mimic old block allocator behaviour, that is if VFS allowed for
+         * preallocation, new blocks are displaced based on directory ID.
+         * Also, if suggested search_start is less than last preallocated
+         * block, we start searching from it, assuming that HDD dataflow
+         * is faster in forward direction
+         */
         if (TEST_OPTION(old_way, s)) {
                 if (!hint->formatted_node) {
                         if (!reiserfs_hashed_relocation(s))
@@ -1038,11 +1096,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint,
             TEST_OPTION(old_hashed_relocation, s)) {
                 old_hashed_relocation(hint);
         }
+
         /* new_hashed_relocation works with both formatted/unformatted nodes */
         if ((!unfm_hint || hint->formatted_node) &&
             TEST_OPTION(new_hashed_relocation, s)) {
                 new_hashed_relocation(hint);
         }
+
         /* dirid grouping works only on unformatted nodes */
         if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
                 dirid_groups(hint);
@@ -1080,8 +1140,6 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
         return CARRY_ON;
 }
 
-/* XXX I know it could be merged with upper-level function;
-   but may be result function would be too complex. */
 static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
                                                  b_blocknr_t * new_blocknrs,
                                                  b_blocknr_t start,
@@ -1109,7 +1167,10 @@ static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
 
         /* do we have something to fill prealloc. array also ? */
         if (nr_allocated > 0) {
-                /* it means prealloc_size was greater that 0 and we do preallocation */
+                /*
+                 * it means prealloc_size was greater that 0 and
+                 * we do preallocation
+                 */
                 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
                          &SB_JOURNAL(hint->th->t_super)->
                          j_prealloc_list);
@@ -1177,7 +1238,8 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start
                 start = 0;
                 finish = hint->beg;
                 break;
-        default:        /* We've tried searching everywhere, not enough space */
+        default:
+                /* We've tried searching everywhere, not enough space */
                 /* Free the blocks */
                 if (!hint->formatted_node) {
 #ifdef REISERQUOTA_DEBUG
@@ -1262,8 +1324,11 @@ static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
         return amount_needed;
 }
 
-int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have
-                                                                                   already reserved */ )
+int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
+                               b_blocknr_t *new_blocknrs,
+                               int amount_needed,
+                               /* Amount of blocks we have already reserved */
+                               int reserved_by_us)
 {
         int initial_amount_needed = amount_needed;
         int ret;
@@ -1275,15 +1340,21 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
                 return NO_DISK_SPACE;
         /* should this be if !hint->inode &&  hint->preallocate? */
         /* do you mean hint->formatted_node can be removed ? - Zam */
-        /* hint->formatted_node cannot be removed because we try to access
-           inode information here, and there is often no inode assotiated with
-           metadata allocations - green */
+        /*
+         * hint->formatted_node cannot be removed because we try to access
+         * inode information here, and there is often no inode associated with
+         * metadata allocations - green
+         */
 
         if (!hint->formatted_node && hint->preallocate) {
                 amount_needed = use_preallocated_list_if_available
                     (hint, new_blocknrs, amount_needed);
-                if (amount_needed == 0) /* all blocknrs we need we got from
-                                           prealloc. list */
+
+                /*
+                 * We have all the block numbers we need from the
+                 * prealloc list
+                 */
+                if (amount_needed == 0)
                         return CARRY_ON;
                 new_blocknrs += (initial_amount_needed - amount_needed);
         }
@@ -1297,10 +1368,12 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new
         ret = blocknrs_and_prealloc_arrays_from_search_start
             (hint, new_blocknrs, amount_needed);
 
-        /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
-         * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
-         * variant) */
-
+        /*
+         * We used prealloc. list to fill (partially) new_blocknrs array.
+         * If final allocation fails we need to return blocks back to
+         * prealloc. list or just free them. -- Zam (I chose second
+         * variant)
+         */
         if (ret != CARRY_ON) {
                 while (amount_needed++ < initial_amount_needed) {
                         reiserfs_free_block(hint->th, hint->inode,
@@ -1339,8 +1412,10 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
         struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap;
         struct buffer_head *bh;
 
-        /* Way old format filesystems had the bitmaps packed up front.
-         * I doubt there are any of these left, but just in case... */
+        /*
+         * Way old format filesystems had the bitmaps packed up front.
+         * I doubt there are any of these left, but just in case...
+         */
         if (unlikely(test_bit(REISERFS_OLD_FORMAT,
                               &(REISERFS_SB(sb)->s_properties))))
                 block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 1fe5cdeb5862..8d51f28d6345 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -59,7 +59,10 @@ static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *d
 
 int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 {
-        struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
+
+        /* key of current position in the directory (key of directory entry) */
+        struct cpu_key pos_key;
+
         INITIALIZE_PATH(path_to_entry);
         struct buffer_head *bh;
         int item_num, entry_num;
@@ -77,21 +80,28 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 
         reiserfs_check_lock_depth(inode->i_sb, "readdir");
 
-        /* form key for search the next directory entry using f_pos field of
-           file structure */
+        /*
+         * form key for search the next directory entry using
+         * f_pos field of file structure
+         */
         make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3);
         next_pos = cpu_key_k_offset(&pos_key);
 
         path_to_entry.reada = PATH_READA;
         while (1) {
 research:
-                /* search the directory item, containing entry with specified key */
+                /*
+                 * search the directory item, containing entry with
+                 * specified key
+                 */
                 search_res =
                     search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
                                         &de);
                 if (search_res == IO_ERROR) {
-                        // FIXME: we could just skip part of directory which could
-                        // not be read
+                        /*
+                         * FIXME: we could just skip part of directory
+                         * which could not be read
+                         */
                         ret = -EIO;
                         goto out;
                 }
@@ -109,14 +119,20 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
                        "vs-9005 item_num == %d, item amount == %d",
                        item_num, B_NR_ITEMS(bh));
 
-                /* and entry must be not more than number of entries in the item */
+                /*
+                 * and entry must be not more than number of entries
+                 * in the item
+                 */
                 RFALSE(ih_entry_count(ih) < entry_num,
                        "vs-9010: entry number is too big %d (%d)",
                        entry_num, ih_entry_count(ih));
 
+                /*
+                 * go through all entries in the directory item beginning
+                 * from the entry, that has been found
+                 */
                 if (search_res == POSITION_FOUND
                     || entry_num < ih_entry_count(ih)) {
-                        /* go through all entries in the directory item beginning from the entry, that has been found */
                         struct reiserfs_de_head *deh =
                             B_I_DEH(bh, ih) + entry_num;
 
@@ -127,16 +143,18 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
                                 ino_t d_ino;
                                 loff_t cur_pos = deh_offset(deh);
 
+                                /* it is hidden entry */
                                 if (!de_visible(deh))
-                                        /* it is hidden entry */
                                         continue;
                                 d_reclen = entry_length(bh, ih, entry_num);
                                 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
 
                                 if (d_reclen <= 0 ||
                                     d_name + d_reclen > bh->b_data + bh->b_size) {
-                                        /* There is corrupted data in entry,
-                                         * We'd better stop here */
+                                        /*
+                                         * There is corrupted data in entry,
+                                         * We'd better stop here
+                                         */
                                         pathrelse(&path_to_entry);
                                         ret = -EIO;
                                         goto out;
@@ -145,10 +163,10 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
                                 if (!d_name[d_reclen - 1])
                                         d_reclen = strlen(d_name);
 
+                                /* too big to send back to VFS */
                                 if (d_reclen >
                                     REISERFS_MAX_NAME(inode->i_sb->
                                                       s_blocksize)) {
-                                        /* too big to send back to VFS */
                                         continue;
                                 }
 
@@ -173,10 +191,14 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
                                                 goto research;
                                         }
                                 }
-                                // Note, that we copy name to user space via temporary
-                                // buffer (local_buf) because filldir will block if
-                                // user space buffer is swapped out. At that time
-                                // entry can move to somewhere else
+
+                                /*
+                                 * Note, that we copy name to user space via
+                                 * temporary buffer (local_buf) because
+                                 * filldir will block if user space buffer is
+                                 * swapped out. At that time entry can move to
+                                 * somewhere else
+                                 */
                                 memcpy(local_buf, d_name, d_reclen);
 
                                 /*
@@ -209,22 +231,26 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
                         }       /* for */
                 }
 
+                /* end of directory has been reached */
                 if (item_num != B_NR_ITEMS(bh) - 1)
-                        // end of directory has been reached
                         goto end;
 
-                /* item we went through is last item of node. Using right
-                   delimiting key check is it directory end */
+                /*
+                 * item we went through is last item of node. Using right
+                 * delimiting key check is it directory end
+                 */
                 rkey = get_rkey(&path_to_entry, inode->i_sb);
                 if (!comp_le_keys(rkey, &MIN_KEY)) {
-                        /* set pos_key to key, that is the smallest and greater
-                           that key of the last entry in the item */
+                        /*
+                         * set pos_key to key, that is the smallest and greater
+                         * that key of the last entry in the item
+                         */
                         set_cpu_key_k_offset(&pos_key, next_pos);
                         continue;
                 }
 
+                /* end of directory has been reached */
                 if (COMP_SHORT_KEYS(rkey, &pos_key)) {
-                        // end of directory has been reached
                         goto end;
                 }
 
@@ -248,9 +274,10 @@ static int reiserfs_readdir(struct file *file, struct dir_context *ctx)
         return reiserfs_readdir_inode(file_inode(file), ctx);
 }
 
-/* compose directory item containing "." and ".." entries (entries are
-   not aligned to 4 byte boundary) */
-/* the last four params are LE */
+/*
+ * compose directory item containing "." and ".." entries (entries are
+ * not aligned to 4 byte boundary)
+ */
 void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
                             __le32 par_dirid, __le32 par_objid)
 {
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 80b2b1b37169..399b2009b677 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -2,18 +2,13 @@
  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
  */
 
-/* Now we have all buffers that must be used in balancing of the tree */
-/* Further calculations can not cause schedule(), and thus the buffer */
-/* tree will be stable until the balancing will be finished */
-/* balance the tree according to the analysis made before, */
-/* and using buffers obtained after all above. */
-
-/**
- ** balance_leaf_when_delete
- ** balance_leaf
- ** do_balance
- **
- **/
+/*
+ * Now we have all buffers that must be used in balancing of the tree
+ * Further calculations can not cause schedule(), and thus the buffer
+ * tree will be stable until the balancing will be finished
+ * balance the tree according to the analysis made before,
+ * and using buffers obtained after all above.
+ */
 
 #include <asm/uaccess.h>
 #include <linux/time.h>
@@ -68,35 +63,39 @@ inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
 
-/* summary:
-   if deleting something ( tb->insert_size[0] < 0 )
-   return(balance_leaf_when_delete()); (flag d handled here)
-   else
-   if lnum is larger than 0 we put items into the left node
-   if rnum is larger than 0 we put items into the right node
-   if snum1 is larger than 0 we put items into the new node s1
-   if snum2 is larger than 0 we put items into the new node s2
-Note that all *num* count new items being created.
-
-It would be easier to read balance_leaf() if each of these summary
-lines was a separate procedure rather than being inlined.  I think
-that there are many passages here and in balance_leaf_when_delete() in
-which two calls to one procedure can replace two passages, and it
-might save cache space and improve software maintenance costs to do so.
-
-Vladimir made the perceptive comment that we should offload most of
-the decision making in this function into fix_nodes/check_balance, and
-then create some sort of structure in tb that says what actions should
-be performed by do_balance.
-
--Hans */
-
-/* Balance leaf node in case of delete or cut: insert_size[0] < 0
+/*
+ * summary:
+ *  if deleting something ( tb->insert_size[0] < 0 )
+ *      return(balance_leaf_when_delete()); (flag d handled here)
+ *  else
+ *      if lnum is larger than 0 we put items into the left node
+ *      if rnum is larger than 0 we put items into the right node
+ *      if snum1 is larger than 0 we put items into the new node s1
+ *      if snum2 is larger than 0 we put items into the new node s2
+ *
+ * Note that all *num* count new items being created.
+ *
+ * It would be easier to read balance_leaf() if each of these summary
+ * lines was a separate procedure rather than being inlined.  I think
+ * that there are many passages here and in balance_leaf_when_delete() in
+ * which two calls to one procedure can replace two passages, and it
+ * might save cache space and improve software maintenance costs to do so.
+ *
+ * Vladimir made the perceptive comment that we should offload most of
+ * the decision making in this function into fix_nodes/check_balance, and
+ * then create some sort of structure in tb that says what actions should
+ * be performed by do_balance.
+ *
+ * -Hans
+ */
+
+/*
+ * Balance leaf node in case of delete or cut: insert_size[0] < 0
  *
  * lnum, rnum can have values >= -1
  *      -1 means that the neighbor must be joined with S
  *       0 means that nothing should be done with the neighbor
- *      >0 means to shift entirely or partly the specified number of items to the neighbor
+ *      >0 means to shift entirely or partly the specified number of items
+ *         to the neighbor
  */
 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 {
@@ -149,8 +148,16 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
         case M_CUT:{    /* cut item in S[0] */
                         if (is_direntry_le_ih(ih)) {
 
-                                /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
-                                /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
+                                /*
+                                 * UFS unlink semantics are such that you
+                                 * can only delete one directory entry at
+                                 * a time.
+                                 */
+
+                                /*
+                                 * when we cut a directory tb->insert_size[0]
+                                 * means number of entries to be cut (always 1)
+                                 */
                                 tb->insert_size[0] = -1;
                                 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
                                                      -tb->insert_size[0]);
@@ -183,13 +190,22 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
                                "UNKNOWN"), flag);
         }
 
-        /* the rule is that no shifting occurs unless by shifting a node can be freed */
+        /*
+         * the rule is that no shifting occurs unless by shifting
+         * a node can be freed
+         */
         n = B_NR_ITEMS(tbS0);
-        if (tb->lnum[0]) {      /* L[0] takes part in balancing */
-                if (tb->lnum[0] == -1) {        /* L[0] must be joined with S[0] */
-                        if (tb->rnum[0] == -1) {        /* R[0] must be also joined with S[0] */
+        /* L[0] takes part in balancing */
+        if (tb->lnum[0]) {
+                /* L[0] must be joined with S[0] */
+                if (tb->lnum[0] == -1) {
+                        /* R[0] must be also joined with S[0] */
+                        if (tb->rnum[0] == -1) {
                                 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
-                                        /* all contents of all the 3 buffers will be in L[0] */
+                                        /*
+                                         * all contents of all the 3 buffers
+                                         * will be in L[0]
+                                         */
                                         if (PATH_H_POSITION(tb->tb_path, 1) == 0
                                             && 1 < B_NR_ITEMS(tb->FR[0]))
                                                 replace_key(tb, tb->CFL[0],
@@ -208,7 +224,10 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 
                                 return 0;
                         }
-                        /* all contents of all the 3 buffers will be in R[0] */
+                        /*
+                         * all contents of all the 3 buffers will
+                         * be in R[0]
+                         */
                         leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
                                         NULL);
                         leaf_move_items(LEAF_FROM_L_TO_R, tb,
@@ -233,7 +252,11 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
 
                 return 0;
         }
-        /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
+
+        /*
+         * a part of contents of S[0] will be in L[0] and the
+         * rest part of S[0] will be in R[0]
+         */
 
         RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
                (tb->lnum[0] + tb->rnum[0] > n + 1),
@@ -1178,9 +1201,7 @@ struct buffer_head *get_FEB(struct tree_balance *tb)
         return tb->used[i];
 }
 
-/* This is now used because reiserfs_free_block has to be able to
-** schedule.
-*/
+/* This is now used because reiserfs_free_block has to be able to schedule. */
 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
 {
         int i;
@@ -1335,8 +1356,10 @@ static int check_before_balancing(struct tree_balance *tb)
                                "mount point.");
         }
 
-        /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
-           prepped all of these for us). */
+        /*
+         * double check that buffers that we will modify are unlocked.
+         * (fix_nodes should already have prepped all of these for us).
+         */
         if (tb->lnum[0]) {
                 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
                 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
@@ -1429,49 +1452,51 @@ static void check_internal_levels(struct tree_balance *tb)
 
 #endif
 
-/* Now we have all of the buffers that must be used in balancing of
-   the tree.  We rely on the assumption that schedule() will not occur
-   while do_balance works. ( Only interrupt handlers are acceptable.)
-   We balance the tree according to the analysis made before this,
-   using buffers already obtained.  For SMP support it will someday be
-   necessary to add ordered locking of tb. */
-
-/* Some interesting rules of balancing:
-
-   we delete a maximum of two nodes per level per balancing: we never
-   delete R, when we delete two of three nodes L, S, R then we move
-   them into R.
-
-   we only delete L if we are deleting two nodes, if we delete only
-   one node we delete S
-
-   if we shift leaves then we shift as much as we can: this is a
-   deliberate policy of extremism in node packing which results in
-   higher average utilization after repeated random balance operations
-   at the cost of more memory copies and more balancing as a result of
-   small insertions to full nodes.
-
-   if we shift internal nodes we try to evenly balance the node
-   utilization, with consequent less balancing at the cost of lower
-   utilization.
-
-   one could argue that the policy for directories in leaves should be
-   that of internal nodes, but we will wait until another day to
-   evaluate this....  It would be nice to someday measure and prove
-   these assumptions as to what is optimal....
-
-*/
+/*
+ * Now we have all of the buffers that must be used in balancing of
+ * the tree.  We rely on the assumption that schedule() will not occur
+ * while do_balance works. ( Only interrupt handlers are acceptable.)
+ * We balance the tree according to the analysis made before this,
+ * using buffers already obtained.  For SMP support it will someday be
+ * necessary to add ordered locking of tb.
+ */
+
+/*
+ * Some interesting rules of balancing:
+ * we delete a maximum of two nodes per level per balancing: we never
+ * delete R, when we delete two of three nodes L, S, R then we move
+ * them into R.
+ *
+ * we only delete L if we are deleting two nodes, if we delete only
+ * one node we delete S
+ *
+ * if we shift leaves then we shift as much as we can: this is a
+ * deliberate policy of extremism in node packing which results in
+ * higher average utilization after repeated random balance operations
+ * at the cost of more memory copies and more balancing as a result of
+ * small insertions to full nodes.
+ *
+ * if we shift internal nodes we try to evenly balance the node
+ * utilization, with consequent less balancing at the cost of lower
+ * utilization.
+ *
+ * one could argue that the policy for directories in leaves should be
+ * that of internal nodes, but we will wait until another day to
+ * evaluate this....  It would be nice to someday measure and prove
+ * these assumptions as to what is optimal....
+ */
 
 static inline void do_balance_starts(struct tree_balance *tb)
 {
-        /* use print_cur_tb() to see initial state of struct
-           tree_balance */
+        /* use print_cur_tb() to see initial state of struct tree_balance */
 
         /* store_print_tb (tb); */
 
         /* do not delete, just comment it out */
-/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
-   "check");*/
+        /*
+        print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
+                 tb->tb_path->pos_in_item, tb, "check");
+        */
         RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
 #ifdef CONFIG_REISERFS_CHECK
         REISERFS_SB(tb->tb_sb)->cur_tb = tb;
@@ -1487,9 +1512,10 @@ static inline void do_balance_completed(struct tree_balance *tb)
         REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
 #endif
 
-        /* reiserfs_free_block is no longer schedule safe.  So, we need to
-         ** put the buffers we want freed on the thrown list during do_balance,
-         ** and then free them now
+        /*
+         * reiserfs_free_block is no longer schedule safe.  So, we need to
+         * put the buffers we want freed on the thrown list during do_balance,
+         * and then free them now
          */
 
         REISERFS_SB(tb->tb_sb)->s_do_balance++;
@@ -1500,36 +1526,40 @@ static inline void do_balance_completed(struct tree_balance *tb)
         free_thrown(tb);
 }
 
-void do_balance(struct tree_balance *tb,        /* tree_balance structure */
-                struct item_head *ih,   /* item header of inserted item */
-                const char *body,       /* body of inserted item or bytes to paste */
-                int flag)
-{                               /* i - insert, d - delete
-                                   c - cut, p - paste
-
-                                   Cut means delete part of an item
-                                   (includes removing an entry from a
-                                   directory).
-
-                                   Delete means delete whole item.
-
-                                   Insert means add a new item into the
-                                   tree.
-
-                                   Paste means to append to the end of an
-                                   existing file or to insert a directory
-                                   entry.  */
-        int child_pos,          /* position of a child node in its parent */
-         h;                     /* level of the tree being processed */
-        struct item_head insert_key[2]; /* in our processing of one level
-                                           we sometimes determine what
-                                           must be inserted into the next
-                                           higher level.  This insertion
-                                           consists of a key or two keys
-                                           and their corresponding
-                                           pointers */
-        struct buffer_head *insert_ptr[2];      /* inserted node-ptrs for the next
-                                                   level */
+/*
+ * do_balance - balance the tree
+ *
+ * @tb: tree_balance structure
+ * @ih: item header of inserted item
+ * @body: body of inserted item or bytes to paste
+ * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
+ *
+ * Cut means delete part of an item (includes removing an entry from a
+ * directory).
+ *
+ * Delete means delete whole item.
+ *
+ * Insert means add a new item into the tree.
+ *
+ * Paste means to append to the end of an existing file or to
+ * insert a directory entry.
+ */
+void do_balance(struct tree_balance *tb, struct item_head *ih,
+                const char *body, int flag)
+{
+        int child_pos;  /* position of a child node in its parent */
+        int h;          /* level of the tree being processed */
+
+        /*
+         * in our processing of one level we sometimes determine what
+         * must be inserted into the next higher level.  This insertion
+         * consists of a key or two keys and their corresponding
+         * pointers
+         */
+        struct item_head insert_key[2];
+
+        /* inserted node-ptrs for the next level */
+        struct buffer_head *insert_ptr[2];
 
         tb->tb_mode = flag;
         tb->need_balance_dirty = 0;
@@ -1549,9 +1579,11 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */
         atomic_inc(&(fs_generation(tb->tb_sb)));
         do_balance_starts(tb);
 
-        /* balance leaf returns 0 except if combining L R and S into
-           one node.  see balance_internal() for explanation of this
-           line of code. */
+        /*
+         * balance_leaf returns 0 except if combining L R and S into
+         * one node.  see balance_internal() for explanation of this
+         * line of code.
+         */
         child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
             balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ed58d843d578..27399430664e 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -15,20 +15,20 @@
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16 16
17/* 17/*
18** We pack the tails of files on file close, not at the time they are written. 18 * We pack the tails of files on file close, not at the time they are written.
19** This implies an unnecessary copy of the tail and an unnecessary indirect item 19 * This implies an unnecessary copy of the tail and an unnecessary indirect item
20** insertion/balancing, for files that are written in one write. 20 * insertion/balancing, for files that are written in one write.
21** It avoids unnecessary tail packings (balances) for files that are written in 21 * It avoids unnecessary tail packings (balances) for files that are written in
22** multiple writes and are small enough to have tails. 22 * multiple writes and are small enough to have tails.
23** 23 *
24** file_release is called by the VFS layer when the file is closed. If 24 * file_release is called by the VFS layer when the file is closed. If
25** this is the last open file descriptor, and the file 25 * this is the last open file descriptor, and the file
26** small enough to have a tail, and the tail is currently in an 26 * small enough to have a tail, and the tail is currently in an
27** unformatted node, the tail is converted back into a direct item. 27 * unformatted node, the tail is converted back into a direct item.
28** 28 *
29** We use reiserfs_truncate_file to pack the tail, since it already has 29 * We use reiserfs_truncate_file to pack the tail, since it already has
30** all the conditions coded. 30 * all the conditions coded.
31*/ 31 */
32static int reiserfs_file_release(struct inode *inode, struct file *filp) 32static int reiserfs_file_release(struct inode *inode, struct file *filp)
33{ 33{
34 34
@@ -57,14 +57,16 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
57 } 57 }
58 58
59 reiserfs_write_lock(inode->i_sb); 59 reiserfs_write_lock(inode->i_sb);
60 /* freeing preallocation only involves relogging blocks that 60 /*
61 * freeing preallocation only involves relogging blocks that
61 * are already in the current transaction. preallocation gets 62 * are already in the current transaction. preallocation gets
62 * freed at the end of each transaction, so it is impossible for 63 * freed at the end of each transaction, so it is impossible for
63 * us to log any additional blocks (including quota blocks) 64 * us to log any additional blocks (including quota blocks)
64 */ 65 */
65 err = journal_begin(&th, inode->i_sb, 1); 66 err = journal_begin(&th, inode->i_sb, 1);
66 if (err) { 67 if (err) {
67 /* uh oh, we can't allow the inode to go away while there 68 /*
69 * uh oh, we can't allow the inode to go away while there
68 * is still preallocation blocks pending. Try to join the 70 * is still preallocation blocks pending. Try to join the
69 * aborted transaction 71 * aborted transaction
70 */ 72 */
@@ -72,11 +74,13 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
72 err = journal_join_abort(&th, inode->i_sb, 1); 74 err = journal_join_abort(&th, inode->i_sb, 1);
73 75
74 if (err) { 76 if (err) {
75 /* hmpf, our choices here aren't good. We can pin the inode 77 /*
76 * which will disallow unmount from every happening, we can 78 * hmpf, our choices here aren't good. We can pin
77 * do nothing, which will corrupt random memory on unmount, 79 * the inode which will disallow unmount from ever
78 * or we can forcibly remove the file from the preallocation 80 * happening, we can do nothing, which will corrupt
79 * list, which will leak blocks on disk. Lets pin the inode 81 * random memory on unmount, or we can forcibly
82 * remove the file from the preallocation list, which
83 * will leak blocks on disk. Lets pin the inode
80 * and let the admin know what is going on. 84 * and let the admin know what is going on.
81 */ 85 */
82 igrab(inode); 86 igrab(inode);
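The fallback chain in this error path, try the normal transaction, then try to join the aborted one, then pin and warn, can be sketched in standalone C. begin_txn and join_aborted_txn are invented stand-ins for journal_begin and journal_join_abort; this models the control flow only.

#include <stdio.h>

/* hypothetical stand-ins for journal_begin() / journal_join_abort() */
static int begin_txn(int fail)        { return fail ? -1 : 0; }
static int join_aborted_txn(int fail) { return fail ? -1 : 0; }

static void toy_free_prealloc(int begin_fails, int join_fails)
{
	if (begin_txn(begin_fails) == 0 || join_aborted_txn(join_fails) == 0) {
		printf("preallocation blocks relogged and freed\n");
		return;
	}
	/*
	 * Last resort, mirroring the comment above: keep a reference so
	 * the object cannot be freed (the kernel uses igrab()), trading
	 * a block leak for memory corruption.
	 */
	printf("pinning inode; leaking preallocated blocks\n");
}

int main(void)
{
	toy_free_prealloc(0, 0);	/* normal transaction begins */
	toy_free_prealloc(1, 0);	/* joined the aborted transaction */
	toy_free_prealloc(1, 1);	/* both failed: pin and warn */
	return 0;
}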
@@ -102,10 +106,12 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
102 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 106 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
103 tail_has_to_be_packed(inode)) { 107 tail_has_to_be_packed(inode)) {
104 108
105 /* if regular file is released by last holder and it has been 109 /*
106 appended (we append by unformatted node only) or its direct 110 * if regular file is released by last holder and it has been
107 item(s) had to be converted, then it may have to be 111 * appended (we append by unformatted node only) or its direct
108 indirect2direct converted */ 112 * item(s) had to be converted, then it may have to be
113 * indirect2direct converted
114 */
109 err = reiserfs_truncate_file(inode, 0); 115 err = reiserfs_truncate_file(inode, 0);
110 } 116 }
111 out: 117 out:
@@ -117,8 +123,9 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
117static int reiserfs_file_open(struct inode *inode, struct file *file) 123static int reiserfs_file_open(struct inode *inode, struct file *file)
118{ 124{
119 int err = dquot_file_open(inode, file); 125 int err = dquot_file_open(inode, file);
126
127 /* somebody might be tailpacking on final close; wait for it */
120 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { 128 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
121 /* somebody might be tailpacking on final close; wait for it */
122 mutex_lock(&(REISERFS_I(inode)->tailpack)); 129 mutex_lock(&(REISERFS_I(inode)->tailpack));
123 atomic_inc(&REISERFS_I(inode)->openers); 130 atomic_inc(&REISERFS_I(inode)->openers);
124 mutex_unlock(&(REISERFS_I(inode)->tailpack)); 131 mutex_unlock(&(REISERFS_I(inode)->tailpack));
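The opener/tailpack pairing above is a small lock-avoidance pattern: a plain atomic increment suffices unless the count is zero, in which case a final close may be repacking the tail and the opener must wait on the mutex. A hedged userspace model with C11 atomics and pthreads follows; all names are toys, not the kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int openers;
static pthread_mutex_t tailpack = PTHREAD_MUTEX_INITIALIZER;

/* increment only if currently non-zero, like atomic_inc_not_zero() */
static int inc_not_zero(atomic_int *v)
{
	int cur = atomic_load(v);

	while (cur != 0)
		if (atomic_compare_exchange_weak(v, &cur, cur + 1))
			return 1;
	return 0;
}

static void toy_open(void)
{
	if (!inc_not_zero(&openers)) {
		/* count was zero: a final close may be tailpacking */
		pthread_mutex_lock(&tailpack);
		atomic_fetch_add(&openers, 1);
		pthread_mutex_unlock(&tailpack);
	}
}

static void toy_release(void)
{
	pthread_mutex_lock(&tailpack);
	if (atomic_fetch_sub(&openers, 1) == 1)
		printf("last closer: repacking tail under the lock\n");
	pthread_mutex_unlock(&tailpack);
}

int main(void)
{
	toy_open();
	toy_release();
	return 0;
}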
@@ -208,7 +215,8 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
208 journal_mark_dirty(&th, s, bh); 215 journal_mark_dirty(&th, s, bh);
209 } else if (!buffer_dirty(bh)) { 216 } else if (!buffer_dirty(bh)) {
210 mark_buffer_dirty(bh); 217 mark_buffer_dirty(bh);
211 /* do data=ordered on any page past the end 218 /*
219 * do data=ordered on any page past the end
212 * of file and any buffer marked BH_New. 220 * of file and any buffer marked BH_New.
213 */ 221 */
214 if (reiserfs_data_ordered(inode->i_sb) && 222 if (reiserfs_data_ordered(inode->i_sb) &&
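The per-buffer branch shown here either relogs a buffer that is already journaled or just dirties it, with data=ordered additionally queueing fresh buffers. A rough standalone model of that decision; struct toy_bh and the printed actions are illustrative assumptions, not the kernel's API.

#include <stdio.h>

struct toy_bh {
	int in_journal;		/* already part of the running transaction */
	int dirty;
	int newly_mapped;	/* models BH_New / past-end-of-file buffers */
};

static void toy_commit_buffer(struct toy_bh *bh, int data_ordered)
{
	if (bh->in_journal) {
		printf("journal_mark_dirty\n");	/* relog it */
	} else if (!bh->dirty) {
		bh->dirty = 1;
		printf("mark_buffer_dirty\n");
		if (data_ordered && bh->newly_mapped)
			printf("queue on the data=ordered list\n");
	}
}

int main(void)
{
	struct toy_bh a = { .in_journal = 1 };
	struct toy_bh b = { .newly_mapped = 1 };

	toy_commit_buffer(&a, 1);	/* journaled: relog */
	toy_commit_buffer(&b, 1);	/* new: dirty + ordered list */
	return 0;
}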
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index b6a05a7f4658..144bd62c3e39 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2,59 +2,32 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/**
6 ** old_item_num
7 ** old_entry_num
8 ** set_entry_sizes
9 ** create_virtual_node
10 ** check_left
11 ** check_right
12 ** directory_part_size
13 ** get_num_ver
14 ** set_parameters
15 ** is_leaf_removable
16 ** are_leaves_removable
17 ** get_empty_nodes
18 ** get_lfree
19 ** get_rfree
20 ** is_left_neighbor_in_cache
21 ** decrement_key
22 ** get_far_parent
23 ** get_parents
24 ** can_node_be_removed
25 ** ip_check_balance
26 ** dc_check_balance_internal
27 ** dc_check_balance_leaf
28 ** dc_check_balance
29 ** check_balance
30 ** get_direct_parent
31 ** get_neighbors
32 ** fix_nodes
33 **
34 **
35 **/
36
37#include <linux/time.h> 5#include <linux/time.h>
38#include <linux/slab.h> 6#include <linux/slab.h>
39#include <linux/string.h> 7#include <linux/string.h>
40#include "reiserfs.h" 8#include "reiserfs.h"
41#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
42 10
43/* To make any changes in the tree we find a node, that contains item 11/*
44 to be changed/deleted or position in the node we insert a new item 12 * To make any changes in the tree we find a node that contains item
45 to. We call this node S. To do balancing we need to decide what we 13 * to be changed/deleted or position in the node we insert a new item
46 will shift to left/right neighbor, or to a new node, where new item 14 * to. We call this node S. To do balancing we need to decide what we
47 will be etc. To make this analysis simpler we build virtual 15 * will shift to left/right neighbor, or to a new node, where new item
48 node. Virtual node is an array of items, that will replace items of 16 * will be etc. To make this analysis simpler we build virtual
49 node S. (For instance if we are going to delete an item, virtual 17 * node. Virtual node is an array of items, that will replace items of
50 node does not contain it). Virtual node keeps information about 18 * node S. (For instance if we are going to delete an item, virtual
51 item sizes and types, mergeability of first and last items, sizes 19 * node does not contain it). Virtual node keeps information about
52 of all entries in directory item. We use this array of items when 20 * item sizes and types, mergeability of first and last items, sizes
53 calculating what we can shift to neighbors and how many nodes we 21 * of all entries in directory item. We use this array of items when
54 have to have if we do not do any shifting, if we shift to left/right 22 * calculating what we can shift to neighbors and how many nodes we
55 neighbor or to both. */ 23 * have to have if we do not do any shifting, if we shift to left/right
56 24 * neighbor or to both.
57/* taking item number in virtual node, returns number of item, that it has in source buffer */ 25 */
26
27/*
28 * Takes item number in virtual node, returns number of item
29 * that it has in source buffer
30 */
58static inline int old_item_num(int new_num, int affected_item_num, int mode) 31static inline int old_item_num(int new_num, int affected_item_num, int mode)
59{ 32{
60 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 33 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
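The index mapping can be checked with a toy version of old_item_num(); the enum and the worked examples below are assumptions for illustration, and the kernel's M_* modes carry extra cases and debug checks.

#include <assert.h>

enum toy_mode { TOY_INSERT, TOY_DELETE, TOY_OTHER };

/* map an index in the virtual node back to the source-buffer index */
static int toy_old_item_num(int new_num, int affected, enum toy_mode m)
{
	if (m == TOY_OTHER || new_num < affected)
		return new_num;		/* items before the edit don't move */
	if (m == TOY_INSERT)
		return new_num - 1;	/* insertion shifted later items right */
	return new_num + 1;		/* deletion shifted later items left */
}

int main(void)
{
	/* inserting at slot 2: virtual item 4 was source item 3 */
	assert(toy_old_item_num(4, 2, TOY_INSERT) == 3);
	/* deleting slot 2: virtual item 2 was source item 3 */
	assert(toy_old_item_num(2, 2, TOY_DELETE) == 3);
	return 0;
}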
@@ -112,7 +85,10 @@ static void create_virtual_node(struct tree_balance *tb, int h)
112 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) 85 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
113 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; 86 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
114 87
115 /* go through all items those remain in the virtual node (except for the new (inserted) one) */ 88 /*
89 * go through all items that remain in the virtual
90 * node (except for the new (inserted) one)
91 */
116 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { 92 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
117 int j; 93 int j;
118 struct virtual_item *vi = vn->vn_vi + new_num; 94 struct virtual_item *vi = vn->vn_vi + new_num;
@@ -131,8 +107,10 @@ static void create_virtual_node(struct tree_balance *tb, int h)
131 vi->vi_item = ih_item_body(Sh, ih + j); 107 vi->vi_item = ih_item_body(Sh, ih + j);
132 vi->vi_uarea = vn->vn_free_ptr; 108 vi->vi_uarea = vn->vn_free_ptr;
133 109
134 // FIXME: there is no check, that item operation did not 110 /*
135 // consume too much memory 111 * FIXME: there is no check that item operation did not
112 * consume too much memory
113 */
136 vn->vn_free_ptr += 114 vn->vn_free_ptr +=
137 op_create_vi(vn, vi, is_affected, tb->insert_size[0]); 115 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
138 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) 116 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
@@ -145,7 +123,8 @@ static void create_virtual_node(struct tree_balance *tb, int h)
145 123
146 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { 124 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
147 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; 125 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
148 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted 126 /* pointer to data which is going to be pasted */
127 vi->vi_new_data = vn->vn_data;
149 } 128 }
150 } 129 }
151 130
@@ -164,7 +143,10 @@ static void create_virtual_node(struct tree_balance *tb, int h)
164 tb->insert_size[0]); 143 tb->insert_size[0]);
165 } 144 }
166 145
167 /* set right merge flag: we take right delimiting key and check whether it is a mergeable item */ 146 /*
147 * set right merge flag: we take right delimiting key and
148 * check whether it is a mergeable item
149 */
168 if (tb->CFR[0]) { 150 if (tb->CFR[0]) {
169 struct reiserfs_key *key; 151 struct reiserfs_key *key;
170 152
@@ -179,12 +161,19 @@ static void create_virtual_node(struct tree_balance *tb, int h)
179 if (op_is_left_mergeable(key, Sh->b_size) && 161 if (op_is_left_mergeable(key, Sh->b_size) &&
180 !(vn->vn_mode != M_DELETE 162 !(vn->vn_mode != M_DELETE
181 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { 163 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
182 /* we delete last item and it could be merged with right neighbor's first item */ 164 /*
165 * we delete last item and it could be merged
166 * with right neighbor's first item
167 */
183 if (! 168 if (!
184 (B_NR_ITEMS(Sh) == 1 169 (B_NR_ITEMS(Sh) == 1
185 && is_direntry_le_ih(item_head(Sh, 0)) 170 && is_direntry_le_ih(item_head(Sh, 0))
186 && ih_entry_count(item_head(Sh, 0)) == 1)) { 171 && ih_entry_count(item_head(Sh, 0)) == 1)) {
187 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ 172 /*
173 * node contains more than 1 item, or item
174 * is not directory item, or this item
175 * contains more than 1 entry
176 */
188 print_block(Sh, 0, -1, -1); 177 print_block(Sh, 0, -1, -1);
189 reiserfs_panic(tb->tb_sb, "vs-8045", 178 reiserfs_panic(tb->tb_sb, "vs-8045",
190 "rdkey %k, affected item==%d " 179 "rdkey %k, affected item==%d "
@@ -198,8 +187,10 @@ static void create_virtual_node(struct tree_balance *tb, int h)
198 } 187 }
199} 188}
200 189
201/* using the virtual node, check how many items can be shifted to left 190/*
202 neighbor */ 191 * Using the virtual node, check how many items can be
192 * shifted to left neighbor
193 */
203static void check_left(struct tree_balance *tb, int h, int cur_free) 194static void check_left(struct tree_balance *tb, int h, int cur_free)
204{ 195{
205 int i; 196 int i;
@@ -259,9 +250,13 @@ static void check_left(struct tree_balance *tb, int h, int cur_free)
259 } 250 }
260 251
261 /* the item cannot be shifted entirely, try to split it */ 252 /* the item cannot be shifted entirely, try to split it */
262 /* check whether L[0] can hold ih and at least one byte of the item body */ 253 /*
254 * check whether L[0] can hold ih and at least one byte
255 * of the item body
256 */
257
258 /* cannot shift even a part of the current item */
263 if (cur_free <= ih_size) { 259 if (cur_free <= ih_size) {
264 /* cannot shift even a part of the current item */
265 tb->lbytes = -1; 260 tb->lbytes = -1;
266 return; 261 return;
267 } 262 }
@@ -278,8 +273,10 @@ static void check_left(struct tree_balance *tb, int h, int cur_free)
278 return; 273 return;
279} 274}
280 275
281/* using the virtual node, check how many items can be shifted to right 276/*
282 neighbor */ 277 * Using the virtual node, check how many items can be
278 * shifted to right neighbor
279 */
283static void check_right(struct tree_balance *tb, int h, int cur_free) 280static void check_right(struct tree_balance *tb, int h, int cur_free)
284{ 281{
285 int i; 282 int i;
@@ -338,13 +335,21 @@ static void check_right(struct tree_balance *tb, int h, int cur_free)
338 continue; 335 continue;
339 } 336 }
340 337
341 /* check whether R[0] can hold ih and at least one byte of the item body */ 338 /*
342 if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ 339 * check whether R[0] can hold ih and at least one
340 * byte of the item body
341 */
342
343 /* cannot shift even a part of the current item */
344 if (cur_free <= ih_size) {
343 tb->rbytes = -1; 345 tb->rbytes = -1;
344 return; 346 return;
345 } 347 }
346 348
347 /* R[0] can hold the header of the item and at least one byte of its body */ 349 /*
350 * R[0] can hold the header of the item and at least
351 * one byte of its body
352 */
348 cur_free -= ih_size; /* cur_free is still > 0 */ 353 cur_free -= ih_size; /* cur_free is still > 0 */
349 354
350 tb->rbytes = op_check_right(vi, cur_free); 355 tb->rbytes = op_check_right(vi, cur_free);
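check_left() and check_right() share one greedy shape: move whole items from the edge while they fit, then ask whether the first item that does not fit can still donate a partial chunk, with -1 meaning no split. A standalone model under assumed item and header sizes; it stands in for the op_check_* callbacks without reproducing them.

#include <stdio.h>

static void count_shiftable(const int *sizes, int n, int cur_free,
			    int header_size, int *nitems, int *nbytes)
{
	int i;

	*nitems = 0;
	*nbytes = -1;	/* -1 means no partial item, as in tb->lbytes */

	for (i = 0; i < n; i++) {
		if (cur_free >= sizes[i] + header_size) {
			/* the whole item fits */
			cur_free -= sizes[i] + header_size;
			(*nitems)++;
			continue;
		}
		/* need the header plus at least one byte of body */
		if (cur_free > header_size)
			*nbytes = cur_free - header_size;
		break;
	}
}

int main(void)
{
	int sizes[] = { 100, 200, 300 };
	int items, bytes;

	count_shiftable(sizes, 3, 350, 24, &items, &bytes);
	printf("shift %d whole items, %d bytes of the next\n", items, bytes);
	return 0;
}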
@@ -361,45 +366,64 @@ static void check_right(struct tree_balance *tb, int h, int cur_free)
361/* 366/*
362 * from - number of items, which are shifted to left neighbor entirely 367 * from - number of items, which are shifted to left neighbor entirely
363 * to - number of item, which are shifted to right neighbor entirely 368 * to - number of item, which are shifted to right neighbor entirely
364 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor 369 * from_bytes - number of bytes of boundary item (or directory entries)
365 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ 370 * which are shifted to left neighbor
371 * to_bytes - number of bytes of boundary item (or directory entries)
372 * which are shifted to right neighbor
373 */
366static int get_num_ver(int mode, struct tree_balance *tb, int h, 374static int get_num_ver(int mode, struct tree_balance *tb, int h,
367 int from, int from_bytes, 375 int from, int from_bytes,
368 int to, int to_bytes, short *snum012, int flow) 376 int to, int to_bytes, short *snum012, int flow)
369{ 377{
370 int i; 378 int i;
371 int cur_free; 379 int cur_free;
372 // int bytes;
373 int units; 380 int units;
374 struct virtual_node *vn = tb->tb_vn; 381 struct virtual_node *vn = tb->tb_vn;
375 // struct virtual_item * vi;
376
377 int total_node_size, max_node_size, current_item_size; 382 int total_node_size, max_node_size, current_item_size;
378 int needed_nodes; 383 int needed_nodes;
379 int start_item, /* position of item we start filling node from */ 384
380 end_item, /* position of item we finish filling node by */ 385 /* position of item we start filling node from */
381 start_bytes, /* number of first bytes (entries for directory) of start_item-th item 386 int start_item;
382 we do not include into node that is being filled */ 387
383 end_bytes; /* number of last bytes (entries for directory) of end_item-th item 388 /* position of item we finish filling node by */
384 we do node include into node that is being filled */ 389 int end_item;
385 int split_item_positions[2]; /* these are positions in virtual item of 390
386 items, that are split between S[0] and 391 /*
387 S1new and S1new and S2new */ 392 * number of first bytes (entries for directory) of start_item-th item
393 * we do not include into node that is being filled
394 */
395 int start_bytes;
396
397 /*
398 * number of last bytes (entries for directory) of end_item-th item
399 * we do not include into node that is being filled
400 */
401 int end_bytes;
402
403 /*
404 * these are positions in virtual item of items, that are split
405 * between S[0] and S1new and S1new and S2new
406 */
407 int split_item_positions[2];
388 408
389 split_item_positions[0] = -1; 409 split_item_positions[0] = -1;
390 split_item_positions[1] = -1; 410 split_item_positions[1] = -1;
391 411
392 /* We only create additional nodes if we are in insert or paste mode 412 /*
393 or we are in replace mode at the internal level. If h is 0 and 413 * We only create additional nodes if we are in insert or paste mode
394 the mode is M_REPLACE then in fix_nodes we change the mode to 414 * or we are in replace mode at the internal level. If h is 0 and
395 paste or insert before we get here in the code. */ 415 * the mode is M_REPLACE then in fix_nodes we change the mode to
416 * paste or insert before we get here in the code.
417 */
396 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 418 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
397 "vs-8100: insert_size < 0 in overflow"); 419 "vs-8100: insert_size < 0 in overflow");
398 420
399 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); 421 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
400 422
401 /* snum012 [0-2] - number of items, that lay 423 /*
402 to S[0], first new node and second new node */ 424 * snum012 [0-2] - number of items, that lay
425 * to S[0], first new node and second new node
426 */
403 snum012[3] = -1; /* s1bytes */ 427 snum012[3] = -1; /* s1bytes */
404 snum012[4] = -1; /* s2bytes */ 428 snum012[4] = -1; /* s2bytes */
405 429
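While reading the snum012 bookkeeping in this function, it helps to picture one five-slot group of the array; the layout below follows the indices visible here (item counts in slots 0-2, s1bytes in slot 3, s2bytes in slot 4) but the annotations are an interpretation, not authoritative.

#include <stdio.h>

int main(void)
{
	/* one candidate split: item counts, then split-item units */
	short group[5] = {
		5,	/* [0] items that stay in S[0]              */
		3,	/* [1] items that go to the first new node   */
		0,	/* [2] items that go to the second new node  */
		-1,	/* [3] s1bytes: split-item units, -1 = none  */
		-1,	/* [4] s2bytes: likewise for the second node */
	};

	printf("S0=%d S1new=%d S2new=%d s1bytes=%d s2bytes=%d\n",
	       group[0], group[1], group[2], group[3], group[4]);
	return 0;
}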
@@ -416,20 +440,22 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
416 total_node_size = 0; 440 total_node_size = 0;
417 cur_free = max_node_size; 441 cur_free = max_node_size;
418 442
419 // start from 'from'-th item 443 /* start from 'from'-th item */
420 start_item = from; 444 start_item = from;
421 // skip its first 'start_bytes' units 445 /* skip its first 'start_bytes' units */
422 start_bytes = ((from_bytes != -1) ? from_bytes : 0); 446 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
423 447
424 // last included item is the 'end_item'-th one 448 /* last included item is the 'end_item'-th one */
425 end_item = vn->vn_nr_item - to - 1; 449 end_item = vn->vn_nr_item - to - 1;
426 // do not count last 'end_bytes' units of 'end_item'-th item 450 /* do not count last 'end_bytes' units of 'end_item'-th item */
427 end_bytes = (to_bytes != -1) ? to_bytes : 0; 451 end_bytes = (to_bytes != -1) ? to_bytes : 0;
428 452
429 /* go through all items beginning from the start_item-th item and ending by 453 /*
430 the end_item-th item. Do not count first 'start_bytes' units of 454 * go through all items beginning from the start_item-th item
431 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ 455 * and ending by the end_item-th item. Do not count first
432 456 * 'start_bytes' units of 'start_item'-th item and last
457 * 'end_bytes' of 'end_item'-th item
458 */
433 for (i = start_item; i <= end_item; i++) { 459 for (i = start_item; i <= end_item; i++) {
434 struct virtual_item *vi = vn->vn_vi + i; 460 struct virtual_item *vi = vn->vn_vi + i;
435 int skip_from_end = ((i == end_item) ? end_bytes : 0); 461 int skip_from_end = ((i == end_item) ? end_bytes : 0);
@@ -439,7 +465,10 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
439 /* get size of current item */ 465 /* get size of current item */
440 current_item_size = vi->vi_item_len; 466 current_item_size = vi->vi_item_len;
441 467
442 /* do not take in calculation head part (from_bytes) of from-th item */ 468 /*
469 * do not take in calculation head part (from_bytes)
470 * of from-th item
471 */
443 current_item_size -= 472 current_item_size -=
444 op_part_size(vi, 0 /*from start */ , start_bytes); 473 op_part_size(vi, 0 /*from start */ , start_bytes);
445 474
@@ -455,9 +484,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
455 continue; 484 continue;
456 } 485 }
457 486
487 /*
488 * virtual item length is longer, than max size of item in
489 * a node. It is impossible for direct item
490 */
458 if (current_item_size > max_node_size) { 491 if (current_item_size > max_node_size) {
459 /* virtual item length is longer, than max size of item in
460 a node. It is impossible for direct item */
461 RFALSE(is_direct_le_ih(vi->vi_ih), 492 RFALSE(is_direct_le_ih(vi->vi_ih),
462 "vs-8110: " 493 "vs-8110: "
463 "direct item length is %d. It can not be longer than %d", 494 "direct item length is %d. It can not be longer than %d",
@@ -466,15 +497,18 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
466 flow = 1; 497 flow = 1;
467 } 498 }
468 499
500 /* as we do not split items, take new node and continue */
469 if (!flow) { 501 if (!flow) {
470 /* as we do not split items, take new node and continue */
471 needed_nodes++; 502 needed_nodes++;
472 i--; 503 i--;
473 total_node_size = 0; 504 total_node_size = 0;
474 continue; 505 continue;
475 } 506 }
476 // calculate number of item units which fit into node being 507
477 // filled 508 /*
509 * calculate number of item units which fit into node being
510 * filled
511 */
478 { 512 {
479 int free_space; 513 int free_space;
480 514
@@ -482,17 +516,17 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
482 units = 516 units =
483 op_check_left(vi, free_space, start_bytes, 517 op_check_left(vi, free_space, start_bytes,
484 skip_from_end); 518 skip_from_end);
519 /*
520 * nothing fits into current node, take new
521 * node and continue
522 */
485 if (units == -1) { 523 if (units == -1) {
486 /* nothing fits into current node, take new node and continue */
487 needed_nodes++, i--, total_node_size = 0; 524 needed_nodes++, i--, total_node_size = 0;
488 continue; 525 continue;
489 } 526 }
490 } 527 }
491 528
492 /* something fits into the current node */ 529 /* something fits into the current node */
493 //if (snum012[3] != -1 || needed_nodes != 1)
494 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
495 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
496 start_bytes += units; 530 start_bytes += units;
497 snum012[needed_nodes - 1 + 3] = units; 531 snum012[needed_nodes - 1 + 3] = units;
498 532
@@ -508,9 +542,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
508 total_node_size = 0; 542 total_node_size = 0;
509 } 543 }
510 544
511 // snum012[4] (if it is not -1) contains number of units which 545 /*
512 // are to be in S1new, snum012[3] - to be in S0. They are supposed 546 * snum012[4] (if it is not -1) contains number of units which
513 // to be S1bytes and S2bytes correspondingly, so recalculate 547 * are to be in S1new, snum012[3] - to be in S0. They are supposed
548 * to be S1bytes and S2bytes correspondingly, so recalculate
549 */
514 if (snum012[4] > 0) { 550 if (snum012[4] > 0) {
515 int split_item_num; 551 int split_item_num;
516 int bytes_to_r, bytes_to_l; 552 int bytes_to_r, bytes_to_l;
@@ -527,7 +563,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
527 ((split_item_positions[0] == 563 ((split_item_positions[0] ==
528 split_item_positions[1]) ? snum012[3] : 0); 564 split_item_positions[1]) ? snum012[3] : 0);
529 565
530 // s2bytes 566 /* s2bytes */
531 snum012[4] = 567 snum012[4] =
532 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - 568 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
533 bytes_to_r - bytes_to_l - bytes_to_S1new; 569 bytes_to_r - bytes_to_l - bytes_to_S1new;
@@ -555,7 +591,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
555 ((split_item_positions[0] == split_item_positions[1] 591 ((split_item_positions[0] == split_item_positions[1]
556 && snum012[4] != -1) ? snum012[4] : 0); 592 && snum012[4] != -1) ? snum012[4] : 0);
557 593
558 // s1bytes 594 /* s1bytes */
559 snum012[3] = 595 snum012[3] =
560 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - 596 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
561 bytes_to_r - bytes_to_l - bytes_to_S2new; 597 bytes_to_r - bytes_to_l - bytes_to_S2new;
@@ -565,7 +601,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
565} 601}
566 602
567 603
568/* Set parameters for balancing. 604/*
605 * Set parameters for balancing.
569 * Performs write of results of analysis of balancing into structure tb, 606 * Performs write of results of analysis of balancing into structure tb,
570 * where it will later be used by the functions that actually do the balancing. 607 * where it will later be used by the functions that actually do the balancing.
571 * Parameters: 608 * Parameters:
@@ -575,11 +612,12 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h,
575 * rnum number of items from S[h] that must be shifted to R[h]; 612 * rnum number of items from S[h] that must be shifted to R[h];
576 * blk_num number of blocks that S[h] will be splitted into; 613 * blk_num number of blocks that S[h] will be splitted into;
577 * s012 number of items that fall into splitted nodes. 614 * s012 number of items that fall into splitted nodes.
578 * lbytes number of bytes which flow to the left neighbor from the item that is not 615 * lbytes number of bytes which flow to the left neighbor from the
579 * shifted entirely 616 * item that is not shifted entirely
580 * rbytes number of bytes which flow to the right neighbor from the item that is not 617 * rbytes number of bytes which flow to the right neighbor from the
581 * shifted entirely 618 * item that is not shifted entirely
582 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) 619 * s1bytes number of bytes which flow to the first new node when
620 * S[0] splits (this number is contained in s012 array)
583 */ 621 */
584 622
585static void set_parameters(struct tree_balance *tb, int h, int lnum, 623static void set_parameters(struct tree_balance *tb, int h, int lnum,
@@ -590,7 +628,8 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum,
590 tb->rnum[h] = rnum; 628 tb->rnum[h] = rnum;
591 tb->blknum[h] = blk_num; 629 tb->blknum[h] = blk_num;
592 630
593 if (h == 0) { /* only for leaf level */ 631 /* only for leaf level */
632 if (h == 0) {
594 if (s012 != NULL) { 633 if (s012 != NULL) {
595 tb->s0num = *s012++, 634 tb->s0num = *s012++,
596 tb->s1num = *s012++, tb->s2num = *s012++; 635 tb->s1num = *s012++, tb->s2num = *s012++;
@@ -607,8 +646,10 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum,
607 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); 646 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
608} 647}
609 648
610/* check, does node disappear if we shift tb->lnum[0] items to left 649/*
611 neighbor and tb->rnum[0] to the right one. */ 650 * check if node disappears if we shift tb->lnum[0] items to left
651 * neighbor and tb->rnum[0] to the right one.
652 */
612static int is_leaf_removable(struct tree_balance *tb) 653static int is_leaf_removable(struct tree_balance *tb)
613{ 654{
614 struct virtual_node *vn = tb->tb_vn; 655 struct virtual_node *vn = tb->tb_vn;
@@ -616,8 +657,10 @@ static int is_leaf_removable(struct tree_balance *tb)
616 int size; 657 int size;
617 int remain_items; 658 int remain_items;
618 659
619 /* number of items, that will be shifted to left (right) neighbor 660 /*
620 entirely */ 661 * number of items that will be shifted to left (right) neighbor
662 * entirely
663 */
621 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 664 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
622 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 665 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
623 remain_items = vn->vn_nr_item; 666 remain_items = vn->vn_nr_item;
@@ -625,18 +668,18 @@ static int is_leaf_removable(struct tree_balance *tb)
625 /* how many items remain in S[0] after shiftings to neighbors */ 668 /* how many items remain in S[0] after shiftings to neighbors */
626 remain_items -= (to_left + to_right); 669 remain_items -= (to_left + to_right);
627 670
671 /* all content of node can be shifted to neighbors */
628 if (remain_items < 1) { 672 if (remain_items < 1) {
629 /* all content of node can be shifted to neighbors */
630 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, 673 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
631 NULL, -1, -1); 674 NULL, -1, -1);
632 return 1; 675 return 1;
633 } 676 }
634 677
678 /* S[0] is not removable */
635 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) 679 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
636 /* S[0] is not removable */
637 return 0; 680 return 0;
638 681
639 /* check, whether we can divide 1 remaining item between neighbors */ 682 /* check whether we can divide 1 remaining item between neighbors */
640 683
641 /* get size of remaining item (in item units) */ 684 /* get size of remaining item (in item units) */
642 size = op_unit_num(&(vn->vn_vi[to_left])); 685 size = op_unit_num(&(vn->vn_vi[to_left]));
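The counting at the top of this function discounts a partially shifted boundary item (lbytes or rbytes != -1) from the whole-item totals before asking what remains. A toy run with assumed numbers shows the arithmetic.

#include <stdio.h>

int main(void)
{
	int lnum = 3, lbytes = 120;	/* 3 items go left, last one partial */
	int rnum = 2, rbytes = -1;	/* 2 whole items go right */
	int nr_item = 5;

	int to_left = lnum - (lbytes != -1 ? 1 : 0);
	int to_right = rnum - (rbytes != -1 ? 1 : 0);
	int remain = nr_item - (to_left + to_right);

	/*
	 * remain == 1 here, so the next question is whether that one
	 * item can be divided between the two neighbors
	 */
	printf("whole items: %d left, %d right, %d remain\n",
	       to_left, to_right, remain);
	return 0;
}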
@@ -680,18 +723,23 @@ static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
680 && !comp_short_le_keys(&(ih->ih_key), 723 && !comp_short_le_keys(&(ih->ih_key),
681 internal_key(tb->CFR[0], 724 internal_key(tb->CFR[0],
682 tb->rkey[0]))) 725 tb->rkey[0])))
726 /*
727 * Directory must be in correct state here: that is
728 * somewhere at the left side there should exist the first
729 * directory item. But the item being deleted can
730 * not be that first one because its right neighbor
731 * is item of the same directory. (But first item
732 * always gets deleted in last turn). So, neighbors
733 * of deleted item can be merged, so we can save
734 * ih_size
735 */
683 if (is_direntry_le_ih(ih)) { 736 if (is_direntry_le_ih(ih)) {
684 /* Directory must be in correct state here: that is
685 somewhere at the left side should exist first directory
686 item. But the item being deleted can not be that first
687 one because its right neighbor is item of the same
688 directory. (But first item always gets deleted in last
689 turn). So, neighbors of deleted item can be merged, so
690 we can save ih_size */
691 ih_size = IH_SIZE; 737 ih_size = IH_SIZE;
692 738
693 /* we might check that left neighbor exists and is of the 739 /*
694 same directory */ 740 * we might check that left neighbor exists
741 * and is of the same directory
742 */
695 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, 743 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
696 "vs-8130: first directory item can not be removed until directory is not empty"); 744 "vs-8130: first directory item can not be removed until directory is not empty");
697 } 745 }
@@ -770,7 +818,8 @@ static void free_buffers_in_tb(struct tree_balance *tb)
770 } 818 }
771} 819}
772 820
773/* Get new buffers for storing new nodes that are created while balancing. 821/*
822 * Get new buffers for storing new nodes that are created while balancing.
774 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 823 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
775 * CARRY_ON - schedule didn't occur while the function worked; 824 * CARRY_ON - schedule didn't occur while the function worked;
776 * NO_DISK_SPACE - no disk space. 825 * NO_DISK_SPACE - no disk space.
@@ -778,28 +827,33 @@ static void free_buffers_in_tb(struct tree_balance *tb)
778/* The function is NOT SCHEDULE-SAFE! */ 827/* The function is NOT SCHEDULE-SAFE! */
779static int get_empty_nodes(struct tree_balance *tb, int h) 828static int get_empty_nodes(struct tree_balance *tb, int h)
780{ 829{
781 struct buffer_head *new_bh, 830 struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h);
782 *Sh = PATH_H_PBUFFER(tb->tb_path, h);
783 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; 831 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
784 int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ 832 int counter, number_of_freeblk;
785 retval = CARRY_ON; 833 int amount_needed; /* number of needed empty blocks */
834 int retval = CARRY_ON;
786 struct super_block *sb = tb->tb_sb; 835 struct super_block *sb = tb->tb_sb;
787 836
788 /* number_of_freeblk is the number of empty blocks which have been 837 /*
789 acquired for use by the balancing algorithm minus the number of 838 * number_of_freeblk is the number of empty blocks which have been
790 empty blocks used in the previous levels of the analysis, 839 * acquired for use by the balancing algorithm minus the number of
791 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs 840 * empty blocks used in the previous levels of the analysis,
792 after empty blocks are acquired, and the balancing analysis is 841 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule
793 then restarted, amount_needed is the number needed by this level 842 * occurs after empty blocks are acquired, and the balancing analysis
794 (h) of the balancing analysis. 843 * is then restarted, amount_needed is the number needed by this
795 844 * level (h) of the balancing analysis.
796 Note that for systems with many processes writing, it would be 845 *
797 more layout optimal to calculate the total number needed by all 846 * Note that for systems with many processes writing, it would be
798 levels and then to run reiserfs_new_blocks to get all of them at once. */ 847 * more layout optimal to calculate the total number needed by all
799 848 * levels and then to run reiserfs_new_blocks to get all of them at
800 /* Initiate number_of_freeblk to the amount acquired prior to the restart of 849 * once.
801 the analysis or 0 if not restarted, then subtract the amount needed 850 */
802 by all of the levels of the tree below h. */ 851
852 /*
853 * Initiate number_of_freeblk to the amount acquired prior to the
854 * restart of the analysis or 0 if not restarted, then subtract the
855 * amount needed by all of the levels of the tree below h.
856 */
803 /* blknum includes S[h], so we subtract 1 in this calculation */ 857 /* blknum includes S[h], so we subtract 1 in this calculation */
804 for (counter = 0, number_of_freeblk = tb->cur_blknum; 858 for (counter = 0, number_of_freeblk = tb->cur_blknum;
805 counter < h; counter++) 859 counter < h; counter++)
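A standalone walk-through of this accounting with made-up numbers; the blknum[] values are invented, and the clamp to zero is an assumption of the toy rather than a claim about the kernel's exact guards.

#include <stdio.h>

int main(void)
{
	int blknum[] = { 3, 2, 2 };	/* assumed per-level block needs */
	int cur_blknum = 2;		/* blocks acquired before a restart */
	int h = 2, i;
	int number_of_freeblk = cur_blknum;
	int amount_needed = blknum[h] - 1;	/* Sh exists, so minus one */

	/* levels below h consumed blknum[i] - 1 each (blknum counts S[i]) */
	for (i = 0; i < h; i++)
		number_of_freeblk -= blknum[i] - 1;
	if (number_of_freeblk < 0)
		number_of_freeblk = 0;

	if (amount_needed > number_of_freeblk)
		printf("allocate %d more blocks\n",
		       amount_needed - number_of_freeblk);
	else
		printf("enough blocks already acquired\n");
	return 0;
}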
@@ -810,13 +864,19 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
810 /* Allocate missing empty blocks. */ 864 /* Allocate missing empty blocks. */
811 /* if Sh == 0 then we are getting a new root */ 865 /* if Sh == 0 then we are getting a new root */
812 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; 866 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1;
813 /* Amount_needed = the amount that we need more than the amount that we have. */ 867 /*
868 * Amount_needed = the amount that we need more than the
869 * amount that we have.
870 */
814 if (amount_needed > number_of_freeblk) 871 if (amount_needed > number_of_freeblk)
815 amount_needed -= number_of_freeblk; 872 amount_needed -= number_of_freeblk;
816 else /* If we have enough already then there is nothing to do. */ 873 else /* If we have enough already then there is nothing to do. */
817 return CARRY_ON; 874 return CARRY_ON;
818 875
819 /* No need to check quota - it is not allocated for blocks used for formatted nodes 876 /*
877 * No need to check quota - it is not allocated for blocks used
878 * for formatted nodes
879 */
820 if (reiserfs_new_form_blocknrs(tb, blocknrs, 880 if (reiserfs_new_form_blocknrs(tb, blocknrs,
821 amount_needed) == NO_DISK_SPACE) 881 amount_needed) == NO_DISK_SPACE)
822 return NO_DISK_SPACE; 882 return NO_DISK_SPACE;
@@ -849,8 +909,10 @@ static int get_empty_nodes(struct tree_balance *tb, int h)
849 return retval; 909 return retval;
850} 910}
851 911
852/* Get free space of the left neighbor, which is stored in the parent 912/*
853 * node of the left neighbor. */ 913 * Get free space of the left neighbor, which is stored in the parent
914 * node of the left neighbor.
915 */
854static int get_lfree(struct tree_balance *tb, int h) 916static int get_lfree(struct tree_balance *tb, int h)
855{ 917{
856 struct buffer_head *l, *f; 918 struct buffer_head *l, *f;
@@ -870,7 +932,8 @@ static int get_lfree(struct tree_balance *tb, int h)
870 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 932 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
871} 933}
872 934
873/* Get free space of the right neighbor, 935/*
936 * Get free space of the right neighbor,
874 * which is stored in the parent node of the right neighbor. 937 * which is stored in the parent node of the right neighbor.
875 */ 938 */
876static int get_rfree(struct tree_balance *tb, int h) 939static int get_rfree(struct tree_balance *tb, int h)
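Both helpers read a neighbor's free space out of its parent instead of touching the neighbor itself: the parent's child pointer records the child's used size (dc_size), so free space is the maximal child payload minus that. A toy calculation; the 4096-byte block and 24-byte header are assumptions for the example.

#include <stdio.h>

int main(void)
{
	int blocksize = 4096;
	int block_header = 24;			/* assumed node header size */
	int max_child_size = blocksize - block_header;
	int dc_size = 3100;			/* used space, per the parent */

	printf("free space in neighbor: %d\n", max_child_size - dc_size);
	return 0;
}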
@@ -916,7 +979,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
916 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 979 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
917 father, tb->FL[h]); 980 father, tb->FL[h]);
918 981
919 /* Get position of the pointer to the left neighbor within the left father. */ 982 /*
983 * Get position of the pointer to the left neighbor
984 * within the left father.
985 */
920 left_neighbor_position = (father == tb->FL[h]) ? 986 left_neighbor_position = (father == tb->FL[h]) ?
921 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); 987 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]);
922 /* Get left neighbor block number. */ 988 /* Get left neighbor block number. */
@@ -940,17 +1006,20 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h)
940 1006
941static void decrement_key(struct cpu_key *key) 1007static void decrement_key(struct cpu_key *key)
942{ 1008{
943 // call item specific function for this key 1009 /* call item specific function for this key */
944 item_ops[cpu_key_k_type(key)]->decrement_key(key); 1010 item_ops[cpu_key_k_type(key)]->decrement_key(key);
945} 1011}
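decrement_key() is a thin dispatcher: the key's type indexes a table of per-item-type operation vectors. A toy model of that pattern; the types, deltas and names here are invented for illustration.

#include <stdio.h>

struct toy_key { int type; int offset; };

struct toy_item_ops {
	void (*decrement_key)(struct toy_key *);
};

static void dir_dec(struct toy_key *k)  { k->offset -= 1; }
static void data_dec(struct toy_key *k) { k->offset -= 4096; }

static const struct toy_item_ops toy_ops[] = {
	{ dir_dec },	/* directory-like keys step by one entry */
	{ data_dec },	/* data-like keys step by one block */
};

static void toy_decrement_key(struct toy_key *k)
{
	/* call the item-type-specific function for this key */
	toy_ops[k->type].decrement_key(k);
}

int main(void)
{
	struct toy_key k = { .type = 1, .offset = 8192 };

	toy_decrement_key(&k);
	printf("offset now %d\n", k.offset);
	return 0;
}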
946 1012
947/* Calculate far left/right parent of the left/right neighbor of the current node, that 1013/*
948 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 1014 * Calculate far left/right parent of the left/right neighbor of the
1015 * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor
1016 * of the parent F[h].
949 * Calculate left/right common parent of the current node and L[h]/R[h]. 1017 * Calculate left/right common parent of the current node and L[h]/R[h].
950 * Calculate left/right delimiting key position. 1018 * Calculate left/right delimiting key position.
951 * Returns: PATH_INCORRECT - path in the tree is not correct; 1019 * Returns: PATH_INCORRECT - path in the tree is not correct
952 SCHEDULE_OCCURRED - schedule occurred while the function worked; 1020 * SCHEDULE_OCCURRED - schedule occurred while the function worked
953 * CARRY_ON - schedule didn't occur while the function worked; 1021 * CARRY_ON - schedule didn't occur while the function
1022 * worked
954 */ 1023 */
955static int get_far_parent(struct tree_balance *tb, 1024static int get_far_parent(struct tree_balance *tb,
956 int h, 1025 int h,
@@ -966,8 +1035,10 @@ static int get_far_parent(struct tree_balance *tb,
966 first_last_position = 0, 1035 first_last_position = 0,
967 path_offset = PATH_H_PATH_OFFSET(path, h); 1036 path_offset = PATH_H_PATH_OFFSET(path, h);
968 1037
969 /* Starting from F[h] go upwards in the tree, and look for the common 1038 /*
970 ancestor of F[h], and its neighbor l/r, that should be obtained. */ 1039 * Starting from F[h] go upwards in the tree, and look for the common
1040 * ancestor of F[h], and its neighbor l/r, that should be obtained.
1041 */
971 1042
972 counter = path_offset; 1043 counter = path_offset;
973 1044
@@ -975,21 +1046,33 @@ static int get_far_parent(struct tree_balance *tb,
975 "PAP-8180: invalid path length"); 1046 "PAP-8180: invalid path length");
976 1047
977 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { 1048 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) {
978 /* Check whether parent of the current buffer in the path is really parent in the tree. */ 1049 /*
1050 * Check whether parent of the current buffer in the path
1051 * is really parent in the tree.
1052 */
979 if (!B_IS_IN_TREE 1053 if (!B_IS_IN_TREE
980 (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) 1054 (parent = PATH_OFFSET_PBUFFER(path, counter - 1)))
981 return REPEAT_SEARCH; 1055 return REPEAT_SEARCH;
1056
982 /* Check whether position in the parent is correct. */ 1057 /* Check whether position in the parent is correct. */
983 if ((position = 1058 if ((position =
984 PATH_OFFSET_POSITION(path, 1059 PATH_OFFSET_POSITION(path,
985 counter - 1)) > 1060 counter - 1)) >
986 B_NR_ITEMS(parent)) 1061 B_NR_ITEMS(parent))
987 return REPEAT_SEARCH; 1062 return REPEAT_SEARCH;
988 /* Check whether parent at the path really points to the child. */ 1063
1064 /*
1065 * Check whether parent at the path really points
1066 * to the child.
1067 */
989 if (B_N_CHILD_NUM(parent, position) != 1068 if (B_N_CHILD_NUM(parent, position) !=
990 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) 1069 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr)
991 return REPEAT_SEARCH; 1070 return REPEAT_SEARCH;
992 /* Return delimiting key if position in the parent is not equal to first/last one. */ 1071
1072 /*
1073 * Return delimiting key if position in the parent is not
1074 * equal to first/last one.
1075 */
993 if (c_lr_par == RIGHT_PARENTS) 1076 if (c_lr_par == RIGHT_PARENTS)
994 first_last_position = B_NR_ITEMS(parent); 1077 first_last_position = B_NR_ITEMS(parent);
995 if (position != first_last_position) { 1078 if (position != first_last_position) {
@@ -1002,7 +1085,10 @@ static int get_far_parent(struct tree_balance *tb,
1002 1085
1003 /* if we are in the root of the tree, then there is no common father */ 1086 /* if we are in the root of the tree, then there is no common father */
1004 if (counter == FIRST_PATH_ELEMENT_OFFSET) { 1087 if (counter == FIRST_PATH_ELEMENT_OFFSET) {
1005 /* Check whether first buffer in the path is the root of the tree. */ 1088 /*
1089 * Check whether first buffer in the path is the
1090 * root of the tree.
1091 */
1006 if (PATH_OFFSET_PBUFFER 1092 if (PATH_OFFSET_PBUFFER
1007 (tb->tb_path, 1093 (tb->tb_path,
1008 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1094 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
@@ -1031,8 +1117,11 @@ static int get_far_parent(struct tree_balance *tb,
1031 } 1117 }
1032 } 1118 }
1033 1119
1034 /* So, we got common parent of the current node and its left/right neighbor. 1120 /*
1035 Now we are geting the parent of the left/right neighbor. */ 1121 * So, we got common parent of the current node and its
1122 * left/right neighbor. Now we are getting the parent of the
1123 * left/right neighbor.
1124 */
1036 1125
1037 /* Form key to get parent of the left/right neighbor. */ 1126 /* Form key to get parent of the left/right neighbor. */
1038 le_key2cpu_key(&s_lr_father_key, 1127 le_key2cpu_key(&s_lr_father_key,
@@ -1050,7 +1139,7 @@ static int get_far_parent(struct tree_balance *tb,
1050 if (search_by_key 1139 if (search_by_key
1051 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, 1140 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1052 h + 1) == IO_ERROR) 1141 h + 1) == IO_ERROR)
1053 // path is released 1142 /* path is released */
1054 return IO_ERROR; 1143 return IO_ERROR;
1055 1144
1056 if (FILESYSTEM_CHANGED_TB(tb)) { 1145 if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -1071,12 +1160,15 @@ static int get_far_parent(struct tree_balance *tb,
1071 return CARRY_ON; 1160 return CARRY_ON;
1072} 1161}
1073 1162
1074/* Get parents of neighbors of node in the path(S[path_offset]) and common parents of 1163/*
1075 * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], 1164 * Get parents of neighbors of node in the path(S[path_offset]) and
1076 * FR[path_offset], CFL[path_offset], CFR[path_offset]. 1165 * common parents of S[path_offset] and L[path_offset]/R[path_offset]:
1077 * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. 1166 * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset],
1078 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1167 * CFR[path_offset].
1079 * CARRY_ON - schedule didn't occur while the function worked; 1168 * Calculate numbers of left and right delimiting keys position:
1169 * lkey[path_offset], rkey[path_offset].
1170 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked
1171 * CARRY_ON - schedule didn't occur while the function worked
1080 */ 1172 */
1081static int get_parents(struct tree_balance *tb, int h) 1173static int get_parents(struct tree_balance *tb, int h)
1082{ 1174{
@@ -1088,8 +1180,11 @@ static int get_parents(struct tree_balance *tb, int h)
1088 1180
1089 /* Current node is the root of the tree or will be root of the tree */ 1181 /* Current node is the root of the tree or will be root of the tree */
1090 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1182 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1091 /* The root can not have parents. 1183 /*
1092 Release nodes which previously were obtained as parents of the current node neighbors. */ 1184 * The root can not have parents.
1185 * Release nodes which previously were obtained as
1186 * parents of the current node neighbors.
1187 */
1093 brelse(tb->FL[h]); 1188 brelse(tb->FL[h]);
1094 brelse(tb->CFL[h]); 1189 brelse(tb->CFL[h]);
1095 brelse(tb->FR[h]); 1190 brelse(tb->FR[h]);
@@ -1111,10 +1206,14 @@ static int get_parents(struct tree_balance *tb, int h)
1111 get_bh(curf); 1206 get_bh(curf);
1112 tb->lkey[h] = position - 1; 1207 tb->lkey[h] = position - 1;
1113 } else { 1208 } else {
1114 /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. 1209 /*
1115 Calculate current common parent of L[path_offset] and the current node. Note that 1210 * Calculate current parent of L[path_offset], which is the
1116 CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. 1211 * left neighbor of the current node. Calculate current
1117 Calculate lkey[path_offset]. */ 1212 * common parent of L[path_offset] and the current node.
1213 * Note that CFL[path_offset] is not equal to FL[path_offset] and
1214 * CFL[path_offset] is not equal to F[path_offset].
1215 * Calculate lkey[path_offset].
1216 */
1118 if ((ret = get_far_parent(tb, h + 1, &curf, 1217 if ((ret = get_far_parent(tb, h + 1, &curf,
1119 &curcf, 1218 &curcf,
1120 LEFT_PARENTS)) != CARRY_ON) 1219 LEFT_PARENTS)) != CARRY_ON)
@@ -1130,19 +1229,22 @@ static int get_parents(struct tree_balance *tb, int h)
1130 (curcf && !B_IS_IN_TREE(curcf)), 1229 (curcf && !B_IS_IN_TREE(curcf)),
1131 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); 1230 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf);
1132 1231
1133/* Get parent FR[h] of R[h]. */ 1232 /* Get parent FR[h] of R[h]. */
1134 1233
1135/* Current node is the last child of F[h]. FR[h] != F[h]. */ 1234 /* Current node is the last child of F[h]. FR[h] != F[h]. */
1136 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { 1235 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) {
1137/* Calculate current parent of R[h], which is the right neighbor of F[h]. 1236 /*
1138 Calculate current common parent of R[h] and current node. Note that CFR[h] 1237 * Calculate current parent of R[h], which is the right
1139 not equal FR[path_offset] and CFR[h] not equal F[h]. */ 1238 * neighbor of F[h]. Calculate current common parent of
1239 * R[h] and current node. Note that CFR[h] is not equal to
1240 * FR[path_offset] and CFR[h] is not equal to F[h].
1241 */
1140 if ((ret = 1242 if ((ret =
1141 get_far_parent(tb, h + 1, &curf, &curcf, 1243 get_far_parent(tb, h + 1, &curf, &curcf,
1142 RIGHT_PARENTS)) != CARRY_ON) 1244 RIGHT_PARENTS)) != CARRY_ON)
1143 return ret; 1245 return ret;
1144 } else { 1246 } else {
1145/* Current node is not the last child of its parent F[h]. */ 1247 /* Current node is not the last child of its parent F[h]. */
1146 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1248 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1147 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1249 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1);
1148 get_bh(curf); 1250 get_bh(curf);
@@ -1165,8 +1267,10 @@ static int get_parents(struct tree_balance *tb, int h)
1165 return CARRY_ON; 1267 return CARRY_ON;
1166} 1268}
1167 1269
1168/* it is possible to remove node as result of shiftings to 1270/*
1169 neighbors even when we insert or paste item. */ 1271 * it is possible to remove node as result of shiftings to
1272 * neighbors even when we insert or paste item.
1273 */
1170static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, 1274static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1171 struct tree_balance *tb, int h) 1275 struct tree_balance *tb, int h)
1172{ 1276{
@@ -1189,7 +1293,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1189 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) 1293 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1190 + ((h) ? KEY_SIZE : 0)) { 1294 + ((h) ? KEY_SIZE : 0)) {
1191 /* node can not be removed */ 1295 /* node can not be removed */
1192 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1296 if (sfree >= levbytes) {
1297 /* new item fits into node S[h] without any shifting */
1193 if (!h) 1298 if (!h)
1194 tb->s0num = 1299 tb->s0num =
1195 B_NR_ITEMS(Sh) + 1300 B_NR_ITEMS(Sh) +
@@ -1202,7 +1307,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1202 return !NO_BALANCING_NEEDED; 1307 return !NO_BALANCING_NEEDED;
1203} 1308}
1204 1309
1205/* Check whether current node S[h] is balanced when increasing its size by 1310/*
1311 * Check whether current node S[h] is balanced when increasing its size by
1206 * Inserting or Pasting. 1312 * Inserting or Pasting.
1207 * Calculate parameters for balancing for current level h. 1313 * Calculate parameters for balancing for current level h.
1208 * Parameters: 1314 * Parameters:
@@ -1219,39 +1325,48 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1219static int ip_check_balance(struct tree_balance *tb, int h) 1325static int ip_check_balance(struct tree_balance *tb, int h)
1220{ 1326{
1221 struct virtual_node *vn = tb->tb_vn; 1327 struct virtual_node *vn = tb->tb_vn;
1222 int levbytes, /* Number of bytes that must be inserted into (value 1328 /*
1223 is negative if bytes are deleted) buffer which 1329 * Number of bytes that must be inserted into (value is negative
1224 contains node being balanced. The mnemonic is 1330 * if bytes are deleted) buffer which contains node being balanced.
1225 that the attempted change in node space used level 1331 * The mnemonic is that the attempted change in node space used
1226 is levbytes bytes. */ 1332 * level is levbytes bytes.
1227 ret; 1333 */
1334 int levbytes;
1335 int ret;
1228 1336
1229 int lfree, sfree, rfree /* free space in L, S and R */ ; 1337 int lfree, sfree, rfree /* free space in L, S and R */ ;
1230 1338
1231 /* nver is short for number of vertices, and lnver is the number if 1339 /*
1232 we shift to the left, rnver is the number if we shift to the 1340 * nver is short for number of vertices, and lnver is the number if
1233 right, and lrnver is the number if we shift in both directions. 1341 * we shift to the left, rnver is the number if we shift to the
1234 The goal is to minimize first the number of vertices, and second, 1342 * right, and lrnver is the number if we shift in both directions.
1235 the number of vertices whose contents are changed by shifting, 1343 * The goal is to minimize first the number of vertices, and second,
1236 and third the number of uncached vertices whose contents are 1344 * the number of vertices whose contents are changed by shifting,
1237 changed by shifting and must be read from disk. */ 1345 * and third the number of uncached vertices whose contents are
1346 * changed by shifting and must be read from disk.
1347 */
1238 int nver, lnver, rnver, lrnver; 1348 int nver, lnver, rnver, lrnver;
1239 1349
1240 /* used at leaf level only, S0 = S[0] is the node being balanced, 1350 /*
1241 sInum [ I = 0,1,2 ] is the number of items that will 1351 * used at leaf level only, S0 = S[0] is the node being balanced,
1242 remain in node SI after balancing. S1 and S2 are new 1352 * sInum [ I = 0,1,2 ] is the number of items that will
1243 nodes that might be created. */ 1353 * remain in node SI after balancing. S1 and S2 are new
1354 * nodes that might be created.
1355 */
1244 1356
1245 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 1357 /*
1246 where 4th parameter is s1bytes and 5th - s2bytes 1358 * we perform 8 calls to get_num_ver(). For each call we
1359 * calculate five parameters, where the 4th parameter is s1bytes
1360 * and 5th - s2bytes
1361 *
1362 * s0num, s1num, s2num for 8 cases
1363 * 0,1 - do not shift and do not shift but bottle
1364 * 2 - shift only whole item to left
1365 * 3 - shift to left and bottle as much as possible
1366 * 4,5 - shift to right (whole items and as much as possible)
1367 * 6,7 - shift to both directions (whole items and as much as possible)
1247 */ 1368 */
1248 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases 1369 short snum012[40] = { 0, };
1249 0,1 - do not shift and do not shift but bottle
1250 2 - shift only whole item to left
1251 3 - shift to left and bottle as much as possible
1252 4,5 - shift to right (whole items and as much as possible)
1253 6,7 - shift to both directions (whole items and as much as possible)
1254 */
1255 1370
1256 /* Sh is the node whose balance is currently being checked */ 1371 /* Sh is the node whose balance is currently being checked */
1257 struct buffer_head *Sh; 1372 struct buffer_head *Sh;
@@ -1265,9 +1380,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1265 reiserfs_panic(tb->tb_sb, "vs-8210", 1380 reiserfs_panic(tb->tb_sb, "vs-8210",
1266 "S[0] can not be 0"); 1381 "S[0] can not be 0");
1267 switch (ret = get_empty_nodes(tb, h)) { 1382 switch (ret = get_empty_nodes(tb, h)) {
1383 /* no balancing for higher levels needed */
1268 case CARRY_ON: 1384 case CARRY_ON:
1269 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1385 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1270 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1386 return NO_BALANCING_NEEDED;
1271 1387
1272 case NO_DISK_SPACE: 1388 case NO_DISK_SPACE:
1273 case REPEAT_SEARCH: 1389 case REPEAT_SEARCH:
@@ -1278,7 +1394,9 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1278 } 1394 }
1279 } 1395 }
1280 1396
1281 if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ 1397 /* get parents of S[h] neighbors. */
1398 ret = get_parents(tb, h);
1399 if (ret != CARRY_ON)
1282 return ret; 1400 return ret;
1283 1401
1284 sfree = B_FREE_SPACE(Sh); 1402 sfree = B_FREE_SPACE(Sh);
@@ -1287,38 +1405,44 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1287 rfree = get_rfree(tb, h); 1405 rfree = get_rfree(tb, h);
1288 lfree = get_lfree(tb, h); 1406 lfree = get_lfree(tb, h);
1289 1407
1408 /* and new item fits into node S[h] without any shifting */
1290 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == 1409 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1291 NO_BALANCING_NEEDED) 1410 NO_BALANCING_NEEDED)
1292 /* and new item fits into node S[h] without any shifting */
1293 return NO_BALANCING_NEEDED; 1411 return NO_BALANCING_NEEDED;
1294 1412
1295 create_virtual_node(tb, h); 1413 create_virtual_node(tb, h);
1296 1414
1297 /* 1415 /*
1298 determine maximal number of items we can shift to the left neighbor (in tb structure) 1416 * determine maximal number of items we can shift to the left
1299 and the maximal number of bytes that can flow to the left neighbor 1417 * neighbor (in tb structure) and the maximal number of bytes
1300 from the left most liquid item that cannot be shifted from S[0] entirely (returned value) 1418 * that can flow to the left neighbor from the left most liquid
1419 * item that cannot be shifted from S[0] entirely (returned value)
1301 */ 1420 */
1302 check_left(tb, h, lfree); 1421 check_left(tb, h, lfree);
1303 1422
1304 /* 1423 /*
1305 determine maximal number of items we can shift to the right neighbor (in tb structure) 1424 * determine maximal number of items we can shift to the right
1306 and the maximal number of bytes that can flow to the right neighbor 1425 * neighbor (in tb structure) and the maximal number of bytes
1307 from the right most liquid item that cannot be shifted from S[0] entirely (returned value) 1426 * that can flow to the right neighbor from the right most liquid
1427 * item that cannot be shifted from S[0] entirely (returned value)
1308 */ 1428 */
1309 check_right(tb, h, rfree); 1429 check_right(tb, h, rfree);
1310 1430
1311 /* all contents of internal node S[h] can be moved into its 1431 /*
1312 neighbors, S[h] will be removed after balancing */ 1432 * all contents of internal node S[h] can be moved into its
1433 * neighbors, S[h] will be removed after balancing
1434 */
1313 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { 1435 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1314 int to_r; 1436 int to_r;
1315 1437
1316 /* Since we are working on internal nodes, and our internal 1438 /*
1317 nodes have fixed size entries, then we can balance by the 1439 * Since we are working on internal nodes, and our internal
1318 number of items rather than the space they consume. In this 1440 * nodes have fixed size entries, then we can balance by the
1319 routine we set the left node equal to the right node, 1441 * number of items rather than the space they consume. In this
1320 allowing a difference of less than or equal to 1 child 1442 * routine we set the left node equal to the right node,
1321 pointer. */ 1443 * allowing a difference of less than or equal to 1 child
1444 * pointer.
1445 */
1322 to_r = 1446 to_r =
1323 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1447 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1324 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1448 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
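A short algebraic aside (my own expansion; the tail of the expression, tb->rnum[h]);, is cut off by the hunk boundary but can be read in fix_node.c): writing n = vn->vn_nr_item, l = tb->lnum[h], r = tb->rnum[h] and K = MAX_NR_KEY(Sh), the assignment works out to

    to_r = ((2K + 2) - l - r + n + 1) / 2 - (K + 1 - r)
         = (n + 1 + r - l) / 2

The MAX_NR_KEY terms cancel, so to_r simply sends enough of the n + 1 child pointers right that L[h] and R[h] are left with equal spare capacity, up to the one-pointer slack the comment allows for via the integer division.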
@@ -1328,7 +1452,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1328 return CARRY_ON; 1452 return CARRY_ON;
1329 } 1453 }
1330 1454
1331 /* this checks balance condition, that any two neighboring nodes can not fit in one node */ 1455 /*
1456 * this checks balance condition, that any two neighboring nodes
1457 * can not fit in one node
1458 */
1332 RFALSE(h && 1459 RFALSE(h &&
1333 (tb->lnum[h] >= vn->vn_nr_item + 1 || 1460 (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1334 tb->rnum[h] >= vn->vn_nr_item + 1), 1461 tb->rnum[h] >= vn->vn_nr_item + 1),
@@ -1337,16 +1464,22 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1337 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), 1464 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1338 "vs-8225: tree is not balanced on leaf level"); 1465 "vs-8225: tree is not balanced on leaf level");
1339 1466
1340 /* all contents of S[0] can be moved into its neighbors 1467 /*
1341 S[0] will be removed after balancing. */ 1468 * all contents of S[0] can be moved into its neighbors
1469 * S[0] will be removed after balancing.
1470 */
1342 if (!h && is_leaf_removable(tb)) 1471 if (!h && is_leaf_removable(tb))
1343 return CARRY_ON; 1472 return CARRY_ON;
1344 1473
1345 /* why do we perform this check here rather than earlier?? 1474 /*
1346 Answer: we can win 1 node in some cases above. Moreover we 1475 * why do we perform this check here rather than earlier??
1347 checked it above, when we checked, that S[0] is not removable 1476 * Answer: we can win 1 node in some cases above. Moreover we
1348 in principle */ 1477 * checked it above, when we checked that S[0] is not removable
1349 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1478 * in principle
1479 */
1480
1481 /* new item fits into node S[h] without any shifting */
1482 if (sfree >= levbytes) {
1350 if (!h) 1483 if (!h)
1351 tb->s0num = vn->vn_nr_item; 1484 tb->s0num = vn->vn_nr_item;
1352 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1485 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
@@ -1355,18 +1488,19 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1355 1488
1356 { 1489 {
1357 int lpar, rpar, nset, lset, rset, lrset; 1490 int lpar, rpar, nset, lset, rset, lrset;
1358 /* 1491 /* regular overflowing of the node */
1359 * regular overflowing of the node
1360 */
1361 1492
1362 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1493 /*
1363 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1494 * get_num_ver works in 2 modes (FLOW & NO_FLOW)
1364 nset, lset, rset, lrset - shows, whether flowing items give better packing 1495 * lpar, rpar - number of items we can shift to left/right
1496 * neighbor (including splitting item)
1364 nset, lset, rset, lrset - shows, whether flowing items give better packing 1497 * nset, lset, rset, lrset - show whether flowing items
1498 * give better packing
1365 */ 1499 */
1366#define FLOW 1 1500#define FLOW 1
1367#define NO_FLOW 0 /* do not do any splitting */ 1501#define NO_FLOW 0 /* do not do any splitting */
1368 1502
1369 /* we choose one the following */ 1503 /* we choose one of the following */
1370#define NOTHING_SHIFT_NO_FLOW 0 1504#define NOTHING_SHIFT_NO_FLOW 0
1371#define NOTHING_SHIFT_FLOW 5 1505#define NOTHING_SHIFT_FLOW 5
1372#define LEFT_SHIFT_NO_FLOW 10 1506#define LEFT_SHIFT_NO_FLOW 10
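An aside on the constants above (the list continues past the hunk boundary with LEFT_SHIFT_FLOW and friends): they step by five because each candidate packing records five shorts in the snum012[] scratch array, a flat short snum012[40] in fix_node.c. A hedged sketch of what one set holds; the struct is purely illustrative, as the kernel indexes the flat array directly:

struct packing_set {		/* what snum012 + <set constant> points at */
	short s0num;		/* items that would remain in S[h] */
	short s1num;		/* items going to the first new node, S1 */
	short s2num;		/* items going to the second new node, S2 */
	short s1bytes;		/* bytes of a split item flowing to S1, or -1 */
	short s2bytes;		/* bytes of a split item flowing to S2, or -1 */
};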
@@ -1379,10 +1513,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1379 lpar = tb->lnum[h]; 1513 lpar = tb->lnum[h];
1380 rpar = tb->rnum[h]; 1514 rpar = tb->rnum[h];
1381 1515
1382 /* calculate number of blocks S[h] must be split into when 1516 /*
1383 nothing is shifted to the neighbors, 1517 * calculate number of blocks S[h] must be split into when
1384 as well as number of items in each part of the split node (s012 numbers), 1518 * nothing is shifted to the neighbors, as well as number of
1385 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ 1519 * items in each part of the split node (s012 numbers),
1520 * and number of bytes (s1bytes) of the shared drop which
1521 * flow to S1 if any
1522 */
1386 nset = NOTHING_SHIFT_NO_FLOW; 1523 nset = NOTHING_SHIFT_NO_FLOW;
1387 nver = get_num_ver(vn->vn_mode, tb, h, 1524 nver = get_num_ver(vn->vn_mode, tb, h,
1388 0, -1, h ? vn->vn_nr_item : 0, -1, 1525 0, -1, h ? vn->vn_nr_item : 0, -1,
@@ -1391,7 +1528,10 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1391 if (!h) { 1528 if (!h) {
1392 int nver1; 1529 int nver1;
1393 1530
1394 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ 1531 /*
1532 * note, that in this case we try to bottle
1533 * between S[0] and S1 (S1 - the first new node)
1534 */
1395 nver1 = get_num_ver(vn->vn_mode, tb, h, 1535 nver1 = get_num_ver(vn->vn_mode, tb, h,
1396 0, -1, 0, -1, 1536 0, -1, 0, -1,
1397 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1537 snum012 + NOTHING_SHIFT_FLOW, FLOW);
@@ -1399,11 +1539,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1399 nset = NOTHING_SHIFT_FLOW, nver = nver1; 1539 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1400 } 1540 }
1401 1541
1402 /* calculate number of blocks S[h] must be split into when 1542 /*
1403 l_shift_num first items and l_shift_bytes of the right most 1543 * calculate number of blocks S[h] must be split into when
1404 liquid item to be shifted are shifted to the left neighbor, 1544 * l_shift_num first items and l_shift_bytes of the right
1405 as well as number of items in each part of the splitted node (s012 numbers), 1545 * most liquid item to be shifted are shifted to the left
1406 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1546 * neighbor, as well as number of items in each part of the
 1547 * split node (s012 numbers), and number of bytes
1548 * (s1bytes) of the shared drop which flow to S1 if any
1407 */ 1549 */
1408 lset = LEFT_SHIFT_NO_FLOW; 1550 lset = LEFT_SHIFT_NO_FLOW;
1409 lnver = get_num_ver(vn->vn_mode, tb, h, 1551 lnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1422,11 +1564,13 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1422 lset = LEFT_SHIFT_FLOW, lnver = lnver1; 1564 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1423 } 1565 }
1424 1566
1425 /* calculate number of blocks S[h] must be split into when 1567 /*
1426 r_shift_num first items and r_shift_bytes of the left most 1568 * calculate number of blocks S[h] must be split into when
1427 liquid item to be shifted are shifted to the right neighbor, 1569 * r_shift_num first items and r_shift_bytes of the left most
1428 as well as number of items in each part of the splitted node (s012 numbers), 1570 * liquid item to be shifted are shifted to the right neighbor,
1429 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1571 * as well as number of items in each part of the split
1572 * node (s012 numbers), and number of bytes (s1bytes) of the
1573 * shared drop which flow to S1 if any
1430 */ 1574 */
1431 rset = RIGHT_SHIFT_NO_FLOW; 1575 rset = RIGHT_SHIFT_NO_FLOW;
1432 rnver = get_num_ver(vn->vn_mode, tb, h, 1576 rnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1451,10 +1595,12 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1451 rset = RIGHT_SHIFT_FLOW, rnver = rnver1; 1595 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1452 } 1596 }
1453 1597
1454 /* calculate number of blocks S[h] must be split into when 1598 /*
1455 items are shifted in both directions, 1599 * calculate number of blocks S[h] must be split into when
1456 as well as number of items in each part of the splitted node (s012 numbers), 1600 * items are shifted in both directions, as well as number
1457 and number of bytes (s1bytes) of the shared drop which flow to S1 if any 1601 * of items in each part of the split node (s012 numbers),
1602 * and number of bytes (s1bytes) of the shared drop which
1603 * flow to S1 if any
1458 */ 1604 */
1459 lrset = LR_SHIFT_NO_FLOW; 1605 lrset = LR_SHIFT_NO_FLOW;
1460 lrnver = get_num_ver(vn->vn_mode, tb, h, 1606 lrnver = get_num_ver(vn->vn_mode, tb, h,
@@ -1481,10 +1627,12 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1481 lrset = LR_SHIFT_FLOW, lrnver = lrnver1; 1627 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1482 } 1628 }
1483 1629
1484 /* Our general shifting strategy is: 1630 /*
1485 1) to minimized number of new nodes; 1631 * Our general shifting strategy is:
1486 2) to minimized number of neighbors involved in shifting; 1632 * 1) to minimize the number of new nodes;
1487 3) to minimized number of disk reads; */ 1633 * 2) to minimize the number of neighbors involved in shifting;
 1634 * 3) to minimize the number of disk reads;
1635 */
1488 1636
1489 /* we can win TWO or ONE nodes by shifting in both directions */ 1637 /* we can win TWO or ONE nodes by shifting in both directions */
1490 if (lrnver < lnver && lrnver < rnver) { 1638 if (lrnver < lnver && lrnver < rnver) {
@@ -1508,42 +1656,59 @@ static int ip_check_balance(struct tree_balance *tb, int h)
1508 return CARRY_ON; 1656 return CARRY_ON;
1509 } 1657 }
1510 1658
1511 /* if shifting doesn't lead to better packing then don't shift */ 1659 /*
1660 * if shifting doesn't lead to better packing
1661 * then don't shift
1662 */
1512 if (nver == lrnver) { 1663 if (nver == lrnver) {
1513 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, 1664 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1514 -1); 1665 -1);
1515 return CARRY_ON; 1666 return CARRY_ON;
1516 } 1667 }
1517 1668
1518 /* now we know that for better packing shifting in only one 1669 /*
1519 direction either to the left or to the right is required */ 1670 * now we know that for better packing shifting in only one
1671 * direction either to the left or to the right is required
1672 */
1520 1673
1521 /* if shifting to the left is better than shifting to the right */ 1674 /*
1675 * if shifting to the left is better than
1676 * shifting to the right
1677 */
1522 if (lnver < rnver) { 1678 if (lnver < rnver) {
1523 SET_PAR_SHIFT_LEFT; 1679 SET_PAR_SHIFT_LEFT;
1524 return CARRY_ON; 1680 return CARRY_ON;
1525 } 1681 }
1526 1682
1527 /* if shifting to the right is better than shifting to the left */ 1683 /*
1684 * if shifting to the right is better than
1685 * shifting to the left
1686 */
1528 if (lnver > rnver) { 1687 if (lnver > rnver) {
1529 SET_PAR_SHIFT_RIGHT; 1688 SET_PAR_SHIFT_RIGHT;
1530 return CARRY_ON; 1689 return CARRY_ON;
1531 } 1690 }
1532 1691
1533 /* now shifting in either direction gives the same number 1692 /*
1534 of nodes and we can make use of the cached neighbors */ 1693 * now shifting in either direction gives the same number
1694 * of nodes and we can make use of the cached neighbors
1695 */
1535 if (is_left_neighbor_in_cache(tb, h)) { 1696 if (is_left_neighbor_in_cache(tb, h)) {
1536 SET_PAR_SHIFT_LEFT; 1697 SET_PAR_SHIFT_LEFT;
1537 return CARRY_ON; 1698 return CARRY_ON;
1538 } 1699 }
1539 1700
1540 /* shift to the right independently on whether the right neighbor in cache or not */ 1701 /*
 1702 * shift to the right regardless of whether the
 1703 * right neighbor is in cache or not
1704 */
1541 SET_PAR_SHIFT_RIGHT; 1705 SET_PAR_SHIFT_RIGHT;
1542 return CARRY_ON; 1706 return CARRY_ON;
1543 } 1707 }
1544} 1708}
1545 1709
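To summarize the cascade that closes ip_check_balance() above, here is a minimal standalone sketch (not the kernel's code; the enum and the left_cached flag are illustrative) of the preference order: fewest new nodes first, then the cheaper single-direction shift, then the neighbor that is already in cache:

enum shift_plan { SHIFT_NONE, SHIFT_LEFT, SHIFT_RIGHT, SHIFT_BOTH };

static enum shift_plan choose_plan(int nver, int lnver, int rnver,
				   int lrnver, int left_cached)
{
	/* shifting both ways wins one or two nodes outright */
	if (lrnver < lnver && lrnver < rnver)
		return SHIFT_BOTH;

	/* shifting would not pack any better than splitting in place */
	if (nver == lrnver)
		return SHIFT_NONE;

	/* one direction is strictly better than the other */
	if (lnver != rnver)
		return lnver < rnver ? SHIFT_LEFT : SHIFT_RIGHT;

	/* tie: prefer a neighbor we will not have to read from disk */
	return left_cached ? SHIFT_LEFT : SHIFT_RIGHT;
}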
1546/* Check whether current node S[h] is balanced when Decreasing its size by 1710/*
1711 * Check whether current node S[h] is balanced when Decreasing its size by
1547 * Deleting or Cutting for INTERNAL node of S+tree. 1712 * Deleting or Cutting for INTERNAL node of S+tree.
1548 * Calculate parameters for balancing for current level h. 1713 * Calculate parameters for balancing for current level h.
1549 * Parameters: 1714 * Parameters:
@@ -1563,8 +1728,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1563{ 1728{
1564 struct virtual_node *vn = tb->tb_vn; 1729 struct virtual_node *vn = tb->tb_vn;
1565 1730
1566 /* Sh is the node whose balance is currently being checked, 1731 /*
1567 and Fh is its father. */ 1732 * Sh is the node whose balance is currently being checked,
1733 * and Fh is its father.
1734 */
1568 struct buffer_head *Sh, *Fh; 1735 struct buffer_head *Sh, *Fh;
1569 int maxsize, ret; 1736 int maxsize, ret;
1570 int lfree, rfree /* free space in L and R */ ; 1737 int lfree, rfree /* free space in L and R */ ;
@@ -1574,19 +1741,25 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1574 1741
1575 maxsize = MAX_CHILD_SIZE(Sh); 1742 maxsize = MAX_CHILD_SIZE(Sh);
1576 1743
1577/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ 1744 /*
1578/* new_nr_item = number of items node would have if operation is */ 1745 * using tb->insert_size[h], which is negative in this case,
1579/* performed without balancing (new_nr_item); */ 1746 * create_virtual_node calculates:
1747 * new_nr_item = number of items node would have if operation is
1748 * performed without balancing (new_nr_item);
1749 */
1580 create_virtual_node(tb, h); 1750 create_virtual_node(tb, h);
1581 1751
1582 if (!Fh) { /* S[h] is the root. */ 1752 if (!Fh) { /* S[h] is the root. */
1753 /* no balancing for higher levels needed */
1583 if (vn->vn_nr_item > 0) { 1754 if (vn->vn_nr_item > 0) {
1584 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1755 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1585 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1756 return NO_BALANCING_NEEDED;
1586 } 1757 }
1587 /* new_nr_item == 0. 1758 /*
1759 * new_nr_item == 0.
1588 * Current root will be deleted resulting in 1760 * Current root will be deleted resulting in
1589 * decrementing the tree height. */ 1761 * decrementing the tree height.
1762 */
1590 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); 1763 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1591 return CARRY_ON; 1764 return CARRY_ON;
1592 } 1765 }
@@ -1602,12 +1775,18 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1602 check_left(tb, h, lfree); 1775 check_left(tb, h, lfree);
1603 check_right(tb, h, rfree); 1776 check_right(tb, h, rfree);
1604 1777
1605 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. 1778 /*
1606 * In this case we balance only if it leads to better packing. */ 1779 * Balance condition for the internal node is valid.
1607 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, 1780 * In this case we balance only if it leads to better packing.
1608 * which is impossible with greater values of new_nr_item. */ 1781 */
1782 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {
1783 /*
1784 * Here we join S[h] with one of its neighbors,
1785 * which is impossible with greater values of new_nr_item.
1786 */
1787 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {
1788 /* All contents of S[h] can be moved to L[h]. */
1609 if (tb->lnum[h] >= vn->vn_nr_item + 1) { 1789 if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1610 /* All contents of S[h] can be moved to L[h]. */
1611 int n; 1790 int n;
1612 int order_L; 1791 int order_L;
1613 1792
@@ -1623,8 +1802,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1623 return CARRY_ON; 1802 return CARRY_ON;
1624 } 1803 }
1625 1804
1805 /* All contents of S[h] can be moved to R[h]. */
1626 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1806 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1627 /* All contents of S[h] can be moved to R[h]. */
1628 int n; 1807 int n;
1629 int order_R; 1808 int order_R;
1630 1809
@@ -1641,8 +1820,11 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1641 } 1820 }
1642 } 1821 }
1643 1822
1823 /*
1824 * All contents of S[h] can be moved to the neighbors
1825 * (L[h] & R[h]).
1826 */
1644 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1827 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1645 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1646 int to_r; 1828 int to_r;
1647 1829
1648 to_r = 1830 to_r =
@@ -1659,7 +1841,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1659 return NO_BALANCING_NEEDED; 1841 return NO_BALANCING_NEEDED;
1660 } 1842 }
1661 1843
1662 /* Current node contain insufficient number of items. Balancing is required. */ 1844 /*
 1845 * Current node contains an insufficient number of items.
1846 * Balancing is required.
1847 */
1663 /* Check whether we can merge S[h] with left neighbor. */ 1848 /* Check whether we can merge S[h] with left neighbor. */
1664 if (tb->lnum[h] >= vn->vn_nr_item + 1) 1849 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1665 if (is_left_neighbor_in_cache(tb, h) 1850 if (is_left_neighbor_in_cache(tb, h)
@@ -1726,7 +1911,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h)
1726 return CARRY_ON; 1911 return CARRY_ON;
1727} 1912}
1728 1913
1729/* Check whether current node S[h] is balanced when Decreasing its size by 1914/*
1915 * Check whether current node S[h] is balanced when Decreasing its size by
1730 * Deleting or Truncating for LEAF node of S+tree. 1916 * Deleting or Truncating for LEAF node of S+tree.
1731 * Calculate parameters for balancing for current level h. 1917 * Calculate parameters for balancing for current level h.
1732 * Parameters: 1918 * Parameters:
@@ -1743,15 +1929,21 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1743{ 1929{
1744 struct virtual_node *vn = tb->tb_vn; 1930 struct virtual_node *vn = tb->tb_vn;
1745 1931
1746 /* Number of bytes that must be deleted from 1932 /*
1747 (value is negative if bytes are deleted) buffer which 1933 * Number of bytes that must be deleted from the buffer
1748 contains node being balanced. The mnemonic is that the 1934 * which contains the node being balanced (the value is
1749 attempted change in node space used level is levbytes bytes. */ 1935 * negative if bytes are deleted). The mnemonic: the attempted
 1936 * change in node space used at this level is levbytes bytes.
1937 */
1750 int levbytes; 1938 int levbytes;
1939
1751 /* the maximal item size */ 1940 /* the maximal item size */
1752 int maxsize, ret; 1941 int maxsize, ret;
1753 /* S0 is the node whose balance is currently being checked, 1942
1754 and F0 is its father. */ 1943 /*
1944 * S0 is the node whose balance is currently being checked,
1945 * and F0 is its father.
1946 */
1755 struct buffer_head *S0, *F0; 1947 struct buffer_head *S0, *F0;
1756 int lfree, rfree /* free space in L and R */ ; 1948 int lfree, rfree /* free space in L and R */ ;
1757 1949
@@ -1784,9 +1976,11 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1784 if (are_leaves_removable(tb, lfree, rfree)) 1976 if (are_leaves_removable(tb, lfree, rfree))
1785 return CARRY_ON; 1977 return CARRY_ON;
1786 1978
1787 /* determine maximal number of items we can shift to the left/right neighbor 1979 /*
1788 and the maximal number of bytes that can flow to the left/right neighbor 1980 * determine maximal number of items we can shift to the left/right
1789 from the left/right most liquid item that cannot be shifted from S[0] entirely 1981 * neighbor and the maximal number of bytes that can flow to the
1982 * left/right neighbor from the left/right most liquid item that
1983 * cannot be shifted from S[0] entirely
1790 */ 1984 */
1791 check_left(tb, h, lfree); 1985 check_left(tb, h, lfree);
1792 check_right(tb, h, rfree); 1986 check_right(tb, h, rfree);
@@ -1810,7 +2004,10 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1810 return CARRY_ON; 2004 return CARRY_ON;
1811 } 2005 }
1812 2006
1813 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ 2007 /*
2008 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]).
2009 * Set parameters and return
2010 */
1814 if (is_leaf_removable(tb)) 2011 if (is_leaf_removable(tb))
1815 return CARRY_ON; 2012 return CARRY_ON;
1816 2013
@@ -1820,7 +2017,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1820 return NO_BALANCING_NEEDED; 2017 return NO_BALANCING_NEEDED;
1821} 2018}
1822 2019
1823/* Check whether current node S[h] is balanced when Decreasing its size by 2020/*
2021 * Check whether current node S[h] is balanced when Decreasing its size by
1824 * Deleting or Cutting. 2022 * Deleting or Cutting.
1825 * Calculate parameters for balancing for current level h. 2023 * Calculate parameters for balancing for current level h.
1826 * Parameters: 2024 * Parameters:
@@ -1844,15 +2042,16 @@ static int dc_check_balance(struct tree_balance *tb, int h)
1844 return dc_check_balance_leaf(tb, h); 2042 return dc_check_balance_leaf(tb, h);
1845} 2043}
1846 2044
1847/* Check whether current node S[h] is balanced. 2045/*
2046 * Check whether current node S[h] is balanced.
1848 * Calculate parameters for balancing for current level h. 2047 * Calculate parameters for balancing for current level h.
1849 * Parameters: 2048 * Parameters:
1850 * 2049 *
1851 * tb tree_balance structure: 2050 * tb tree_balance structure:
1852 * 2051 *
1853 * tb is a large structure that must be read about in the header file 2052 * tb is a large structure that must be read about in the header
1854 * at the same time as this procedure if the reader is to successfully 2053 * file at the same time as this procedure if the reader is
1855 * understand this procedure 2054 * to successfully understand this procedure
1856 * 2055 *
1857 * h current level of the node; 2056 * h current level of the node;
1858 * inum item number in S[h]; 2057 * inum item number in S[h];
@@ -1882,8 +2081,8 @@ static int check_balance(int mode,
1882 RFALSE(mode == M_INSERT && !vn->vn_ins_ih, 2081 RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1883 "vs-8255: ins_ih can not be 0 in insert mode"); 2082 "vs-8255: ins_ih can not be 0 in insert mode");
1884 2083
2084 /* Calculate balance parameters when size of node is increasing. */
1885 if (tb->insert_size[h] > 0) 2085 if (tb->insert_size[h] > 0)
1886 /* Calculate balance parameters when size of node is increasing. */
1887 return ip_check_balance(tb, h); 2086 return ip_check_balance(tb, h);
1888 2087
1889 /* Calculate balance parameters when size of node is decreasing. */ 2088 /* Calculate balance parameters when size of node is decreasing. */
@@ -1911,21 +2110,23 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1911 PATH_OFFSET_POSITION(path, path_offset - 1) = 0; 2110 PATH_OFFSET_POSITION(path, path_offset - 1) = 0;
1912 return CARRY_ON; 2111 return CARRY_ON;
1913 } 2112 }
1914 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ 2113 /* Root is changed and we must recalculate the path. */
2114 return REPEAT_SEARCH;
1915 } 2115 }
1916 2116
2117 /* Parent in the path is not in the tree. */
1917 if (!B_IS_IN_TREE 2118 if (!B_IS_IN_TREE
1918 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) 2119 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1)))
1919 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ 2120 return REPEAT_SEARCH;
1920 2121
1921 if ((position = 2122 if ((position =
1922 PATH_OFFSET_POSITION(path, 2123 PATH_OFFSET_POSITION(path,
1923 path_offset - 1)) > B_NR_ITEMS(bh)) 2124 path_offset - 1)) > B_NR_ITEMS(bh))
1924 return REPEAT_SEARCH; 2125 return REPEAT_SEARCH;
1925 2126
 2127 /* Parent in the path is not the current node's parent in the tree. */
1926 if (B_N_CHILD_NUM(bh, position) != 2128 if (B_N_CHILD_NUM(bh, position) !=
1927 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) 2129 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr)
1928 /* Parent in the path is not parent of the current node in the tree. */
1929 return REPEAT_SEARCH; 2130 return REPEAT_SEARCH;
1930 2131
1931 if (buffer_locked(bh)) { 2132 if (buffer_locked(bh)) {
@@ -1936,10 +2137,15 @@ static int get_direct_parent(struct tree_balance *tb, int h)
1936 return REPEAT_SEARCH; 2137 return REPEAT_SEARCH;
1937 } 2138 }
1938 2139
1939 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ 2140 /*
 2141 * Parent in the path is unlocked and really the parent
2142 * of the current node.
2143 */
2144 return CARRY_ON;
1940} 2145}
1941 2146
1942/* Using lnum[h] and rnum[h] we should determine what neighbors 2147/*
2148 * Using lnum[h] and rnum[h] we should determine what neighbors
1943 * of S[h] we 2149 * of S[h] we
1944 * need in order to balance S[h], and get them if necessary. 2150 * need in order to balance S[h], and get them if necessary.
1945 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 2151 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
@@ -1997,7 +2203,7 @@ static int get_neighbors(struct tree_balance *tb, int h)
1997 } 2203 }
1998 2204
1999 /* We need right neighbor to balance S[path_offset]. */ 2205 /* We need right neighbor to balance S[path_offset]. */
2000 if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */ 2206 if (tb->rnum[h]) {
2001 PROC_INFO_INC(sb, need_r_neighbor[h]); 2207 PROC_INFO_INC(sb, need_r_neighbor[h]);
2002 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2208 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset);
2003 2209
@@ -2053,9 +2259,11 @@ static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2053 (max_num_of_entries - 1) * sizeof(__u16)); 2259 (max_num_of_entries - 1) * sizeof(__u16));
2054} 2260}
2055 2261
2056/* maybe we should fail balancing we are going to perform when kmalloc 2262/*
2057 fails several times. But now it will loop until kmalloc gets 2263 * maybe we should fail the balancing we are about to perform if
2058 required memory */ 2264 * kmalloc fails several times. But for now it will loop until
 2265 * kmalloc gets the required memory
2266 */
2059static int get_mem_for_virtual_node(struct tree_balance *tb) 2267static int get_mem_for_virtual_node(struct tree_balance *tb)
2060{ 2268{
2061 int check_fs = 0; 2269 int check_fs = 0;
@@ -2064,8 +2272,8 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2064 2272
2065 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); 2273 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2066 2274
2275 /* we have to allocate more memory for virtual node */
2067 if (size > tb->vn_buf_size) { 2276 if (size > tb->vn_buf_size) {
2068 /* we have to allocate more memory for virtual node */
2069 if (tb->vn_buf) { 2277 if (tb->vn_buf) {
2070 /* free memory allocated before */ 2278 /* free memory allocated before */
2071 kfree(tb->vn_buf); 2279 kfree(tb->vn_buf);
@@ -2079,10 +2287,12 @@ static int get_mem_for_virtual_node(struct tree_balance *tb)
2079 /* get memory for virtual item */ 2287 /* get memory for virtual item */
2080 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); 2288 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);
2081 if (!buf) { 2289 if (!buf) {
2082 /* getting memory with GFP_KERNEL priority may involve 2290 /*
2083 balancing now (due to indirect_to_direct conversion on 2291 * getting memory with GFP_KERNEL priority may involve
2084 dcache shrinking). So, release path and collected 2292 * balancing now (due to indirect_to_direct conversion
2085 resources here */ 2293 * on dcache shrinking). So, release path and collected
2294 * resources here
2295 */
2086 free_buffers_in_tb(tb); 2296 free_buffers_in_tb(tb);
2087 buf = kmalloc(size, GFP_NOFS); 2297 buf = kmalloc(size, GFP_NOFS);
2088 if (!buf) { 2298 if (!buf) {
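The allocation dance above is a reusable pattern: try an atomic, warning-free allocation first, and only if that fails release anything a blocking allocation could deadlock against, then retry with a filesystem-safe mask. A minimal sketch under those assumptions (release_resources() is a hypothetical stand-in for free_buffers_in_tb()):

#include <linux/slab.h>

static void *alloc_scratch(size_t size, void (*release_resources)(void *arg),
			   void *arg)
{
	/* opportunistic: must not sleep, do not warn on failure */
	void *buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN);

	if (!buf) {
		/* drop locks/buffers so a sleeping allocation is safe */
		release_resources(arg);
		buf = kmalloc(size, GFP_NOFS);
	}
	return buf;
}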
@@ -2168,8 +2378,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2168 for (i = tb->tb_path->path_length; 2378 for (i = tb->tb_path->path_length;
2169 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2379 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2170 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { 2380 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) {
2171 /* if I understand correctly, we can only be sure the last buffer 2381 /*
2172 ** in the path is in the tree --clm 2382 * if I understand correctly, we can only
2383 * be sure the last buffer in the path is
2384 * in the tree --clm
2173 */ 2385 */
2174#ifdef CONFIG_REISERFS_CHECK 2386#ifdef CONFIG_REISERFS_CHECK
2175 if (PATH_PLAST_BUFFER(tb->tb_path) == 2387 if (PATH_PLAST_BUFFER(tb->tb_path) ==
@@ -2256,13 +2468,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2256 } 2468 }
2257 } 2469 }
2258 } 2470 }
2259 /* as far as I can tell, this is not required. The FEB list seems 2471
2260 ** to be full of newly allocated nodes, which will never be locked, 2472 /*
2261 ** dirty, or anything else. 2473 * as far as I can tell, this is not required. The FEB list
2262 ** To be safe, I'm putting in the checks and waits in. For the moment, 2474 * seems to be full of newly allocated nodes, which will
2263 ** they are needed to keep the code in journal.c from complaining 2475 * never be locked, dirty, or anything else.
2264 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. 2476 * To be safe, I'm putting the checks and waits in.
2265 ** --clm 2477 * For the moment, they are needed to keep the code in
2478 * journal.c from complaining about the buffer.
2479 * That code is inside CONFIG_REISERFS_CHECK as well. --clm
2266 */ 2480 */
2267 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2481 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2268 if (tb->FEB[i]) { 2482 if (tb->FEB[i]) {
@@ -2300,7 +2514,8 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2300 return CARRY_ON; 2514 return CARRY_ON;
2301} 2515}
2302 2516
2303/* Prepare for balancing, that is 2517/*
2518 * Prepare for balancing, that is
2304 * get all necessary parents, and neighbors; 2519 * get all necessary parents, and neighbors;
2305 * analyze what and where should be moved; 2520 * analyze what and where should be moved;
2306 * get sufficient number of new nodes; 2521 * get sufficient number of new nodes;
@@ -2309,13 +2524,14 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
2309 * When ported to SMP kernels, only at the last moment after all needed nodes 2524 * When ported to SMP kernels, only at the last moment after all needed nodes
2310 * are collected in cache, will the resources be locked using the usual 2525 * are collected in cache, will the resources be locked using the usual
2311 * textbook ordered lock acquisition algorithms. Note that ensuring that 2526 * textbook ordered lock acquisition algorithms. Note that ensuring that
2312 * this code neither write locks what it does not need to write lock nor locks out of order 2527 * this code neither write locks what it does not need to write lock nor locks
2313 * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans 2528 * out of order will be a pain in the butt that could have been avoided.
2529 * Grumble grumble. -Hans
2314 * 2530 *
2315 * fix is meant in the sense of render unchanging 2531 * fix is meant in the sense of render unchanging
2316 * 2532 *
2317 * Latency might be improved by first gathering a list of what buffers are needed 2533 * Latency might be improved by first gathering a list of what buffers
2318 * and then getting as many of them in parallel as possible? -Hans 2534 * are needed and then getting as many of them in parallel as possible? -Hans
2319 * 2535 *
2320 * Parameters: 2536 * Parameters:
2321 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) 2537 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append)
@@ -2335,8 +2551,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2335 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); 2551 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path);
2336 int pos_in_item; 2552 int pos_in_item;
2337 2553
2338 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared 2554 /*
2339 ** during wait_tb_buffers_run 2555 * we set wait_tb_buffers_run when we have to restore any dirty
2556 * bits cleared during wait_tb_buffers_run
2340 */ 2557 */
2341 int wait_tb_buffers_run = 0; 2558 int wait_tb_buffers_run = 0;
2342 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 2559 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
@@ -2347,10 +2564,11 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2347 2564
2348 tb->fs_gen = get_generation(tb->tb_sb); 2565 tb->fs_gen = get_generation(tb->tb_sb);
2349 2566
2350 /* we prepare and log the super here so it will already be in the 2567 /*
2351 ** transaction when do_balance needs to change it. 2568 * we prepare and log the super here so it will already be in the
2352 ** This way do_balance won't have to schedule when trying to prepare 2569 * transaction when do_balance needs to change it.
2353 ** the super for logging 2570 * This way do_balance won't have to schedule when trying to prepare
2571 * the super for logging
2354 */ 2572 */
2355 reiserfs_prepare_for_journal(tb->tb_sb, 2573 reiserfs_prepare_for_journal(tb->tb_sb,
2356 SB_BUFFER_WITH_SB(tb->tb_sb), 1); 2574 SB_BUFFER_WITH_SB(tb->tb_sb), 1);
@@ -2408,7 +2626,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2408#endif 2626#endif
2409 2627
2410 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) 2628 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH)
2411 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2629 /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */
2412 return REPEAT_SEARCH; 2630 return REPEAT_SEARCH;
2413 2631
2414 /* Starting from the leaf level; for all levels h of the tree. */ 2632 /* Starting from the leaf level; for all levels h of the tree. */
@@ -2427,7 +2645,10 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2427 goto repeat; 2645 goto repeat;
2428 if (h != MAX_HEIGHT - 1) 2646 if (h != MAX_HEIGHT - 1)
2429 tb->insert_size[h + 1] = 0; 2647 tb->insert_size[h + 1] = 0;
2430 /* ok, analysis and resource gathering are complete */ 2648 /*
2649 * ok, analysis and resource gathering
2650 * are complete
2651 */
2431 break; 2652 break;
2432 } 2653 }
2433 goto repeat; 2654 goto repeat;
@@ -2437,15 +2658,19 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2437 if (ret != CARRY_ON) 2658 if (ret != CARRY_ON)
2438 goto repeat; 2659 goto repeat;
2439 2660
2440 /* No disk space, or schedule occurred and analysis may be 2661 /*
2441 * invalid and needs to be redone. */ 2662 * No disk space, or schedule occurred and analysis may be
2663 * invalid and needs to be redone.
2664 */
2442 ret = get_empty_nodes(tb, h); 2665 ret = get_empty_nodes(tb, h);
2443 if (ret != CARRY_ON) 2666 if (ret != CARRY_ON)
2444 goto repeat; 2667 goto repeat;
2445 2668
2669 /*
2670 * We have a positive insert size but no nodes exist on this
 2671 * level; this means that we are creating a new root.
2672 */
2446 if (!PATH_H_PBUFFER(tb->tb_path, h)) { 2673 if (!PATH_H_PBUFFER(tb->tb_path, h)) {
2447 /* We have a positive insert size but no nodes exist on this
2448 level, this means that we are creating a new root. */
2449 2674
2450 RFALSE(tb->blknum[h] != 1, 2675 RFALSE(tb->blknum[h] != 1,
2451 "PAP-8350: creating new empty root"); 2676 "PAP-8350: creating new empty root");
@@ -2453,11 +2678,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2453 if (h < MAX_HEIGHT - 1) 2678 if (h < MAX_HEIGHT - 1)
2454 tb->insert_size[h + 1] = 0; 2679 tb->insert_size[h + 1] = 0;
2455 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { 2680 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) {
2681 /*
2682 * The tree needs to be grown, so this node S[h]
2683 * which is the root node is split into two nodes,
2684 * and a new node (S[h+1]) will be created to
2685 * become the root node.
2686 */
2456 if (tb->blknum[h] > 1) { 2687 if (tb->blknum[h] > 1) {
2457 /* The tree needs to be grown, so this node S[h]
2458 which is the root node is split into two nodes,
2459 and a new node (S[h+1]) will be created to
2460 become the root node. */
2461 2688
2462 RFALSE(h == MAX_HEIGHT - 1, 2689 RFALSE(h == MAX_HEIGHT - 1,
2463 "PAP-8355: attempt to create too high of a tree"); 2690 "PAP-8355: attempt to create too high of a tree");
@@ -2488,11 +2715,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2488 } 2715 }
2489 2716
2490 repeat: 2717 repeat:
2491 // fix_nodes was unable to perform its calculation due to 2718 /*
2492 // filesystem got changed under us, lack of free disk space or i/o 2719 * fix_nodes was unable to perform its calculation because the
2493 // failure. If the first is the case - the search will be 2720 * filesystem changed under us, free disk space ran out, or an
2494 // repeated. For now - free all resources acquired so far except 2721 * i/o failure occurred. If the first is the case, the search
2495 // for the new allocated nodes 2722 * will be repeated. For now, free all resources acquired so
 2723 * far except for the newly allocated nodes
2724 */
2496 { 2725 {
2497 int i; 2726 int i;
2498 2727
@@ -2548,8 +2777,6 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
2548 2777
2549} 2778}
2550 2779
2551/* Anatoly will probably forgive me renaming tb to tb. I just
2552 wanted to make lines shorter */
2553void unfix_nodes(struct tree_balance *tb) 2780void unfix_nodes(struct tree_balance *tb)
2554{ 2781{
2555 int i; 2782 int i;
@@ -2578,8 +2805,10 @@ void unfix_nodes(struct tree_balance *tb)
2578 for (i = 0; i < MAX_FEB_SIZE; i++) { 2805 for (i = 0; i < MAX_FEB_SIZE; i++) {
2579 if (tb->FEB[i]) { 2806 if (tb->FEB[i]) {
2580 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; 2807 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2581 /* de-allocated block which was not used by balancing and 2808 /*
2582 bforget about buffer for it */ 2809 * de-allocated block which was not used by
2810 * balancing and bforget about buffer for it
2811 */
2583 brelse(tb->FEB[i]); 2812 brelse(tb->FEB[i]);
2584 reiserfs_free_block(tb->transaction_handle, NULL, 2813 reiserfs_free_block(tb->transaction_handle, NULL,
2585 blocknr, 0); 2814 blocknr, 0);
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 91b0cc1242a2..7a26c4fe6c46 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -12,12 +12,6 @@
12 * Yura's function is added (04/07/2000) 12 * Yura's function is added (04/07/2000)
13 */ 13 */
14 14
15//
16// keyed_hash
17// yura_hash
18// r5_hash
19//
20
21#include <linux/kernel.h> 15#include <linux/kernel.h>
22#include "reiserfs.h" 16#include "reiserfs.h"
23#include <asm/types.h> 17#include <asm/types.h>
@@ -56,7 +50,7 @@ u32 keyed_hash(const signed char *msg, int len)
56 u32 pad; 50 u32 pad;
57 int i; 51 int i;
58 52
59 // assert(len >= 0 && len < 256); 53 /* assert(len >= 0 && len < 256); */
60 54
61 pad = (u32) len | ((u32) len << 8); 55 pad = (u32) len | ((u32) len << 8);
62 pad |= pad << 16; 56 pad |= pad << 16;
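A quick worked example of what those two lines compute: for len == 5, (u32) len | ((u32) len << 8) yields 0x00000505, and pad |= pad << 16 spreads that to 0x05050505, i.e. the length byte replicated into all four bytes of the padding word that keyed_hash folds into its final partial block (my reading of the elided remainder of the function).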
@@ -127,9 +121,10 @@ u32 keyed_hash(const signed char *msg, int len)
127 return h0 ^ h1; 121 return h0 ^ h1;
128} 122}
129 123
130/* What follows in this file is copyright 2000 by Hans Reiser, and the 124/*
131 * licensing of what follows is governed by reiserfs/README */ 125 * What follows in this file is copyright 2000 by Hans Reiser, and the
132 126 * licensing of what follows is governed by reiserfs/README
127 */
133u32 yura_hash(const signed char *msg, int len) 128u32 yura_hash(const signed char *msg, int len)
134{ 129{
135 int j, pow; 130 int j, pow;
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index ae26a271da35..c4a696714148 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -12,7 +12,10 @@
12int balance_internal(struct tree_balance *, 12int balance_internal(struct tree_balance *,
13 int, int, struct item_head *, struct buffer_head **); 13 int, int, struct item_head *, struct buffer_head **);
14 14
15/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ 15/*
16 * modes of internal_shift_left, internal_shift_right and
17 * internal_insert_childs
18 */
16#define INTERNAL_SHIFT_FROM_S_TO_L 0 19#define INTERNAL_SHIFT_FROM_S_TO_L 0
17#define INTERNAL_SHIFT_FROM_R_TO_S 1 20#define INTERNAL_SHIFT_FROM_R_TO_S 1
18#define INTERNAL_SHIFT_FROM_L_TO_S 2 21#define INTERNAL_SHIFT_FROM_L_TO_S 2
@@ -32,7 +35,9 @@ static void internal_define_dest_src_infos(int shift_mode,
32 memset(src_bi, 0, sizeof(struct buffer_info)); 35 memset(src_bi, 0, sizeof(struct buffer_info));
33 /* define dest, src, dest parent, dest position */ 36 /* define dest, src, dest parent, dest position */
34 switch (shift_mode) { 37 switch (shift_mode) {
35 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ 38
39 /* used in internal_shift_left */
40 case INTERNAL_SHIFT_FROM_S_TO_L:
36 src_bi->tb = tb; 41 src_bi->tb = tb;
37 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); 42 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
38 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); 43 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
@@ -52,12 +57,14 @@ static void internal_define_dest_src_infos(int shift_mode,
52 dest_bi->tb = tb; 57 dest_bi->tb = tb;
53 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); 58 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
54 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); 59 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
55 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ 60 /* dest position is analog of dest->b_item_order */
61 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
56 *d_key = tb->lkey[h]; 62 *d_key = tb->lkey[h];
57 *cf = tb->CFL[h]; 63 *cf = tb->CFL[h];
58 break; 64 break;
59 65
60 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ 66 /* used in internal_shift_left */
67 case INTERNAL_SHIFT_FROM_R_TO_S:
61 src_bi->tb = tb; 68 src_bi->tb = tb;
62 src_bi->bi_bh = tb->R[h]; 69 src_bi->bi_bh = tb->R[h];
63 src_bi->bi_parent = tb->FR[h]; 70 src_bi->bi_parent = tb->FR[h];
@@ -111,7 +118,8 @@ static void internal_define_dest_src_infos(int shift_mode,
111 } 118 }
112} 119}
113 120
114/* Insert count node pointers into buffer cur before position to + 1. 121/*
122 * Insert count node pointers into buffer cur before position to + 1.
115 * Insert count items into buffer cur before position to. 123 * Insert count items into buffer cur before position to.
116 * Items and node pointers are specified by inserted and bh respectively. 124 * Items and node pointers are specified by inserted and bh respectively.
117 */ 125 */
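The pointers-before-to+1 / items-before-to asymmetry follows from the usual B+-tree internal-node invariant: N keys interleave with N + 1 child pointers. A purely illustrative in-memory model (not the on-disk reiserfs block layout, where keys and child pointers occupy separate regions of the block):

#define DEMO_FANOUT 4	/* illustrative; reiserfs derives fanout from blocksize */

struct demo_internal_node {
	int nr_keys;				/* N */
	unsigned long key[DEMO_FANOUT - 1];	/* key[i] separates subtrees */
	unsigned long child[DEMO_FANOUT];	/* N + 1 child block numbers;
						 * child[i] covers keys below
						 * key[i], child[i + 1] those
						 * at or above it */
};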
@@ -190,8 +198,10 @@ static void internal_insert_childs(struct buffer_info *cur_bi,
190 198
191} 199}
192 200
193/* Delete del_num items and node pointers from buffer cur starting from * 201/*
194 * the first_i'th item and first_p'th pointers respectively. */ 202 * Delete del_num items and node pointers from buffer cur starting from
203 * the first_i'th item and first_p'th pointers respectively.
204 */
195static void internal_delete_pointers_items(struct buffer_info *cur_bi, 205static void internal_delete_pointers_items(struct buffer_info *cur_bi,
196 int first_p, 206 int first_p,
197 int first_i, int del_num) 207 int first_i, int del_num)
@@ -270,22 +280,30 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
270 280
271 i_from = (from == 0) ? from : from - 1; 281 i_from = (from == 0) ? from : from - 1;
272 282
273 /* delete n pointers starting from `from' position in CUR; 283 /*
274 delete n keys starting from 'i_from' position in CUR; 284 * delete n pointers starting from `from' position in CUR;
285 * delete n keys starting from 'i_from' position in CUR;
275 */ 286 */
276 internal_delete_pointers_items(cur_bi, from, i_from, n); 287 internal_delete_pointers_items(cur_bi, from, i_from, n);
277} 288}
278 289
279/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 290/*
280* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest 291 * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer
281 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest 292 * dest
293 * last_first == FIRST_TO_LAST means that we copy first items
294 * from src to tail of dest
295 * last_first == LAST_TO_FIRST means that we copy last items
296 * from src to head of dest
282 */ 297 */
283static void internal_copy_pointers_items(struct buffer_info *dest_bi, 298static void internal_copy_pointers_items(struct buffer_info *dest_bi,
284 struct buffer_head *src, 299 struct buffer_head *src,
285 int last_first, int cpy_num) 300 int last_first, int cpy_num)
286{ 301{
287 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * 302 /*
288 * as delimiting key have already inserted to buffer dest.*/ 303 * ATTENTION! Number of node pointers in DEST is equal to number
 304 * of items in DEST as the delimiting key has already been
 305 * inserted into buffer dest.
306 */
289 struct buffer_head *dest = dest_bi->bi_bh; 307 struct buffer_head *dest = dest_bi->bi_bh;
290 int nr_dest, nr_src; 308 int nr_dest, nr_src;
291 int dest_order, src_order; 309 int dest_order, src_order;
@@ -366,7 +384,9 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi,
366 384
367} 385}
368 386
369/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. 387/*
388 * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to
389 * buffer dest.
370 * Delete cpy_num - del_par items and node pointers from buffer src. 390 * Delete cpy_num - del_par items and node pointers from buffer src.
371 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. 391 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
372 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 392 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
@@ -385,8 +405,10 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 405 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
386 first_pointer = 0; 406 first_pointer = 0;
387 first_item = 0; 407 first_item = 0;
388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 408 /*
389 for key - with first_item */ 409 * delete cpy_num - del_par pointers and keys: start at
 410 * first_pointer for pointers and at first_item for keys
411 */
390 internal_delete_pointers_items(src_bi, first_pointer, 412 internal_delete_pointers_items(src_bi, first_pointer,
391 first_item, cpy_num - del_par); 413 first_item, cpy_num - del_par);
392 } else { /* shift_right occurs */ 414 } else { /* shift_right occurs */
@@ -404,7 +426,9 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi,
404} 426}
405 427
406/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ 428/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
407static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ 429static void internal_insert_key(struct buffer_info *dest_bi,
430 /* insert key before key with n_dest number */
431 int dest_position_before,
408 struct buffer_head *src, int src_position) 432 struct buffer_head *src, int src_position)
409{ 433{
410 struct buffer_head *dest = dest_bi->bi_bh; 434 struct buffer_head *dest = dest_bi->bi_bh;
@@ -453,13 +477,19 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b
453 } 477 }
454} 478}
455 479
456/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 480/*
457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 481 * Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
482 * Copy pointer_amount node pointers and pointer_amount - 1 items from
483 * buffer src to buffer dest.
458 * Replace d_key'th key in buffer cfl. 484 * Replace d_key'th key in buffer cfl.
459 * Delete pointer_amount items and node pointers from buffer src. 485 * Delete pointer_amount items and node pointers from buffer src.
460 */ 486 */
461/* this can be invoked both to shift from S to L and from R to S */ 487/* this can be invoked both to shift from S to L and from R to S */
462static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ 488static void internal_shift_left(
489 /*
490 * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S
491 */
492 int mode,
463 struct tree_balance *tb, 493 struct tree_balance *tb,
464 int h, int pointer_amount) 494 int h, int pointer_amount)
465{ 495{
@@ -473,7 +503,10 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO
473 /*printk("pointer_amount = %d\n",pointer_amount); */ 503 /*printk("pointer_amount = %d\n",pointer_amount); */
474 504
475 if (pointer_amount) { 505 if (pointer_amount) {
476 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ 506 /*
507 * insert delimiting key from common father of dest and
508 * src to node dest into position B_NR_ITEM(dest)
509 */
477 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, 510 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
478 d_key_position); 511 d_key_position);
479 512
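A concrete, made-up picture of that three-step dance for a single pointer shifted from S[h] into L[h]: suppose the delimiting key in CFL[h] is 20, and S[h] begins with child pointer P followed by key 30. The shift appends 20 to the tail of L[h], moves P in after it, and writes 30 back into CFL[h] as the new delimiter, which is precisely the insert-key / move-pointers / replace-key sequence the comment above lists.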
@@ -492,7 +525,8 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO
492 525
493} 526}
494 527
495/* Insert delimiting key to L[h]. 528/*
529 * Insert delimiting key to L[h].
496 * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. 530 * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
497 * Delete n - 1 items and node pointers from buffer S[h]. 531 * Delete n - 1 items and node pointers from buffer S[h].
498 */ 532 */
@@ -507,23 +541,27 @@ static void internal_shift1_left(struct tree_balance *tb,
507 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 541 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
508 &dest_bi, &src_bi, &d_key_position, &cf); 542 &dest_bi, &src_bi, &d_key_position, &cf);
509 543
510 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ 544 /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
545 if (pointer_amount > 0)
511 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, 546 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
512 d_key_position); 547 d_key_position);
513 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
514 548
515 /* last parameter is del_parameter */ 549 /* last parameter is del_parameter */
516 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, 550 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
517 pointer_amount, 1); 551 pointer_amount, 1);
518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
519} 552}
520 553
521/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 554/*
555 * Insert d_key'th (delimiting) key from buffer cfr to head of dest.
522 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 556 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
523 * Replace d_key'th key in buffer cfr. 557 * Replace d_key'th key in buffer cfr.
524 * Delete n items and node pointers from buffer src. 558 * Delete n items and node pointers from buffer src.
525 */ 559 */
526static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ 560static void internal_shift_right(
561 /*
562 * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S
563 */
564 int mode,
527 struct tree_balance *tb, 565 struct tree_balance *tb,
528 int h, int pointer_amount) 566 int h, int pointer_amount)
529{ 567{
@@ -538,7 +576,10 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR
538 nr = B_NR_ITEMS(src_bi.bi_bh); 576 nr = B_NR_ITEMS(src_bi.bi_bh);
539 577
540 if (pointer_amount > 0) { 578 if (pointer_amount > 0) {
541 /* insert delimiting key from common father of dest and src to dest node into position 0 */ 579 /*
580 * insert delimiting key from common father of dest
581 * and src to dest node into position 0
582 */
542 internal_insert_key(&dest_bi, 0, cf, d_key_position); 583 internal_insert_key(&dest_bi, 0, cf, d_key_position);
543 if (nr == pointer_amount - 1) { 584 if (nr == pointer_amount - 1) {
544 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || 585 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
@@ -559,7 +600,8 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR
559 pointer_amount, 0); 600 pointer_amount, 0);
560} 601}
561 602
562/* Insert delimiting key to R[h]. 603/*
604 * Insert delimiting key to R[h].
563 * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. 605 * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
564 * Delete n - 1 items and node pointers from buffer S[h]. 606 * Delete n - 1 items and node pointers from buffer S[h].
565 */ 607 */
@@ -574,18 +616,19 @@ static void internal_shift1_right(struct tree_balance *tb,
574 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 616 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
575 &dest_bi, &src_bi, &d_key_position, &cf); 617 &dest_bi, &src_bi, &d_key_position, &cf);
576 618
577 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ 619 /* insert rkey from CFR[h] to right neighbor R[h] */
620 if (pointer_amount > 0)
578 internal_insert_key(&dest_bi, 0, cf, d_key_position); 621 internal_insert_key(&dest_bi, 0, cf, d_key_position);
579 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
580 622
581 /* last parameter is del_parameter */ 623 /* last parameter is del_parameter */
582 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, 624 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
583 pointer_amount, 1); 625 pointer_amount, 1);
584 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
585} 626}
586 627
587/* Delete insert_num node pointers together with their left items 628/*
588 * and balance current node.*/ 629 * Delete insert_num node pointers together with their left items
630 * and balance current node.
631 */
589static void balance_internal_when_delete(struct tree_balance *tb, 632static void balance_internal_when_delete(struct tree_balance *tb,
590 int h, int child_pos) 633 int h, int child_pos)
591{ 634{
@@ -626,9 +669,11 @@ static void balance_internal_when_delete(struct tree_balance *tb,
626 new_root = tb->R[h - 1]; 669 new_root = tb->R[h - 1];
627 else 670 else
628 new_root = tb->L[h - 1]; 671 new_root = tb->L[h - 1];
629 /* switch super block's tree root block number to the new value */ 672 /*
673 * switch super block's tree root block
674 * number to the new value */
630 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); 675 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
631 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; 676 /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */
632 PUT_SB_TREE_HEIGHT(tb->tb_sb, 677 PUT_SB_TREE_HEIGHT(tb->tb_sb,
633 SB_TREE_HEIGHT(tb->tb_sb) - 1); 678 SB_TREE_HEIGHT(tb->tb_sb) - 1);
634 679
@@ -636,8 +681,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
636 REISERFS_SB(tb->tb_sb)->s_sbh, 681 REISERFS_SB(tb->tb_sb)->s_sbh,
637 1); 682 1);
638 /*&&&&&&&&&&&&&&&&&&&&&& */ 683 /*&&&&&&&&&&&&&&&&&&&&&& */
684 /* use check_internal if new root is an internal node */
639 if (h > 1) 685 if (h > 1)
640 /* use check_internal if new root is an internal node */
641 check_internal(new_root); 686 check_internal(new_root);
642 /*&&&&&&&&&&&&&&&&&&&&&& */ 687 /*&&&&&&&&&&&&&&&&&&&&&& */
643 688
@@ -648,7 +693,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
648 return; 693 return;
649 } 694 }
650 695
651 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ 696 /* join S[h] with L[h] */
697 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {
652 698
653 RFALSE(tb->rnum[h] != 0, 699 RFALSE(tb->rnum[h] != 0,
654 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", 700 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
@@ -660,7 +706,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
660 return; 706 return;
661 } 707 }
662 708
663 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ 709 /* join S[h] with R[h] */
710 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {
664 RFALSE(tb->lnum[h] != 0, 711 RFALSE(tb->lnum[h] != 0,
665 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", 712 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
666 h, tb->lnum[h]); 713 h, tb->lnum[h]);
@@ -671,17 +718,18 @@ static void balance_internal_when_delete(struct tree_balance *tb,
671 return; 718 return;
672 } 719 }
673 720
674 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ 721 /* borrow from left neighbor L[h] */
722 if (tb->lnum[h] < 0) {
675 RFALSE(tb->rnum[h] != 0, 723 RFALSE(tb->rnum[h] != 0,
676 "wrong tb->rnum[%d]==%d when borrow from L[h]", h, 724 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
677 tb->rnum[h]); 725 tb->rnum[h]);
678 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
679 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, 726 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
680 -tb->lnum[h]); 727 -tb->lnum[h]);
681 return; 728 return;
682 } 729 }
683 730
684 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ 731 /* borrow from right neighbor R[h] */
732 if (tb->rnum[h] < 0) {
685 RFALSE(tb->lnum[h] != 0, 733 RFALSE(tb->lnum[h] != 0,
686 "invalid tb->lnum[%d]==%d when borrow from R[h]", 734 "invalid tb->lnum[%d]==%d when borrow from R[h]",
687 h, tb->lnum[h]); 735 h, tb->lnum[h]);
@@ -689,7 +737,8 @@ static void balance_internal_when_delete(struct tree_balance *tb,
689 return; 737 return;
690 } 738 }
691 739
692 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ 740 /* split S[h] into two parts and put them into neighbors */
741 if (tb->lnum[h] > 0) {
693 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, 742 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
694 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", 743 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
695 h, tb->lnum[h], h, tb->rnum[h], n); 744 h, tb->lnum[h], h, tb->rnum[h], n);
@@ -737,29 +786,36 @@ static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
737 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); 786 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
738} 787}
739 788
740int balance_internal(struct tree_balance *tb, /* tree_balance structure */ 789
741 int h, /* level of the tree */ 790/*
742 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ 791 * if inserting/pasting {
743 struct buffer_head **insert_ptr /* node for insertion on higher level */ 792 * child_pos is the position of the node-pointer in S[h] that
744 ) 793 * pointed to S[h-1] before balancing of the h-1 level;
745 /* if inserting/pasting 794 * this means that new pointers and items must be inserted AFTER
746 { 795 * child_pos
747 child_pos is the position of the node-pointer in S[h] that * 796 * } else {
748 pointed to S[h-1] before balancing of the h-1 level; * 797 * it is the position of the leftmost pointer that must be deleted
749 this means that new pointers and items must be inserted AFTER * 798 * (together with its corresponding key to the left of the pointer)
750 child_pos 799 * as a result of the previous level's balancing.
751 } 800 * }
752 else 801 */
753 { 802
754 it is the position of the leftmost pointer that must be deleted (together with 803int balance_internal(struct tree_balance *tb,
755 its corresponding key to the left of the pointer) 804 int h, /* level of the tree */
756 as a result of the previous level's balancing. 805 int child_pos,
757 } 806 /* key for insertion on higher level */
758 */ 807 struct item_head *insert_key,
808 /* node for insertion on higher level */
809 struct buffer_head **insert_ptr)
759{ 810{
760 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); 811 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
761 struct buffer_info bi; 812 struct buffer_info bi;
762 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ 813
814 /*
815 * we return this: it is 0 if there is no S[h],
816 * else it is tb->S[h]->b_item_order
817 */
818 int order;
763 int insert_num, n, k; 819 int insert_num, n, k;
764 struct buffer_head *S_new; 820 struct buffer_head *S_new;
765 struct item_head new_insert_key; 821 struct item_head new_insert_key;
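
The reformatted header comment above pins down the child_pos contract: on the insert/paste path, new (key, pointer) pairs land after child_pos, while on the delete path child_pos names the leftmost pointer to drop. A minimal standalone sketch of just that convention (plain ints and invented helpers, not the reiserfs structures):

#include <stdio.h>
#include <string.h>

/* Toy model of an internal node: an array of child "pointers". */
static int node[16] = { 10, 20, 30, 40 };
static int nr = 4;

/* insert/paste path: new pointers go AFTER child_pos */
static void toy_insert(int child_pos, int ptr)
{
        memmove(&node[child_pos + 2], &node[child_pos + 1],
                (nr - child_pos - 1) * sizeof(int));
        node[child_pos + 1] = ptr;
        nr++;
}

/* delete path: child_pos names the leftmost pointer to remove */
static void toy_delete(int child_pos)
{
        memmove(&node[child_pos], &node[child_pos + 1],
                (nr - child_pos - 1) * sizeof(int));
        nr--;
}

int main(void)
{
        toy_insert(1, 25);              /* 10 20 25 30 40 */
        toy_delete(0);                  /* 20 25 30 40    */
        for (int i = 0; i < nr; i++)
                printf("%d ", node[i]);
        printf("\n");
        return 0;
}
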
@@ -774,8 +830,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
774 (tbSh) ? PATH_H_POSITION(tb->tb_path, 830 (tbSh) ? PATH_H_POSITION(tb->tb_path,
775 h + 1) /*tb->S[h]->b_item_order */ : 0; 831 h + 1) /*tb->S[h]->b_item_order */ : 0;
776 832
777 /* Using insert_size[h] calculate the number insert_num of items 833 /*
778 that must be inserted to or deleted from S[h]. */ 834 * Using insert_size[h] calculate the number insert_num of items
835 * that must be inserted to or deleted from S[h].
836 */
779 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); 837 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
780 838
781 /* Check whether insert_num is proper * */ 839 /* Check whether insert_num is proper * */
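
The insert_num computation checked above divides insert_size[h] by the byte cost of one (key, node-pointer) pair. Assuming the usual reiserfs on-disk sizes of a 16-byte key and an 8-byte disk_child (reiserfs.h is authoritative), the arithmetic works out as follows:

#include <stdio.h>

/*
 * Byte cost of one (key, node-pointer) pair; the values below are
 * assumed, see KEY_SIZE and DC_SIZE in reiserfs.h for the real ones.
 */
#define KEY_SIZE 16
#define DC_SIZE   8

int main(void)
{
        int sizes[] = { 24, 48, 0, -24 };       /* negative == deletion */

        for (int i = 0; i < 4; i++)
                printf("insert_size=%4d -> insert_num=%d\n",
                       sizes[i], sizes[i] / (KEY_SIZE + DC_SIZE));
        return 0;
}
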
@@ -794,23 +852,21 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
794 852
795 k = 0; 853 k = 0;
796 if (tb->lnum[h] > 0) { 854 if (tb->lnum[h] > 0) {
797 /* shift lnum[h] items from S[h] to the left neighbor L[h]. 855 /*
798 check how many of new items fall into L[h] or CFL[h] after 856 * shift lnum[h] items from S[h] to the left neighbor L[h].
799 shifting */ 857 * check how many of new items fall into L[h] or CFL[h] after
858 * shifting
859 */
800 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ 860 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
801 if (tb->lnum[h] <= child_pos) { 861 if (tb->lnum[h] <= child_pos) {
802 /* new items don't fall into L[h] or CFL[h] */ 862 /* new items don't fall into L[h] or CFL[h] */
803 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 863 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
804 tb->lnum[h]); 864 tb->lnum[h]);
805 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
806 child_pos -= tb->lnum[h]; 865 child_pos -= tb->lnum[h];
807 } else if (tb->lnum[h] > child_pos + insert_num) { 866 } else if (tb->lnum[h] > child_pos + insert_num) {
808 /* all new items fall into L[h] */ 867 /* all new items fall into L[h] */
809 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, 868 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
810 tb->lnum[h] - insert_num); 869 tb->lnum[h] - insert_num);
811 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
812 tb->lnum[h]-insert_num);
813 */
814 /* insert insert_num keys and node-pointers into L[h] */ 870 /* insert insert_num keys and node-pointers into L[h] */
815 bi.tb = tb; 871 bi.tb = tb;
816 bi.bi_bh = tb->L[h]; 872 bi.bi_bh = tb->L[h];
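
The lnum[h] > 0 branch above splits into three cases by comparing lnum[h] against child_pos and child_pos + insert_num. A tiny sketch of the predicate alone (illustrative only), so the boundaries are easy to probe:

#include <stdio.h>

/*
 * Where do insert_num new (key, pointer) pairs land when lnum items
 * are shifted from S[h] into L[h]? Mirrors the three branches in the
 * hunk above, nothing more.
 */
static const char *classify(int lnum, int child_pos, int insert_num)
{
        if (lnum <= child_pos)
                return "none fall into L[h] or CFL[h]";
        if (lnum > child_pos + insert_num)
                return "all fall into L[h]";
        return "split between L[h] and S[h]";
}

int main(void)
{
        printf("%s\n", classify(2, 5, 1));      /* none  */
        printf("%s\n", classify(8, 5, 1));      /* all   */
        printf("%s\n", classify(6, 5, 2));      /* split */
        return 0;
}
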
@@ -826,7 +882,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
826 } else { 882 } else {
827 struct disk_child *dc; 883 struct disk_child *dc;
828 884
829 /* some items fall into L[h] or CFL[h], but some don't fall */ 885 /*
886 * some items fall into L[h] or CFL[h],
887 * but some don't fall
888 */
830 internal_shift1_left(tb, h, child_pos + 1); 889 internal_shift1_left(tb, h, child_pos + 1);
831 /* calculate number of new items that fall into L[h] */ 890 /* calculate number of new items that fall into L[h] */
832 k = tb->lnum[h] - child_pos - 1; 891 k = tb->lnum[h] - child_pos - 1;
@@ -841,7 +900,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
841 900
842 replace_lkey(tb, h, insert_key + k); 901 replace_lkey(tb, h, insert_key + k);
843 902
844 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ 903 /*
904 * replace the first node-ptr in S[h] by
905 * node-ptr to insert_ptr[k]
906 */
845 dc = B_N_CHILD(tbSh, 0); 907 dc = B_N_CHILD(tbSh, 0);
846 put_dc_size(dc, 908 put_dc_size(dc,
847 MAX_CHILD_SIZE(insert_ptr[k]) - 909 MAX_CHILD_SIZE(insert_ptr[k]) -
@@ -860,17 +922,17 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
860 /* tb->lnum[h] > 0 */ 922 /* tb->lnum[h] > 0 */
861 if (tb->rnum[h] > 0) { 923 if (tb->rnum[h] > 0) {
862 /*shift rnum[h] items from S[h] to the right neighbor R[h] */ 924 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
863 /* check how many of new items fall into R or CFR after shifting */ 925 /*
926 * check how many of new items fall into R or CFR
927 * after shifting
928 */
864 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ 929 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
865 if (n - tb->rnum[h] >= child_pos) 930 if (n - tb->rnum[h] >= child_pos)
866 /* new items fall into S[h] */ 931 /* new items fall into S[h] */
867 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
868 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 932 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
869 tb->rnum[h]); 933 tb->rnum[h]);
870 else if (n + insert_num - tb->rnum[h] < child_pos) { 934 else if (n + insert_num - tb->rnum[h] < child_pos) {
871 /* all new items fall into R[h] */ 935 /* all new items fall into R[h] */
872 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
873 tb->rnum[h] - insert_num); */
874 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, 936 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
875 tb->rnum[h] - insert_num); 937 tb->rnum[h] - insert_num);
876 938
@@ -904,7 +966,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
904 966
905 replace_rkey(tb, h, insert_key + insert_num - k - 1); 967 replace_rkey(tb, h, insert_key + insert_num - k - 1);
906 968
907 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ 969 /*
970 * replace the first node-ptr in R[h] by
971 * node-ptr insert_ptr[insert_num-k-1]
972 */
908 dc = B_N_CHILD(tb->R[h], 0); 973 dc = B_N_CHILD(tb->R[h], 0);
909 put_dc_size(dc, 974 put_dc_size(dc,
910 MAX_CHILD_SIZE(insert_ptr 975 MAX_CHILD_SIZE(insert_ptr
@@ -921,7 +986,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
921 } 986 }
922 } 987 }
923 988
924 /** Fill new node that appears instead of S[h] **/ 989 /** Fill new node that appears instead of S[h] **/
925 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); 990 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
926 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); 991 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
927 992
@@ -1002,11 +1067,13 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1002 /* last parameter is del_par */ 1067 /* last parameter is del_par */
1003 internal_move_pointers_items(&dest_bi, &src_bi, 1068 internal_move_pointers_items(&dest_bi, &src_bi,
1004 LAST_TO_FIRST, snum, 0); 1069 LAST_TO_FIRST, snum, 0);
1005 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1006 } else if (n + insert_num - snum < child_pos) { 1070 } else if (n + insert_num - snum < child_pos) {
1007 /* all new items fall into S_new */ 1071 /* all new items fall into S_new */
1008 /* store the delimiting key for the next level */ 1072 /* store the delimiting key for the next level */
1009 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ 1073 /*
1074 * new_insert_key = (n + insert_item - snum)'th
1075 * key in S[h]
1076 */
1010 memcpy(&new_insert_key, 1077 memcpy(&new_insert_key,
1011 internal_key(tbSh, n + insert_num - snum), 1078 internal_key(tbSh, n + insert_num - snum),
1012 KEY_SIZE); 1079 KEY_SIZE);
@@ -1014,9 +1081,11 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1014 internal_move_pointers_items(&dest_bi, &src_bi, 1081 internal_move_pointers_items(&dest_bi, &src_bi,
1015 LAST_TO_FIRST, 1082 LAST_TO_FIRST,
1016 snum - insert_num, 0); 1083 snum - insert_num, 0);
1017 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1018 1084
1019 /* insert insert_num keys and node-pointers into S_new */ 1085 /*
1086 * insert insert_num keys and node-pointers
1087 * into S_new
1088 */
1020 internal_insert_childs(&dest_bi, 1089 internal_insert_childs(&dest_bi,
1021 /*S_new,tb->S[h-1]->b_next, */ 1090 /*S_new,tb->S[h-1]->b_next, */
1022 child_pos - n - insert_num + 1091 child_pos - n - insert_num +
@@ -1033,7 +1102,6 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1033 internal_move_pointers_items(&dest_bi, &src_bi, 1102 internal_move_pointers_items(&dest_bi, &src_bi,
1034 LAST_TO_FIRST, 1103 LAST_TO_FIRST,
1035 n - child_pos + 1, 1); 1104 n - child_pos + 1, 1);
1036 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1037 /* calculate number of new items that fall into S_new */ 1105 /* calculate number of new items that fall into S_new */
1038 k = snum - n + child_pos - 1; 1106 k = snum - n + child_pos - 1;
1039 1107
@@ -1043,7 +1111,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1043 /* new_insert_key = insert_key[insert_num - k - 1] */ 1111 /* new_insert_key = insert_key[insert_num - k - 1] */
1044 memcpy(&new_insert_key, insert_key + insert_num - k - 1, 1112 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1045 KEY_SIZE); 1113 KEY_SIZE);
1046 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ 1114 /*
1115 * replace first node-ptr in S_new by node-ptr
1116 * to insert_ptr[insert_num-k-1]
1117 */
1047 1118
1048 dc = B_N_CHILD(S_new, 0); 1119 dc = B_N_CHILD(S_new, 0);
1049 put_dc_size(dc, 1120 put_dc_size(dc,
@@ -1066,7 +1137,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure
1066 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", 1137 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1067 S_new); 1138 S_new);
1068 1139
1069 // S_new is released in unfix_nodes 1140 /* S_new is released in unfix_nodes */
1070 } 1141 }
1071 1142
1072 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ 1143 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index b8d3ffb1f722..cc2095943ec6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,10 @@ int reiserfs_commit_write(struct file *f, struct page *page,
25 25
26void reiserfs_evict_inode(struct inode *inode) 26void reiserfs_evict_inode(struct inode *inode)
27{ 27{
28 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 28 /*
29 * We need blocks for transaction + (user+group) quota
30 * update (possibly delete)
31 */
29 int jbegin_count = 32 int jbegin_count =
30 JOURNAL_PER_BALANCE_CNT * 2 + 33 JOURNAL_PER_BALANCE_CNT * 2 +
31 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 34 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
@@ -39,8 +42,12 @@ void reiserfs_evict_inode(struct inode *inode)
39 if (inode->i_nlink) 42 if (inode->i_nlink)
40 goto no_delete; 43 goto no_delete;
41 44
42 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 45 /*
43 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 46 * The = 0 happens when we abort creating a new inode
47 * for some reason like lack of space..
48 * also handles bad_inode case
49 */
50 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {
44 51
45 reiserfs_delete_xattrs(inode); 52 reiserfs_delete_xattrs(inode);
46 53
@@ -54,9 +61,11 @@ void reiserfs_evict_inode(struct inode *inode)
54 61
55 err = reiserfs_delete_object(&th, inode); 62 err = reiserfs_delete_object(&th, inode);
56 63
57 /* Do quota update inside a transaction for journaled quotas. We must do that 64 /*
58 * after delete_object so that quota updates go into the same transaction as 65 * Do quota update inside a transaction for journaled quotas.
59 * stat data deletion */ 66 * We must do that after delete_object so that quota updates
67 * go into the same transaction as stat data deletion
68 */
60 if (!err) { 69 if (!err) {
61 int depth = reiserfs_write_unlock_nested(inode->i_sb); 70 int depth = reiserfs_write_unlock_nested(inode->i_sb);
62 dquot_free_inode(inode); 71 dquot_free_inode(inode);
@@ -66,22 +75,29 @@ void reiserfs_evict_inode(struct inode *inode)
66 if (journal_end(&th, inode->i_sb, jbegin_count)) 75 if (journal_end(&th, inode->i_sb, jbegin_count))
67 goto out; 76 goto out;
68 77
69 /* check return value from reiserfs_delete_object after 78 /*
79 * check return value from reiserfs_delete_object after
70 * ending the transaction 80 * ending the transaction
71 */ 81 */
72 if (err) 82 if (err)
73 goto out; 83 goto out;
74 84
75 /* all items of file are deleted, so we can remove "save" link */ 85 /*
76 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything 86 * all items of file are deleted, so we can remove
77 * about an error here */ 87 * "save" link
88 * we can't do anything about an error here
89 */
90 remove_save_link(inode, 0 /* not truncate */);
78out: 91out:
79 reiserfs_write_unlock(inode->i_sb); 92 reiserfs_write_unlock(inode->i_sb);
80 } else { 93 } else {
81 /* no object items are in the tree */ 94 /* no object items are in the tree */
82 ; 95 ;
83 } 96 }
84 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 97
98 /* note this must go after the journal_end to prevent deadlock */
99 clear_inode(inode);
100
85 dquot_drop(inode); 101 dquot_drop(inode);
86 inode->i_blocks = 0; 102 inode->i_blocks = 0;
87 return; 103 return;
@@ -103,8 +119,10 @@ static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
103 key->key_length = length; 119 key->key_length = length;
104} 120}
105 121
106/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set 122/*
107 offset and type of key */ 123 * take base of inode_key (it comes from inode always) (dirid, objectid)
124 * and version from an inode, set offset and type of key
125 */
108void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, 126void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
109 int type, int length) 127 int type, int length)
110{ 128{
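
To make the (dirid, objectid, offset, type) shape of the key described above concrete, here is a toy struct with illustrative field names (not the kernel's cpu_key declaration):

#include <stdio.h>

/*
 * (dirid, objectid) come from the inode and identify the object;
 * offset and type complete the key. Field names are invented.
 */
struct toy_key {
        unsigned dirid;                 /* parent directory id       */
        unsigned objectid;              /* object id                 */
        unsigned long long offset;      /* byte offset, 1-based      */
        int type;                       /* stat data/direct/indirect */
};

int main(void)
{
        struct toy_key key = {
                .dirid = 4, .objectid = 107, .offset = 8193, .type = 1,
        };
        printf("[%u %u 0x%llx %d]\n",
               key.dirid, key.objectid, key.offset, key.type);
        return 0;
}
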
@@ -114,9 +132,7 @@ void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
114 length); 132 length);
115} 133}
116 134
117// 135/* when key is 0, do not set version and short key */
118// when key is 0, do not set version and short key
119//
120inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, 136inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
121 int version, 137 int version,
122 loff_t offset, int type, int length, 138 loff_t offset, int type, int length,
@@ -132,43 +148,47 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
132 set_le_ih_k_type(ih, type); 148 set_le_ih_k_type(ih, type);
133 put_ih_item_len(ih, length); 149 put_ih_item_len(ih, length);
134 /* set_ih_free_space (ih, 0); */ 150 /* set_ih_free_space (ih, 0); */
135 // for directory items it is entry count, for directs and stat 151 /*
136 // datas - 0xffff, for indirects - 0 152 * for directory items it is entry count, for directs and stat
153 * datas - 0xffff, for indirects - 0
154 */
137 put_ih_entry_count(ih, entry_count); 155 put_ih_entry_count(ih, entry_count);
138} 156}
139 157
140// 158/*
141// FIXME: we might cache recently accessed indirect item 159 * FIXME: we might cache recently accessed indirect item
142 160 * Ugh. Not too eager for that....
143// Ugh. Not too eager for that.... 161 * I cut the code until such time as I see a convincing argument (benchmark).
144// I cut the code until such time as I see a convincing argument (benchmark). 162 * I don't want a bloated inode struct..., and I don't like code complexity....
145// I don't want a bloated inode struct..., and I don't like code complexity.... 163 */
146
147/* cutting the code is fine, since it really isn't in use yet and is easy
148** to add back in. But, Vladimir has a really good idea here. Think
149** about what happens for reading a file. For each page,
150** The VFS layer calls reiserfs_readpage, who searches the tree to find
151** an indirect item. This indirect item has X number of pointers, where
152** X is a big number if we've done the block allocation right. But,
153** we only use one or two of these pointers during each call to readpage,
154** needlessly researching again later on.
155**
156** The size of the cache could be dynamic based on the size of the file.
157**
158** I'd also like to see us cache the location the stat data item, since
159** we are needlessly researching for that frequently.
160**
161** --chris
162*/
163 164
164/* If this page has a file tail in it, and 165/*
165** it was read in by get_block_create_0, the page data is valid, 166 * cutting the code is fine, since it really isn't in use yet and is easy
166** but tail is still sitting in a direct item, and we can't write to 167 * to add back in. But, Vladimir has a really good idea here. Think
167** it. So, look through this page, and check all the mapped buffers 168 * about what happens for reading a file. For each page,
168** to make sure they have valid block numbers. Any that don't need 169 * The VFS layer calls reiserfs_readpage, who searches the tree to find
169** to be unmapped, so that __block_write_begin will correctly call 170 * an indirect item. This indirect item has X number of pointers, where
170** reiserfs_get_block to convert the tail into an unformatted node 171 * X is a big number if we've done the block allocation right. But,
171*/ 172 * we only use one or two of these pointers during each call to readpage,
173 * needlessly researching again later on.
174 *
175 * The size of the cache could be dynamic based on the size of the file.
176 *
177 * I'd also like to see us cache the location the stat data item, since
178 * we are needlessly researching for that frequently.
179 *
180 * --chris
181 */
182
183/*
184 * If this page has a file tail in it, and
185 * it was read in by get_block_create_0, the page data is valid,
186 * but tail is still sitting in a direct item, and we can't write to
187 * it. So, look through this page, and check all the mapped buffers
188 * to make sure they have valid block numbers. Any that don't need
189 * to be unmapped, so that __block_write_begin will correctly call
190 * reiserfs_get_block to convert the tail into an unformatted node
191 */
172static inline void fix_tail_page_for_writing(struct page *page) 192static inline void fix_tail_page_for_writing(struct page *page)
173{ 193{
174 struct buffer_head *head, *next, *bh; 194 struct buffer_head *head, *next, *bh;
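
A toy version of the walk fix_tail_page_for_writing performs, per the comment above: visit each buffer on the page once and unmap any that is marked mapped yet points at block 0, so a later get_block redoes the mapping (invented types; the real loop follows b_this_page):

#include <stdio.h>

struct toy_bh {
        int mapped;
        long blocknr;
        struct toy_bh *next;
};

int main(void)
{
        struct toy_bh c = { 1, 0,   0 };
        struct toy_bh b = { 1, 812, &c };
        struct toy_bh a = { 0, 0,   &b };
        struct toy_bh *head = &a, *bh = &a;

        c.next = &a;                    /* page buffers form a ring */
        do {
                if (bh->mapped && bh->blocknr == 0) {
                        bh->mapped = 0; /* tail data: force get_block */
                        printf("unmapped a tail buffer\n");
                }
                bh = bh->next;
        } while (bh != head);
        return 0;
}
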
@@ -186,8 +206,10 @@ static inline void fix_tail_page_for_writing(struct page *page)
186 } 206 }
187} 207}
188 208
189/* reiserfs_get_block does not need to allocate a block only if it has been 209/*
190 done already or non-hole position has been found in the indirect item */ 210 * reiserfs_get_block does not need to allocate a block only if it has been
211 * done already or non-hole position has been found in the indirect item
212 */
191static inline int allocation_needed(int retval, b_blocknr_t allocated, 213static inline int allocation_needed(int retval, b_blocknr_t allocated,
192 struct item_head *ih, 214 struct item_head *ih,
193 __le32 * item, int pos_in_item) 215 __le32 * item, int pos_in_item)
@@ -211,14 +233,16 @@ static inline void set_block_dev_mapped(struct buffer_head *bh,
211 map_bh(bh, inode->i_sb, block); 233 map_bh(bh, inode->i_sb, block);
212} 234}
213 235
214// 236/*
215// files which were created in the earlier version can not be longer, 237 * files which were created in the earlier version can not be longer,
216// than 2 gb 238 * than 2 gb
217// 239 */
218static int file_capable(struct inode *inode, sector_t block) 240static int file_capable(struct inode *inode, sector_t block)
219{ 241{
220 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. 242 /* it is new file. */
221 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 243 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||
244 /* old file, but 'block' is inside of 2gb */
245 block < (1 << (31 - inode->i_sb->s_blocksize_bits)))
222 return 1; 246 return 1;
223 247
224 return 0; 248 return 0;
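
The 2 GB cutoff in file_capable follows directly from the shift: with 4 KiB blocks (s_blocksize_bits == 12) the highest mappable old-format logical block is 1 << (31 - 12). A standalone check of that arithmetic:

#include <stdio.h>

int main(void)
{
        unsigned blocksize_bits = 12;
        unsigned long long limit_blocks = 1ULL << (31 - blocksize_bits);

        /* 524288 blocks * 4096 bytes == 2147483648 bytes == 2 GiB */
        printf("limit: %llu blocks = %llu bytes\n",
               limit_blocks, limit_blocks << blocksize_bits);
        return 0;
}
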
@@ -250,14 +274,14 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
250 return err; 274 return err;
251} 275}
252 276
253// it is called by get_block when create == 0. Returns block number 277/*
254// for 'block'-th logical block of file. When it hits direct item it 278 * it is called by get_block when create == 0. Returns block number
255// returns 0 (being called from bmap) or read direct item into piece 279 * for 'block'-th logical block of file. When it hits direct item it
256// of page (bh_result) 280 * returns 0 (being called from bmap) or read direct item into piece
257 281 * of page (bh_result)
258// Please improve the english/clarity in the comment above, as it is 282 * Please improve the english/clarity in the comment above, as it is
259// hard to understand. 283 * hard to understand.
260 284 */
261static int _get_block_create_0(struct inode *inode, sector_t block, 285static int _get_block_create_0(struct inode *inode, sector_t block,
262 struct buffer_head *bh_result, int args) 286 struct buffer_head *bh_result, int args)
263{ 287{
@@ -273,7 +297,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
273 int done = 0; 297 int done = 0;
274 unsigned long offset; 298 unsigned long offset;
275 299
276 // prepare the key to look for the 'block'-th block of file 300 /* prepare the key to look for the 'block'-th block of file */
277 make_cpu_key(&key, inode, 301 make_cpu_key(&key, inode,
278 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 302 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
279 3); 303 3);
@@ -285,23 +309,28 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
285 kunmap(bh_result->b_page); 309 kunmap(bh_result->b_page);
286 if (result == IO_ERROR) 310 if (result == IO_ERROR)
287 return -EIO; 311 return -EIO;
288 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 312 /*
289 // That there is some MMAPED data associated with it that is yet to be written to disk. 313 * We do not return -ENOENT if there is a hole but page is
314 * uptodate, because it means that there is some MMAPED data
315 * associated with it that is yet to be written to disk.
316 */
290 if ((args & GET_BLOCK_NO_HOLE) 317 if ((args & GET_BLOCK_NO_HOLE)
291 && !PageUptodate(bh_result->b_page)) { 318 && !PageUptodate(bh_result->b_page)) {
292 return -ENOENT; 319 return -ENOENT;
293 } 320 }
294 return 0; 321 return 0;
295 } 322 }
296 // 323
297 bh = get_last_bh(&path); 324 bh = get_last_bh(&path);
298 ih = tp_item_head(&path); 325 ih = tp_item_head(&path);
299 if (is_indirect_le_ih(ih)) { 326 if (is_indirect_le_ih(ih)) {
300 __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); 327 __le32 *ind_item = (__le32 *) ih_item_body(bh, ih);
301 328
302 /* FIXME: here we could cache indirect item or part of it in 329 /*
303 the inode to avoid search_by_key in case of subsequent 330 * FIXME: here we could cache indirect item or part of it in
304 access to file */ 331 * the inode to avoid search_by_key in case of subsequent
332 * access to file
333 */
305 blocknr = get_block_num(ind_item, path.pos_in_item); 334 blocknr = get_block_num(ind_item, path.pos_in_item);
306 ret = 0; 335 ret = 0;
307 if (blocknr) { 336 if (blocknr) {
@@ -311,8 +340,12 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
311 set_buffer_boundary(bh_result); 340 set_buffer_boundary(bh_result);
312 } 341 }
313 } else 342 } else
314 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 343 /*
315 // That there is some MMAPED data associated with it that is yet to be written to disk. 344 * We do not return -ENOENT if there is a hole but
345 * page is uptodate, because it means that there is
346 * some MMAPED data associated with it that is
347 * yet to be written to disk.
348 */
316 if ((args & GET_BLOCK_NO_HOLE) 349 if ((args & GET_BLOCK_NO_HOLE)
317 && !PageUptodate(bh_result->b_page)) { 350 && !PageUptodate(bh_result->b_page)) {
318 ret = -ENOENT; 351 ret = -ENOENT;
@@ -323,41 +356,45 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
323 kunmap(bh_result->b_page); 356 kunmap(bh_result->b_page);
324 return ret; 357 return ret;
325 } 358 }
326 // requested data are in direct item(s) 359 /* requested data are in direct item(s) */
327 if (!(args & GET_BLOCK_READ_DIRECT)) { 360 if (!(args & GET_BLOCK_READ_DIRECT)) {
328 // we are called by bmap. FIXME: we can not map block of file 361 /*
329 // when it is stored in direct item(s) 362 * we are called by bmap. FIXME: we can not map block of file
363 * when it is stored in direct item(s)
364 */
330 pathrelse(&path); 365 pathrelse(&path);
331 if (p) 366 if (p)
332 kunmap(bh_result->b_page); 367 kunmap(bh_result->b_page);
333 return -ENOENT; 368 return -ENOENT;
334 } 369 }
335 370
336 /* if we've got a direct item, and the buffer or page was uptodate, 371 /*
337 ** we don't want to pull data off disk again. skip to the 372 * if we've got a direct item, and the buffer or page was uptodate,
338 ** end, where we map the buffer and return 373 * we don't want to pull data off disk again. skip to the
374 * end, where we map the buffer and return
339 */ 375 */
340 if (buffer_uptodate(bh_result)) { 376 if (buffer_uptodate(bh_result)) {
341 goto finished; 377 goto finished;
342 } else 378 } else
343 /* 379 /*
344 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date 380 * grab_tail_page can trigger calls to reiserfs_get_block on
345 ** pages without any buffers. If the page is up to date, we don't want 381 * up to date pages without any buffers. If the page is up
346 ** read old data off disk. Set the up to date bit on the buffer instead 382 * to date, we don't want read old data off disk. Set the up
347 ** and jump to the end 383 * to date bit on the buffer instead and jump to the end
348 */ 384 */
349 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { 385 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
350 set_buffer_uptodate(bh_result); 386 set_buffer_uptodate(bh_result);
351 goto finished; 387 goto finished;
352 } 388 }
353 // read file tail into part of page 389 /* read file tail into part of page */
354 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); 390 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
355 copy_item_head(&tmp_ih, ih); 391 copy_item_head(&tmp_ih, ih);
356 392
357 /* we only want to kmap if we are reading the tail into the page. 393 /*
358 ** this is not the common case, so we don't kmap until we are 394 * we only want to kmap if we are reading the tail into the page.
359 ** sure we need to. But, this means the item might move if 395 * this is not the common case, so we don't kmap until we are
360 ** kmap schedules 396 * sure we need to. But, this means the item might move if
397 * kmap schedules
361 */ 398 */
362 if (!p) 399 if (!p)
363 p = (char *)kmap(bh_result->b_page); 400 p = (char *)kmap(bh_result->b_page);
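
The offset mask used above when reading the tail into the page converts a 1-based key offset into a 0-based offset within its page; standalone, with a 4096-byte PAGE_CACHE_SIZE assumed for the illustration:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 4096;
        unsigned long long key_offset[] = { 1, 100, 4100 };

        /* prints 0, 99, 3 respectively */
        for (int i = 0; i < 3; i++)
                printf("key offset %llu -> page offset %llu\n",
                       key_offset[i],
                       (key_offset[i] - 1) & (page_size - 1));
        return 0;
}
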
@@ -368,10 +405,11 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
368 if (!is_direct_le_ih(ih)) { 405 if (!is_direct_le_ih(ih)) {
369 BUG(); 406 BUG();
370 } 407 }
371 /* make sure we don't read more bytes than actually exist in 408 /*
372 ** the file. This can happen in odd cases where i_size isn't 409 * make sure we don't read more bytes than actually exist in
373 ** correct, and when direct item padding results in a few 410 * the file. This can happen in odd cases where i_size isn't
374 ** extra bytes at the end of the direct item 411 * correct, and when direct item padding results in a few
412 * extra bytes at the end of the direct item
375 */ 413 */
376 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) 414 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
377 break; 415 break;
@@ -390,18 +428,20 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
390 428
391 p += chars; 429 p += chars;
392 430
431 /*
432 * we done, if read direct item is not the last item of
433 * node FIXME: we could try to check right delimiting key
434 * to see whether direct item continues in the right
435 * neighbor or rely on i_size
436 */
393 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) 437 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
394 // we done, if read direct item is not the last item of
395 // node FIXME: we could try to check right delimiting key
396 // to see whether direct item continues in the right
397 // neighbor or rely on i_size
398 break; 438 break;
399 439
400 // update key to look for the next piece 440 /* update key to look for the next piece */
401 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); 441 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
402 result = search_for_position_by_key(inode->i_sb, &key, &path); 442 result = search_for_position_by_key(inode->i_sb, &key, &path);
403 if (result != POSITION_FOUND) 443 if (result != POSITION_FOUND)
404 // i/o error most likely 444 /* i/o error most likely */
405 break; 445 break;
406 bh = get_last_bh(&path); 446 bh = get_last_bh(&path);
407 ih = tp_item_head(&path); 447 ih = tp_item_head(&path);
@@ -416,7 +456,8 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
416 if (result == IO_ERROR) 456 if (result == IO_ERROR)
417 return -EIO; 457 return -EIO;
418 458
419 /* this buffer has valid data, but isn't valid for io. mapping it to 459 /*
460 * this buffer has valid data, but isn't valid for io. mapping it to
420 * block #0 tells the rest of reiserfs it just has a tail in it 461 * block #0 tells the rest of reiserfs it just has a tail in it
421 */ 462 */
422 map_bh(bh_result, inode->i_sb, 0); 463 map_bh(bh_result, inode->i_sb, 0);
@@ -424,8 +465,10 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
424 return 0; 465 return 0;
425} 466}
426 467
427// this is called to create file map. So, _get_block_create_0 will not 468/*
428// read direct item 469 * this is called to create file map. So, _get_block_create_0 will not
470 * read direct item
471 */
429static int reiserfs_bmap(struct inode *inode, sector_t block, 472static int reiserfs_bmap(struct inode *inode, sector_t block,
430 struct buffer_head *bh_result, int create) 473 struct buffer_head *bh_result, int create)
431{ 474{
@@ -439,22 +482,23 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
439 return 0; 482 return 0;
440} 483}
441 484
442/* special version of get_block that is only used by grab_tail_page right 485/*
443** now. It is sent to __block_write_begin, and when you try to get a 486 * special version of get_block that is only used by grab_tail_page right
444** block past the end of the file (or a block from a hole) it returns 487 * now. It is sent to __block_write_begin, and when you try to get a
445** -ENOENT instead of a valid buffer. __block_write_begin expects to 488 * block past the end of the file (or a block from a hole) it returns
446** be able to do i/o on the buffers returned, unless an error value 489 * -ENOENT instead of a valid buffer. __block_write_begin expects to
447** is also returned. 490 * be able to do i/o on the buffers returned, unless an error value
448** 491 * is also returned.
449** So, this allows __block_write_begin to be used for reading a single block 492 *
450** in a page. Where it does not produce a valid page for holes, or past the 493 * So, this allows __block_write_begin to be used for reading a single block
451** end of the file. This turns out to be exactly what we need for reading 494 * in a page. Where it does not produce a valid page for holes, or past the
452** tails for conversion. 495 * end of the file. This turns out to be exactly what we need for reading
453** 496 * tails for conversion.
454** The point of the wrapper is forcing a certain value for create, even 497 *
455** though the VFS layer is calling this function with create==1. If you 498 * The point of the wrapper is forcing a certain value for create, even
456** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, 499 * though the VFS layer is calling this function with create==1. If you
457** don't use this function. 500 * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
501 * don't use this function.
458*/ 502*/
459static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, 503static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
460 struct buffer_head *bh_result, 504 struct buffer_head *bh_result,
@@ -463,8 +507,10 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
463 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); 507 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
464} 508}
465 509
466/* This is special helper for reiserfs_get_block in case we are executing 510/*
467 direct_IO request. */ 511 * This is special helper for reiserfs_get_block in case we are executing
512 * direct_IO request.
513 */
468static int reiserfs_get_blocks_direct_io(struct inode *inode, 514static int reiserfs_get_blocks_direct_io(struct inode *inode,
469 sector_t iblock, 515 sector_t iblock,
470 struct buffer_head *bh_result, 516 struct buffer_head *bh_result,
@@ -474,9 +520,11 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
474 520
475 bh_result->b_page = NULL; 521 bh_result->b_page = NULL;
476 522
477 /* We set the b_size before reiserfs_get_block call since it is 523 /*
478 referenced in convert_tail_for_hole() that may be called from 524 * We set the b_size before reiserfs_get_block call since it is
479 reiserfs_get_block() */ 525 * referenced in convert_tail_for_hole() that may be called from
526 * reiserfs_get_block()
527 */
480 bh_result->b_size = (1 << inode->i_blkbits); 528 bh_result->b_size = (1 << inode->i_blkbits);
481 529
482 ret = reiserfs_get_block(inode, iblock, bh_result, 530 ret = reiserfs_get_block(inode, iblock, bh_result,
@@ -486,14 +534,18 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
486 534
487 /* don't allow direct io onto tail pages */ 535 /* don't allow direct io onto tail pages */
488 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { 536 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
489 /* make sure future calls to the direct io funcs for this offset 537 /*
490 ** in the file fail by unmapping the buffer 538 * make sure future calls to the direct io funcs for this
539 * offset in the file fail by unmapping the buffer
491 */ 540 */
492 clear_buffer_mapped(bh_result); 541 clear_buffer_mapped(bh_result);
493 ret = -EINVAL; 542 ret = -EINVAL;
494 } 543 }
495 /* Possible unpacked tail. Flush the data before pages have 544
496 disappeared */ 545 /*
546 * Possible unpacked tail. Flush the data before pages have
547 * disappeared
548 */
497 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { 549 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
498 int err; 550 int err;
499 551
@@ -512,15 +564,15 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
512} 564}
513 565
514/* 566/*
515** helper function for when reiserfs_get_block is called for a hole 567 * helper function for when reiserfs_get_block is called for a hole
516** but the file tail is still in a direct item 568 * but the file tail is still in a direct item
517** bh_result is the buffer head for the hole 569 * bh_result is the buffer head for the hole
518** tail_offset is the offset of the start of the tail in the file 570 * tail_offset is the offset of the start of the tail in the file
519** 571 *
520** This calls prepare_write, which will start a new transaction 572 * This calls prepare_write, which will start a new transaction
521** you should not be in a transaction, or have any paths held when you 573 * you should not be in a transaction, or have any paths held when you
522** call this. 574 * call this.
523*/ 575 */
524static int convert_tail_for_hole(struct inode *inode, 576static int convert_tail_for_hole(struct inode *inode,
525 struct buffer_head *bh_result, 577 struct buffer_head *bh_result,
526 loff_t tail_offset) 578 loff_t tail_offset)
@@ -540,9 +592,10 @@ static int convert_tail_for_hole(struct inode *inode,
540 tail_end = (tail_start | (bh_result->b_size - 1)) + 1; 592 tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
541 593
542 index = tail_offset >> PAGE_CACHE_SHIFT; 594 index = tail_offset >> PAGE_CACHE_SHIFT;
543 /* hole_page can be zero in case of direct_io, we are sure 595 /*
544 that we cannot get here if we write with O_DIRECT into 596 * hole_page can be zero in case of direct_io, we are sure
545 tail page */ 597 * that we cannot get here if we write with O_DIRECT into tail page
598 */
546 if (!hole_page || index != hole_page->index) { 599 if (!hole_page || index != hole_page->index) {
547 tail_page = grab_cache_page(inode->i_mapping, index); 600 tail_page = grab_cache_page(inode->i_mapping, index);
548 retval = -ENOMEM; 601 retval = -ENOMEM;
@@ -553,14 +606,15 @@ static int convert_tail_for_hole(struct inode *inode,
553 tail_page = hole_page; 606 tail_page = hole_page;
554 } 607 }
555 608
556 /* we don't have to make sure the conversion did not happen while 609 /*
557 ** we were locking the page because anyone that could convert 610 * we don't have to make sure the conversion did not happen while
558 ** must first take i_mutex. 611 * we were locking the page because anyone that could convert
559 ** 612 * must first take i_mutex.
560 ** We must fix the tail page for writing because it might have buffers 613 *
561 ** that are mapped, but have a block number of 0. This indicates tail 614 * We must fix the tail page for writing because it might have buffers
562 ** data that has been read directly into the page, and 615 * that are mapped, but have a block number of 0. This indicates tail
563 ** __block_write_begin won't trigger a get_block in this case. 616 * data that has been read directly into the page, and
617 * __block_write_begin won't trigger a get_block in this case.
564 */ 618 */
565 fix_tail_page_for_writing(tail_page); 619 fix_tail_page_for_writing(tail_page);
566 retval = __reiserfs_write_begin(tail_page, tail_start, 620 retval = __reiserfs_write_begin(tail_page, tail_start,
@@ -604,7 +658,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
604 struct buffer_head *bh_result, int create) 658 struct buffer_head *bh_result, int create)
605{ 659{
606 int repeat, retval = 0; 660 int repeat, retval = 0;
607 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int 661 /* b_blocknr_t is (unsigned) 32 bit int*/
662 b_blocknr_t allocated_block_nr = 0;
608 INITIALIZE_PATH(path); 663 INITIALIZE_PATH(path);
609 int pos_in_item; 664 int pos_in_item;
610 struct cpu_key key; 665 struct cpu_key key;
@@ -614,12 +669,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
614 int done; 669 int done;
615 int fs_gen; 670 int fs_gen;
616 struct reiserfs_transaction_handle *th = NULL; 671 struct reiserfs_transaction_handle *th = NULL;
617 /* space reserved in transaction batch: 672 /*
618 . 3 balancings in direct->indirect conversion 673 * space reserved in transaction batch:
619 . 1 block involved into reiserfs_update_sd() 674 * . 3 balancings in direct->indirect conversion
620 XXX in practically impossible worst case direct2indirect() 675 * . 1 block involved into reiserfs_update_sd()
621 can incur (much) more than 3 balancings. 676 * XXX in practically impossible worst case direct2indirect()
622 quota update for user, group */ 677 * can incur (much) more than 3 balancings.
678 * quota update for user, group
679 */
623 int jbegin_count = 680 int jbegin_count =
624 JOURNAL_PER_BALANCE_CNT * 3 + 1 + 681 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
625 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 682 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
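
The jbegin_count reservation above is pure arithmetic over journal constants. The sketch below recomputes its shape with loudly placeholder values, since the real JOURNAL_PER_BALANCE_CNT and REISERFS_QUOTA_TRANS_BLOCKS definitions live in reiserfs.h and are not shown here:

#include <stdio.h>

/* Placeholder values, NOT the kernel constants -- they only make the
 * shape of the reservation computable in a standalone program.
 */
#define JOURNAL_PER_BALANCE_CNT     18  /* placeholder */
#define REISERFS_QUOTA_TRANS_BLOCKS  2  /* placeholder */

int main(void)
{
        int jbegin_count =
            JOURNAL_PER_BALANCE_CNT * 3 +       /* 3 balancings     */
            1 +                                 /* stat data update */
            2 * REISERFS_QUOTA_TRANS_BLOCKS;    /* user + group     */

        printf("jbegin_count = %d\n", jbegin_count);
        return 0;
}
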
@@ -636,8 +693,9 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
636 return -EFBIG; 693 return -EFBIG;
637 } 694 }
638 695
639 /* if !create, we aren't changing the FS, so we don't need to 696 /*
640 ** log anything, so we don't need to start a transaction 697 * if !create, we aren't changing the FS, so we don't need to
698 * log anything, so we don't need to start a transaction
641 */ 699 */
642 if (!(create & GET_BLOCK_CREATE)) { 700 if (!(create & GET_BLOCK_CREATE)) {
643 int ret; 701 int ret;
@@ -647,6 +705,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
647 reiserfs_write_unlock(inode->i_sb); 705 reiserfs_write_unlock(inode->i_sb);
648 return ret; 706 return ret;
649 } 707 }
708
650 /* 709 /*
651 * if we're already in a transaction, make sure to close 710 * if we're already in a transaction, make sure to close
652 * any new transactions we start in this func 711 * any new transactions we start in this func
@@ -655,8 +714,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
655 reiserfs_transaction_running(inode->i_sb)) 714 reiserfs_transaction_running(inode->i_sb))
656 dangle = 0; 715 dangle = 0;
657 716
658 /* If file is of such a size, that it might have a tail and tails are enabled 717 /*
659 ** we should mark it as possibly needing tail packing on close 718 * If file is of such a size, that it might have a tail and
719 * tails are enabled we should mark it as possibly needing
720 * tail packing on close
660 */ 721 */
661 if ((have_large_tails(inode->i_sb) 722 if ((have_large_tails(inode->i_sb)
662 && inode->i_size < i_block_size(inode) * 4) 723 && inode->i_size < i_block_size(inode) * 4)
@@ -703,11 +764,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
703 _allocate_block(th, block, inode, &allocated_block_nr, 764 _allocate_block(th, block, inode, &allocated_block_nr,
704 &path, create); 765 &path, create);
705 766
767 /*
768 * restart the transaction to give the journal a chance to free
769 * some blocks. releases the path, so we have to go back to
770 * research if we succeed on the second try
771 */
706 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { 772 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
707 /* restart the transaction to give the journal a chance to free
708 ** some blocks. releases the path, so we have to go back to
709 ** research if we succeed on the second try
710 */
711 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; 773 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
712 retval = restart_transaction(th, inode, &path); 774 retval = restart_transaction(th, inode, &path);
713 if (retval) 775 if (retval)
@@ -734,9 +796,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
734 796
735 if (indirect_item_found(retval, ih)) { 797 if (indirect_item_found(retval, ih)) {
736 b_blocknr_t unfm_ptr; 798 b_blocknr_t unfm_ptr;
737 /* 'block'-th block is in the file already (there is 799 /*
738 corresponding cell in some indirect item). But it may be 800 * 'block'-th block is in the file already (there is
739 zero unformatted node pointer (hole) */ 801 * corresponding cell in some indirect item). But it may be
802 * zero unformatted node pointer (hole)
803 */
740 unfm_ptr = get_block_num(item, pos_in_item); 804 unfm_ptr = get_block_num(item, pos_in_item);
741 if (unfm_ptr == 0) { 805 if (unfm_ptr == 0) {
742 /* use allocated block to plug the hole */ 806 /* use allocated block to plug the hole */
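
Per the hole comment above: an indirect item is an array of 32-bit block numbers, and a zero entry is a hole that the freshly allocated block plugs. A standalone illustration (plain unsigned instead of __le32):

#include <stdio.h>

int main(void)
{
        unsigned ind_item[] = { 5001, 0, 5003 };
        int pos_in_item = 1;
        unsigned allocated_block_nr = 6100;

        if (ind_item[pos_in_item] == 0) {       /* hole */
                ind_item[pos_in_item] = allocated_block_nr;
                printf("plugged hole at pos %d with block %u\n",
                       pos_in_item, allocated_block_nr);
        }
        return 0;
}
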
@@ -764,9 +828,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
764 828
765 reiserfs_write_unlock(inode->i_sb); 829 reiserfs_write_unlock(inode->i_sb);
766 830
767 /* the item was found, so new blocks were not added to the file 831 /*
768 ** there is no need to make sure the inode is updated with this 832 * the item was found, so new blocks were not added to the file
769 ** transaction 833 * there is no need to make sure the inode is updated with this
834 * transaction
770 */ 835 */
771 return retval; 836 return retval;
772 } 837 }
@@ -776,9 +841,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
776 goto start_trans; 841 goto start_trans;
777 } 842 }
778 843
779 /* desired position is not found or is in the direct item. We have 844 /*
780 to append file with holes up to 'block'-th block converting 845 * desired position is not found or is in the direct item. We have
781 direct items to indirect one if necessary */ 846 * to append file with holes up to 'block'-th block converting
847 * direct items to indirect one if necessary
848 */
782 done = 0; 849 done = 0;
783 do { 850 do {
784 if (is_statdata_le_ih(ih)) { 851 if (is_statdata_le_ih(ih)) {
@@ -790,16 +857,18 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
790 TYPE_INDIRECT, UNFM_P_SIZE, 857 TYPE_INDIRECT, UNFM_P_SIZE,
791 0 /* free_space */ ); 858 0 /* free_space */ );
792 859
860 /*
861 * we are going to add 'block'-th block to the file.
862 * Use allocated block for that
863 */
793 if (cpu_key_k_offset(&key) == 1) { 864 if (cpu_key_k_offset(&key) == 1) {
794 /* we are going to add 'block'-th block to the file. Use
795 allocated block for that */
796 unp = cpu_to_le32(allocated_block_nr); 865 unp = cpu_to_le32(allocated_block_nr);
797 set_block_dev_mapped(bh_result, 866 set_block_dev_mapped(bh_result,
798 allocated_block_nr, inode); 867 allocated_block_nr, inode);
799 set_buffer_new(bh_result); 868 set_buffer_new(bh_result);
800 done = 1; 869 done = 1;
801 } 870 }
802 tmp_key = key; // ;) 871 tmp_key = key; /* ;) */
803 set_cpu_key_k_offset(&tmp_key, 1); 872 set_cpu_key_k_offset(&tmp_key, 1);
804 PATH_LAST_POSITION(&path)++; 873 PATH_LAST_POSITION(&path)++;
805 874
@@ -809,9 +878,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
809 if (retval) { 878 if (retval) {
810 reiserfs_free_block(th, inode, 879 reiserfs_free_block(th, inode,
811 allocated_block_nr, 1); 880 allocated_block_nr, 1);
812 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST 881 /*
882 * retval == -ENOSPC, -EDQUOT or -EIO
883 * or -EEXIST
884 */
885 goto failure;
813 } 886 }
814 //mark_tail_converted (inode);
815 } else if (is_direct_le_ih(ih)) { 887 } else if (is_direct_le_ih(ih)) {
816 /* direct item has to be converted */ 888 /* direct item has to be converted */
817 loff_t tail_offset; 889 loff_t tail_offset;
@@ -819,18 +891,24 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
819 tail_offset = 891 tail_offset =
820 ((le_ih_k_offset(ih) - 892 ((le_ih_k_offset(ih) -
821 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; 893 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
894
895 /*
896 * direct item we just found fits into block we have
897 * to map. Convert it into unformatted node: use
898 * bh_result for the conversion
899 */
822 if (tail_offset == cpu_key_k_offset(&key)) { 900 if (tail_offset == cpu_key_k_offset(&key)) {
823 /* direct item we just found fits into block we have
824 to map. Convert it into unformatted node: use
825 bh_result for the conversion */
826 set_block_dev_mapped(bh_result, 901 set_block_dev_mapped(bh_result,
827 allocated_block_nr, inode); 902 allocated_block_nr, inode);
828 unbh = bh_result; 903 unbh = bh_result;
829 done = 1; 904 done = 1;
830 } else { 905 } else {
831 /* we have to padd file tail stored in direct item(s) 906 /*
832 up to block size and convert it to unformatted 907 * we have to pad file tail stored in direct
833 node. FIXME: this should also get into page cache */ 908 * item(s) up to block size and convert it
909 * to unformatted node. FIXME: this should
910 * also get into page cache
911 */
834 912
835 pathrelse(&path); 913 pathrelse(&path);
836 /* 914 /*
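
The tail_offset expression at the top of this hunk rounds a 1-based key offset down to the 1-based start of its block; a standalone check, with a 4 KiB block size assumed:

#include <stdio.h>

int main(void)
{
        unsigned long long blocksize = 4096;
        unsigned long long off[] = { 1, 4096, 4097, 9000 };

        /* prints 1, 1, 4097, 8193: subtract one, mask down to a block
         * boundary, add one back
         */
        for (int i = 0; i < 4; i++)
                printf("key offset %llu -> tail_offset %llu\n", off[i],
                       ((off[i] - 1) & ~(blocksize - 1)) + 1);
        return 0;
}
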
@@ -859,7 +937,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
859 inode->i_ino, 937 inode->i_ino,
860 retval); 938 retval);
861 if (allocated_block_nr) { 939 if (allocated_block_nr) {
862 /* the bitmap, the super, and the stat data == 3 */ 940 /*
941 * the bitmap, the super,
942 * and the stat data == 3
943 */
863 if (!th) 944 if (!th)
864 th = reiserfs_persistent_transaction(inode->i_sb, 3); 945 th = reiserfs_persistent_transaction(inode->i_sb, 3);
865 if (th) 946 if (th)
@@ -881,43 +962,57 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
881 allocated_block_nr, 1); 962 allocated_block_nr, 1);
882 goto failure; 963 goto failure;
883 } 964 }
884 /* it is important the set_buffer_uptodate is done after 965 /*
885 ** the direct2indirect. The buffer might contain valid 966 * it is important the set_buffer_uptodate is done
886 ** data newer than the data on disk (read by readpage, changed, 967 * after the direct2indirect. The buffer might
887 ** and then sent here by writepage). direct2indirect needs 968 * contain valid data newer than the data on disk
888 ** to know if unbh was already up to date, so it can decide 969 * (read by readpage, changed, and then sent here by
889 ** if the data in unbh needs to be replaced with data from 970 * writepage). direct2indirect needs to know if unbh
890 ** the disk 971 * was already up to date, so it can decide if the
972 * data in unbh needs to be replaced with data from
973 * the disk
891 */ 974 */
892 set_buffer_uptodate(unbh); 975 set_buffer_uptodate(unbh);
893 976
894 /* unbh->b_page == NULL in case of DIRECT_IO request, this means 977 /*
895 buffer will disappear shortly, so it should not be added to 978 * unbh->b_page == NULL in case of DIRECT_IO request,
979 * this means buffer will disappear shortly, so it
980 * should not be added to
896 */ 981 */
897 if (unbh->b_page) { 982 if (unbh->b_page) {
898 /* we've converted the tail, so we must 983 /*
899 ** flush unbh before the transaction commits 984 * we've converted the tail, so we must
985 * flush unbh before the transaction commits
900 */ 986 */
901 reiserfs_add_tail_list(inode, unbh); 987 reiserfs_add_tail_list(inode, unbh);
902 988
903 /* mark it dirty now to prevent commit_write from adding 989 /*
904 ** this buffer to the inode's dirty buffer list 990 * mark it dirty now to prevent commit_write
991 * from adding this buffer to the inode's
992 * dirty buffer list
905 */ 993 */
906 /* 994 /*
907 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). 995 * AKPM: changed __mark_buffer_dirty to
908 * It's still atomic, but it sets the page dirty too, 996 * mark_buffer_dirty(). It's still atomic,
909 * which makes it eligible for writeback at any time by the 997 * but it sets the page dirty too, which makes
910 * VM (which was also the case with __mark_buffer_dirty()) 998 * it eligible for writeback at any time by the
999 * VM (which was also the case with
1000 * __mark_buffer_dirty())
911 */ 1001 */
912 mark_buffer_dirty(unbh); 1002 mark_buffer_dirty(unbh);
913 } 1003 }
914 } else { 1004 } else {
915 /* append indirect item with holes if needed, when appending 1005 /*
916 pointer to 'block'-th block use block, which is already 1006 * append indirect item with holes if needed, when
917 allocated */ 1007 * appending pointer to 'block'-th block use block,
1008 * which is already allocated
1009 */
918 struct cpu_key tmp_key; 1010 struct cpu_key tmp_key;
919 unp_t unf_single = 0; // We use this in case we need to allocate only 1011 /*
920 // one block which is a fastpath 1012 * We use this in case we need to allocate
1013 * only one block which is a fastpath
1014 */
1015 unp_t unf_single = 0;
921 unp_t *un; 1016 unp_t *un;
922 __u64 max_to_insert = 1017 __u64 max_to_insert =
923 MAX_ITEM_LEN(inode->i_sb->s_blocksize) / 1018 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
@@ -926,14 +1021,17 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
926 1021
927 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, 1022 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
928 "vs-804: invalid position for append"); 1023 "vs-804: invalid position for append");
929 /* indirect item has to be appended, set up key of that position */ 1024 /*
1025 * indirect item has to be appended,
1026 * set up key of that position
1027 * (key type is unimportant)
1028 */
930 make_cpu_key(&tmp_key, inode, 1029 make_cpu_key(&tmp_key, inode,
931 le_key_k_offset(version, 1030 le_key_k_offset(version,
932 &(ih->ih_key)) + 1031 &(ih->ih_key)) +
933 op_bytes_number(ih, 1032 op_bytes_number(ih,
934 inode->i_sb->s_blocksize), 1033 inode->i_sb->s_blocksize),
935 //pos_in_item * inode->i_sb->s_blocksize, 1034 TYPE_INDIRECT, 3);
936 TYPE_INDIRECT, 3); // key type is unimportant
937 1035
938 RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), 1036 RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
939 "green-805: invalid offset"); 1037 "green-805: invalid offset");
@@ -954,8 +1052,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
954 } 1052 }
955 } 1053 }
956 if (blocks_needed <= max_to_insert) { 1054 if (blocks_needed <= max_to_insert) {
957 /* we are going to add target block to the file. Use allocated 1055 /*
958 block for that */ 1056 * we are going to add target block to
1057 * the file. Use allocated block for that
1058 */
959 un[blocks_needed - 1] = 1059 un[blocks_needed - 1] =
960 cpu_to_le32(allocated_block_nr); 1060 cpu_to_le32(allocated_block_nr);
961 set_block_dev_mapped(bh_result, 1061 set_block_dev_mapped(bh_result,
@@ -964,8 +1064,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
964 done = 1; 1064 done = 1;
965 } else { 1065 } else {
966 /* paste hole to the indirect item */ 1066 /* paste hole to the indirect item */
967 /* If kmalloc failed, max_to_insert becomes zero and it means we 1067 /*
968 only have space for one block */ 1068 * If kmalloc failed, max_to_insert becomes
1069 * zero and it means we only have space for
1070 * one block
1071 */
969 blocks_needed = 1072 blocks_needed =
970 max_to_insert ? max_to_insert : 1; 1073 max_to_insert ? max_to_insert : 1;
971 } 1074 }
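For orientation, the single-block fastpath that the unf_single comment above describes looks roughly like the sketch below. This is not a hunk from the patch; un, unf_single, blocks_needed and max_to_insert are the names used in reiserfs_get_block, but the exact control flow is an assumption.

	if (blocks_needed == 1) {
		/* common case: one pointer, no allocation needed */
		un = &unf_single;
	} else {
		/* a zeroed array of unformatted node pointers */
		un = kzalloc(min_t(__u64, blocks_needed, max_to_insert) *
			     UNFM_P_SIZE, GFP_NOFS);
		if (!un) {
			/* degrade: max_to_insert == 0 means the hole is
			 * pasted one block at a time, as noted above */
			un = &unf_single;
			max_to_insert = 0;
		}
	}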
@@ -984,9 +1087,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
984 goto failure; 1087 goto failure;
985 } 1088 }
986 if (!done) { 1089 if (!done) {
987 /* We need to mark new file size in case this function will be 1090 /*
988 interrupted/aborted later on. And we may do this only for 1091 * We need to mark new file size in case
989 holes. */ 1092 * this function will be interrupted/aborted
1093 * later on. And we may do this only for
1094 * holes.
1095 */
990 inode->i_size += 1096 inode->i_size +=
991 inode->i_sb->s_blocksize * blocks_needed; 1097 inode->i_sb->s_blocksize * blocks_needed;
992 } 1098 }
@@ -995,13 +1101,13 @@ int reiserfs_get_block(struct inode *inode, sector_t block,
995 if (done == 1) 1101 if (done == 1)
996 break; 1102 break;
997 1103
998 /* this loop could log more blocks than we had originally asked 1104 /*
999 ** for. So, we have to allow the transaction to end if it is 1105 * this loop could log more blocks than we had originally
1000 ** too big or too full. Update the inode so things are 1106 * asked for. So, we have to allow the transaction to end
1001 ** consistent if we crash before the function returns 1107 * if it is too big or too full. Update the inode so things
1002 ** 1108 * are consistent if we crash before the function returns
1003 ** release the path so that anybody waiting on the path before 1109 * release the path so that anybody waiting on the path before
1004 ** ending their transaction will be able to continue. 1110 * ending their transaction will be able to continue.
1005 */ 1111 */
1006 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 1112 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1007 retval = restart_transaction(th, inode, &path); 1113 retval = restart_transaction(th, inode, &path);
@@ -1060,8 +1166,10 @@ reiserfs_readpages(struct file *file, struct address_space *mapping,
1060 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1166 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
1061} 1167}
1062 1168
1063/* Compute real number of used bytes by file 1169/*
1064 * Following three functions can go away when we'll have enough space in stat item 1170 * Compute real number of used bytes by file
 1171 * Following three functions can go away once there is enough space in
 1172 * the stat item
1065 */ 1173 */
1066static int real_space_diff(struct inode *inode, int sd_size) 1174static int real_space_diff(struct inode *inode, int sd_size)
1067{ 1175{
@@ -1071,13 +1179,14 @@ static int real_space_diff(struct inode *inode, int sd_size)
1071 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) 1179 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1072 return sd_size; 1180 return sd_size;
1073 1181
1074 /* End of file is also in full block with indirect reference, so round 1182 /*
1075 ** up to the next block. 1183 * End of file is also in full block with indirect reference, so round
1076 ** 1184 * up to the next block.
1077 ** there is just no way to know if the tail is actually packed 1185 *
1078 ** on the file, so we have to assume it isn't. When we pack the 1186 * there is just no way to know if the tail is actually packed
1079 ** tail, we add 4 bytes to pretend there really is an unformatted 1187 * on the file, so we have to assume it isn't. When we pack the
1080 ** node pointer 1188 * tail, we add 4 bytes to pretend there really is an unformatted
1189 * node pointer
1081 */ 1190 */
1082 bytes = 1191 bytes =
1083 ((inode->i_size + 1192 ((inode->i_size +
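The rounding this comment documents is easy to check by hand. A worked example, assuming a 4096-byte block size and 4-byte unformatted node pointers (UNFM_P_SIZE); the file size is hypothetical:

	/*
	 * a 5000-byte file spans two blocks once EOF is rounded up:
	 *
	 *   nr_ptrs  = (5000 + 4096 - 1) >> 12          = 2
	 *   overhead = nr_ptrs * UNFM_P_SIZE + sd_size  = 2 * 4 + sd_size
	 *
	 * the 4 bytes added for a packed tail pretend that one more
	 * unformatted node pointer exists, exactly as described above.
	 */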
@@ -1108,29 +1217,29 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1108 bytes += (loff_t) 511; 1217 bytes += (loff_t) 511;
1109 } 1218 }
1110 1219
1111 /* files from before the quota patch might i_blocks such that 1220 /*
 1112 ** bytes < real_space. Deal with that here to prevent it from 1221 * files from before the quota patch might have i_blocks such that
1113 ** going negative. 1222 * bytes < real_space. Deal with that here to prevent it from
1223 * going negative.
1114 */ 1224 */
1115 if (bytes < real_space) 1225 if (bytes < real_space)
1116 return 0; 1226 return 0;
1117 return (bytes - real_space) >> 9; 1227 return (bytes - real_space) >> 9;
1118} 1228}
1119 1229
1120// 1230/*
1121// BAD: new directories have stat data of new type and all other items 1231 * BAD: new directories have stat data of new type and all other items
 1122// of old type. Version stored in the inode says about body items, so 1232 * of old type. The version stored in the inode describes body items, so
 1123// in update_stat_data we can not rely on inode, but have to check 1233 * in update_stat_data we cannot rely on the inode, but have to check
1124// item version directly 1234 * item version directly
1125// 1235 */
1126 1236
1127// called by read_locked_inode 1237/* called by read_locked_inode */
1128static void init_inode(struct inode *inode, struct treepath *path) 1238static void init_inode(struct inode *inode, struct treepath *path)
1129{ 1239{
1130 struct buffer_head *bh; 1240 struct buffer_head *bh;
1131 struct item_head *ih; 1241 struct item_head *ih;
1132 __u32 rdev; 1242 __u32 rdev;
1133 //int version = ITEM_VERSION_1;
1134 1243
1135 bh = PATH_PLAST_BUFFER(path); 1244 bh = PATH_PLAST_BUFFER(path);
1136 ih = tp_item_head(path); 1245 ih = tp_item_head(path);
@@ -1168,20 +1277,26 @@ static void init_inode(struct inode *inode, struct treepath *path)
1168 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1277 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1169 blocks = (inode->i_size + 511) >> 9; 1278 blocks = (inode->i_size + 511) >> 9;
1170 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); 1279 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1280
1281 /*
1282 * there was a bug in <=3.5.23 when i_blocks could take
1283 * negative values. Starting from 3.5.17 this value could
1284 * even be stored in stat data. For such files we set
1285 * i_blocks based on file size. Just 2 notes: this can be
 1286 * wrong for sparse files. The on-disk value will only be
 1287 * updated if the file's inode ever changes
1288 */
1171 if (inode->i_blocks > blocks) { 1289 if (inode->i_blocks > blocks) {
1172 // there was a bug in <=3.5.23 when i_blocks could take negative
1173 // values. Starting from 3.5.17 this value could even be stored in
1174 // stat data. For such files we set i_blocks based on file
1175 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
1176 // only updated if file's inode will ever change
1177 inode->i_blocks = blocks; 1290 inode->i_blocks = blocks;
1178 } 1291 }
1179 1292
1180 rdev = sd_v1_rdev(sd); 1293 rdev = sd_v1_rdev(sd);
1181 REISERFS_I(inode)->i_first_direct_byte = 1294 REISERFS_I(inode)->i_first_direct_byte =
1182 sd_v1_first_direct_byte(sd); 1295 sd_v1_first_direct_byte(sd);
1183 /* an early bug in the quota code can give us an odd number for the 1296
1184 ** block count. This is incorrect, fix it here. 1297 /*
1298 * an early bug in the quota code can give us an odd
1299 * number for the block count. This is incorrect, fix it here.
1185 */ 1300 */
1186 if (inode->i_blocks & 1) { 1301 if (inode->i_blocks & 1) {
1187 inode->i_blocks++; 1302 inode->i_blocks++;
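A worked example of the i_blocks sanity checks in this hunk, assuming a 4096-byte block size (the file size is hypothetical):

	/*
	 * i_size = 5000 bytes:
	 *
	 *   blocks = (5000 + 511) >> 9         = 10 (512-byte sectors)
	 *   blocks = _ROUND_UP(10, 4096 >> 9)  = 16 (two full blocks)
	 *
	 * a stored i_blocks above 16 is assumed to be damage from the
	 * old bug and is clamped to 16; an odd count is evened out by
	 * the i_blocks++ fix just above.
	 */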
@@ -1189,12 +1304,16 @@ static void init_inode(struct inode *inode, struct treepath *path)
1189 inode_set_bytes(inode, 1304 inode_set_bytes(inode,
1190 to_real_used_space(inode, inode->i_blocks, 1305 to_real_used_space(inode, inode->i_blocks,
1191 SD_V1_SIZE)); 1306 SD_V1_SIZE));
1192 /* nopack is initially zero for v1 objects. For v2 objects, 1307 /*
1193 nopack is initialised from sd_attrs */ 1308 * nopack is initially zero for v1 objects. For v2 objects,
1309 * nopack is initialised from sd_attrs
1310 */
1194 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 1311 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1195 } else { 1312 } else {
1196 // new stat data found, but object may have old items 1313 /*
1197 // (directories and symlinks) 1314 * new stat data found, but object may have old items
1315 * (directories and symlinks)
1316 */
1198 struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); 1317 struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih);
1199 1318
1200 inode->i_mode = sd_v2_mode(sd); 1319 inode->i_mode = sd_v2_mode(sd);
@@ -1225,8 +1344,10 @@ static void init_inode(struct inode *inode, struct treepath *path)
1225 inode_set_bytes(inode, 1344 inode_set_bytes(inode,
1226 to_real_used_space(inode, inode->i_blocks, 1345 to_real_used_space(inode, inode->i_blocks,
1227 SD_V2_SIZE)); 1346 SD_V2_SIZE));
1228 /* read persistent inode attributes from sd and initialise 1347 /*
1229 generic inode flags from them */ 1348 * read persistent inode attributes from sd and initialise
1349 * generic inode flags from them
1350 */
1230 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); 1351 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1231 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); 1352 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1232 } 1353 }
@@ -1249,7 +1370,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
1249 } 1370 }
1250} 1371}
1251 1372
1252// update new stat data with inode fields 1373/* update new stat data with inode fields */
1253static void inode2sd(void *sd, struct inode *inode, loff_t size) 1374static void inode2sd(void *sd, struct inode *inode, loff_t size)
1254{ 1375{
1255 struct stat_data *sd_v2 = (struct stat_data *)sd; 1376 struct stat_data *sd_v2 = (struct stat_data *)sd;
@@ -1273,7 +1394,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
1273 set_sd_v2_attrs(sd_v2, flags); 1394 set_sd_v2_attrs(sd_v2, flags);
1274} 1395}
1275 1396
1276// used to copy inode's fields to old stat data 1397/* used to copy inode's fields to old stat data */
1277static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) 1398static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1278{ 1399{
1279 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; 1400 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
@@ -1292,14 +1413,15 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1292 else 1413 else
1293 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); 1414 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1294 1415
1295 // Sigh. i_first_direct_byte is back 1416 /* Sigh. i_first_direct_byte is back */
1296 set_sd_v1_first_direct_byte(sd_v1, 1417 set_sd_v1_first_direct_byte(sd_v1,
1297 REISERFS_I(inode)->i_first_direct_byte); 1418 REISERFS_I(inode)->i_first_direct_byte);
1298} 1419}
1299 1420
1300/* NOTE, you must prepare the buffer head before sending it here, 1421/*
1301** and then log it after the call 1422 * NOTE, you must prepare the buffer head before sending it here,
1302*/ 1423 * and then log it after the call
1424 */
1303static void update_stat_data(struct treepath *path, struct inode *inode, 1425static void update_stat_data(struct treepath *path, struct inode *inode,
1304 loff_t size) 1426 loff_t size)
1305{ 1427{
@@ -1313,8 +1435,8 @@ static void update_stat_data(struct treepath *path, struct inode *inode,
1313 reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", 1435 reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
1314 INODE_PKEY(inode), ih); 1436 INODE_PKEY(inode), ih);
1315 1437
1438 /* path points to old stat data */
1316 if (stat_data_v1(ih)) { 1439 if (stat_data_v1(ih)) {
1317 // path points to old stat data
1318 inode2sd_v1(ih_item_body(bh, ih), inode, size); 1440 inode2sd_v1(ih_item_body(bh, ih), inode, size);
1319 } else { 1441 } else {
1320 inode2sd(ih_item_body(bh, ih), inode, size); 1442 inode2sd(ih_item_body(bh, ih), inode, size);
@@ -1335,7 +1457,8 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1335 1457
1336 BUG_ON(!th->t_trans_id); 1458 BUG_ON(!th->t_trans_id);
1337 1459
1338 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant 1460 /* key type is unimportant */
1461 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);
1339 1462
1340 for (;;) { 1463 for (;;) {
1341 int pos; 1464 int pos;
@@ -1363,19 +1486,22 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1363 return; 1486 return;
1364 } 1487 }
1365 1488
1366 /* sigh, prepare_for_journal might schedule. When it schedules the 1489 /*
1367 ** FS might change. We have to detect that, and loop back to the 1490 * sigh, prepare_for_journal might schedule. When it
1368 ** search if the stat data item has moved 1491 * schedules the FS might change. We have to detect that,
1492 * and loop back to the search if the stat data item has moved
1369 */ 1493 */
1370 bh = get_last_bh(&path); 1494 bh = get_last_bh(&path);
1371 ih = tp_item_head(&path); 1495 ih = tp_item_head(&path);
1372 copy_item_head(&tmp_ih, ih); 1496 copy_item_head(&tmp_ih, ih);
1373 fs_gen = get_generation(inode->i_sb); 1497 fs_gen = get_generation(inode->i_sb);
1374 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); 1498 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1499
1500 /* Stat_data item has been moved after scheduling. */
1375 if (fs_changed(fs_gen, inode->i_sb) 1501 if (fs_changed(fs_gen, inode->i_sb)
1376 && item_moved(&tmp_ih, &path)) { 1502 && item_moved(&tmp_ih, &path)) {
1377 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 1503 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1378 continue; /* Stat_data item has been moved after scheduling. */ 1504 continue;
1379 } 1505 }
1380 break; 1506 break;
1381 } 1507 }
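The detect-and-retry pattern this hunk documents recurs throughout reiserfs. A condensed sketch using the helpers named above; the loop shape is an abstraction of reiserfs_update_sd_size, not its literal body:

	for (;;) {
		/* find the stat data, remember how the item looked */
		search_for_position_by_key(sb, &key, &path);
		copy_item_head(&tmp_ih, tp_item_head(&path));
		fs_gen = get_generation(sb);

		/* may schedule; the tree can change while we sleep */
		reiserfs_prepare_for_journal(sb, get_last_bh(&path), 1);

		if (fs_changed(fs_gen, sb) && item_moved(&tmp_ih, &path)) {
			/* item moved: undo the prepare, search again */
			reiserfs_restore_prepared_buffer(sb,
							 get_last_bh(&path));
			continue;
		}
		/* buffer is prepared and still holds our item */
		break;
	}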
@@ -1385,23 +1511,23 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1385 return; 1511 return;
1386} 1512}
1387 1513
1388/* reiserfs_read_locked_inode is called to read the inode off disk, and it 1514/*
1389** does a make_bad_inode when things go wrong. But, we need to make sure 1515 * reiserfs_read_locked_inode is called to read the inode off disk, and it
1390** and clear the key in the private portion of the inode, otherwise a 1516 * does a make_bad_inode when things go wrong. But, we need to make sure
1391** corresponding iput might try to delete whatever object the inode last 1517 * and clear the key in the private portion of the inode, otherwise a
1392** represented. 1518 * corresponding iput might try to delete whatever object the inode last
1393*/ 1519 * represented.
1520 */
1394static void reiserfs_make_bad_inode(struct inode *inode) 1521static void reiserfs_make_bad_inode(struct inode *inode)
1395{ 1522{
1396 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1523 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1397 make_bad_inode(inode); 1524 make_bad_inode(inode);
1398} 1525}
1399 1526
1400// 1527/*
1401// initially this function was derived from minix or ext2's analog and 1528 * initially this function was derived from minix or ext2's analog and
1402// evolved as the prototype did 1529 * evolved as the prototype did
1403// 1530 */
1404
1405int reiserfs_init_locked_inode(struct inode *inode, void *p) 1531int reiserfs_init_locked_inode(struct inode *inode, void *p)
1406{ 1532{
1407 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; 1533 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
@@ -1410,8 +1536,10 @@ int reiserfs_init_locked_inode(struct inode *inode, void *p)
1410 return 0; 1536 return 0;
1411} 1537}
1412 1538
1413/* looks for stat data in the tree, and fills up the fields of in-core 1539/*
1414 inode stat data fields */ 1540 * looks for stat data in the tree, and fills up the fields of in-core
1541 * inode stat data fields
1542 */
1415void reiserfs_read_locked_inode(struct inode *inode, 1543void reiserfs_read_locked_inode(struct inode *inode,
1416 struct reiserfs_iget_args *args) 1544 struct reiserfs_iget_args *args)
1417{ 1545{
@@ -1422,8 +1550,10 @@ void reiserfs_read_locked_inode(struct inode *inode,
1422 1550
1423 dirino = args->dirid; 1551 dirino = args->dirid;
1424 1552
1425 /* set version 1, version 2 could be used too, because stat data 1553 /*
1426 key is the same in both versions */ 1554 * set version 1, version 2 could be used too, because stat data
1555 * key is the same in both versions
1556 */
1427 key.version = KEY_FORMAT_3_5; 1557 key.version = KEY_FORMAT_3_5;
1428 key.on_disk_key.k_dir_id = dirino; 1558 key.on_disk_key.k_dir_id = dirino;
1429 key.on_disk_key.k_objectid = inode->i_ino; 1559 key.on_disk_key.k_objectid = inode->i_ino;
@@ -1439,8 +1569,9 @@ void reiserfs_read_locked_inode(struct inode *inode,
1439 reiserfs_make_bad_inode(inode); 1569 reiserfs_make_bad_inode(inode);
1440 return; 1570 return;
1441 } 1571 }
1572
1573 /* a stale NFS handle can trigger this without it being an error */
1442 if (retval != ITEM_FOUND) { 1574 if (retval != ITEM_FOUND) {
1443 /* a stale NFS handle can trigger this without it being an error */
1444 pathrelse(&path_to_sd); 1575 pathrelse(&path_to_sd);
1445 reiserfs_make_bad_inode(inode); 1576 reiserfs_make_bad_inode(inode);
1446 clear_nlink(inode); 1577 clear_nlink(inode);
@@ -1449,20 +1580,25 @@ void reiserfs_read_locked_inode(struct inode *inode,
1449 1580
1450 init_inode(inode, &path_to_sd); 1581 init_inode(inode, &path_to_sd);
1451 1582
1452 /* It is possible that knfsd is trying to access inode of a file 1583 /*
1453 that is being removed from the disk by some other thread. As we 1584 * It is possible that knfsd is trying to access inode of a file
1454 update sd on unlink all that is required is to check for nlink 1585 * that is being removed from the disk by some other thread. As we
1455 here. This bug was first found by Sizif when debugging 1586 * update sd on unlink all that is required is to check for nlink
1456 SquidNG/Butterfly, forgotten, and found again after Philippe 1587 * here. This bug was first found by Sizif when debugging
1457 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1588 * SquidNG/Butterfly, forgotten, and found again after Philippe
1458 1589 * Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1459 More logical fix would require changes in fs/inode.c:iput() to 1590
 1460 remove inode from hash-table _after_ fs cleaned disk stuff up and 1591 * A more logical fix would require changes in fs/inode.c:iput() to
1461 in iget() to return NULL if I_FREEING inode is found in 1592 * remove inode from hash-table _after_ fs cleaned disk stuff up and
1462 hash-table. */ 1593 * in iget() to return NULL if I_FREEING inode is found in
1463 /* Currently there is one place where it's ok to meet inode with 1594 * hash-table.
1464 nlink==0: processing of open-unlinked and half-truncated files 1595 */
1465 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1596
1597 /*
1598 * Currently there is one place where it's ok to meet inode with
1599 * nlink==0: processing of open-unlinked and half-truncated files
1600 * during mount (fs/reiserfs/super.c:finish_unfinished()).
1601 */
1466 if ((inode->i_nlink == 0) && 1602 if ((inode->i_nlink == 0) &&
1467 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { 1603 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1468 reiserfs_warning(inode->i_sb, "vs-13075", 1604 reiserfs_warning(inode->i_sb, "vs-13075",
@@ -1472,7 +1608,8 @@ void reiserfs_read_locked_inode(struct inode *inode,
1472 reiserfs_make_bad_inode(inode); 1608 reiserfs_make_bad_inode(inode);
1473 } 1609 }
1474 1610
 1475 reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ 1611 /* init_inode should be releasing the path */
1612 reiserfs_check_path(&path_to_sd);
1476 1613
1477 /* 1614 /*
1478 * Stat data v1 doesn't support ACLs. 1615 * Stat data v1 doesn't support ACLs.
@@ -1481,7 +1618,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
1481 cache_no_acl(inode); 1618 cache_no_acl(inode);
1482} 1619}
1483 1620
1484/** 1621/*
1485 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). 1622 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
1486 * 1623 *
1487 * @inode: inode from hash table to check 1624 * @inode: inode from hash table to check
@@ -1556,7 +1693,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1556struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, 1693struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1557 int fh_len, int fh_type) 1694 int fh_len, int fh_type)
1558{ 1695{
1559 /* fhtype happens to reflect the number of u32s encoded. 1696 /*
1697 * fhtype happens to reflect the number of u32s encoded.
1560 * due to a bug in earlier code, fhtype might indicate there 1698 * due to a bug in earlier code, fhtype might indicate there
 1561 * are more u32s then actually fitted. 1699 * are more u32s than actually fitted.
1562 * so if fhtype seems to be more than len, reduce fhtype. 1700 * so if fhtype seems to be more than len, reduce fhtype.
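The reduction the comment asks for is a single clamp; a sketch of what follows in reiserfs_fh_to_dentry (the hunk does not show it, so treat the exact form as an assumption):

	/* trust the handle length over the inflated fhtype */
	if (fh_type > fh_len)
		fh_type = fh_len;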
@@ -1625,13 +1763,16 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
1625 return *lenp; 1763 return *lenp;
1626} 1764}
1627 1765
1628/* looks for stat data, then copies fields to it, marks the buffer 1766/*
1629 containing stat data as dirty */ 1767 * looks for stat data, then copies fields to it, marks the buffer
1630/* reiserfs inodes are never really dirty, since the dirty inode call 1768 * containing stat data as dirty
1631** always logs them. This call allows the VFS inode marking routines 1769 */
1632** to properly mark inodes for datasync and such, but only actually 1770/*
1633** does something when called for a synchronous update. 1771 * reiserfs inodes are never really dirty, since the dirty inode call
1634*/ 1772 * always logs them. This call allows the VFS inode marking routines
1773 * to properly mark inodes for datasync and such, but only actually
1774 * does something when called for a synchronous update.
1775 */
1635int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1776int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1636{ 1777{
1637 struct reiserfs_transaction_handle th; 1778 struct reiserfs_transaction_handle th;
@@ -1639,10 +1780,12 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1639 1780
1640 if (inode->i_sb->s_flags & MS_RDONLY) 1781 if (inode->i_sb->s_flags & MS_RDONLY)
1641 return -EROFS; 1782 return -EROFS;
1642 /* memory pressure can sometimes initiate write_inode calls with sync == 1, 1783 /*
1643 ** these cases are just when the system needs ram, not when the 1784 * memory pressure can sometimes initiate write_inode calls with
 1644 ** inode needs to reach disk for safety, and they can safely be 1785 * sync == 1;
1645 ** ignored because the altered inode has already been logged. 1786 * these cases are just when the system needs ram, not when the
1787 * inode needs to reach disk for safety, and they can safely be
1788 * ignored because the altered inode has already been logged.
1646 */ 1789 */
1647 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { 1790 if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) {
1648 reiserfs_write_lock(inode->i_sb); 1791 reiserfs_write_lock(inode->i_sb);
@@ -1655,8 +1798,10 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1655 return 0; 1798 return 0;
1656} 1799}
1657 1800
1658/* stat data of new object is inserted already, this inserts the item 1801/*
1659 containing "." and ".." entries */ 1802 * stat data of new object is inserted already, this inserts the item
1803 * containing "." and ".." entries
1804 */
1660static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, 1805static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1661 struct inode *inode, 1806 struct inode *inode,
1662 struct item_head *ih, struct treepath *path, 1807 struct item_head *ih, struct treepath *path,
@@ -1674,9 +1819,11 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1674 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, 1819 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1675 TYPE_DIRENTRY, 3 /*key length */ ); 1820 TYPE_DIRENTRY, 3 /*key length */ );
1676 1821
1677 /* compose item head for new item. Directories consist of items of 1822 /*
1678 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it 1823 * compose item head for new item. Directories consist of items of
1679 is done by reiserfs_new_inode */ 1824 * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1825 * is done by reiserfs_new_inode
1826 */
1680 if (old_format_only(sb)) { 1827 if (old_format_only(sb)) {
1681 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, 1828 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1682 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1829 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
@@ -1714,9 +1861,12 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1714 return reiserfs_insert_item(th, path, &key, ih, inode, body); 1861 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1715} 1862}
1716 1863
1717/* stat data of object has been inserted, this inserts the item 1864/*
1718 containing the body of symlink */ 1865 * stat data of object has been inserted, this inserts the item
1719static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ 1866 * containing the body of symlink
1867 */
1868static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th,
1869 struct inode *inode,
1720 struct item_head *ih, 1870 struct item_head *ih,
1721 struct treepath *path, const char *symname, 1871 struct treepath *path, const char *symname,
1722 int item_len) 1872 int item_len)
@@ -1754,15 +1904,26 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
1754 return reiserfs_insert_item(th, path, &key, ih, inode, symname); 1904 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1755} 1905}
1756 1906
1757/* inserts the stat data into the tree, and then calls 1907/*
1758 reiserfs_new_directory (to insert ".", ".." item if new object is 1908 * inserts the stat data into the tree, and then calls
1759 directory) or reiserfs_new_symlink (to insert symlink body if new 1909 * reiserfs_new_directory (to insert ".", ".." item if new object is
1760 object is symlink) or nothing (if new object is regular file) 1910 * directory) or reiserfs_new_symlink (to insert symlink body if new
1761 1911 * object is symlink) or nothing (if new object is regular file)
1762 NOTE! uid and gid must already be set in the inode. If we return 1912
1763 non-zero due to an error, we have to drop the quota previously allocated 1913 * NOTE! uid and gid must already be set in the inode. If we return
1764 for the fresh inode. This can only be done outside a transaction, so 1914 * non-zero due to an error, we have to drop the quota previously allocated
1765 if we return non-zero, we also end the transaction. */ 1915 * for the fresh inode. This can only be done outside a transaction, so
1916 * if we return non-zero, we also end the transaction.
1917 *
1918 * @th: active transaction handle
1919 * @dir: parent directory for new inode
1920 * @mode: mode of new inode
1921 * @symname: symlink contents if inode is symlink
1922 * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for
1923 * symlinks
1924 * @inode: inode to be filled
1925 * @security: optional security context to associate with this inode
1926 */
1766int reiserfs_new_inode(struct reiserfs_transaction_handle *th, 1927int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1767 struct inode *dir, umode_t mode, const char *symname, 1928 struct inode *dir, umode_t mode, const char *symname,
 1768 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1929 /* 0 for regular, EMPTY_DIR_SIZE for dirs,
@@ -1820,10 +1981,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1820 } 1981 }
1821 1982
1822 if (old_format_only(sb)) 1983 if (old_format_only(sb))
1823 /* not a perfect generation count, as object ids can be reused, but 1984 /*
1824 ** this is as good as reiserfs can do right now. 1985 * not a perfect generation count, as object ids can be reused,
1825 ** note that the private part of inode isn't filled in yet, we have 1986 * but this is as good as reiserfs can do right now.
1826 ** to use the directory. 1987 * note that the private part of inode isn't filled in yet,
1988 * we have to use the directory.
1827 */ 1989 */
1828 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); 1990 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1829 else 1991 else
@@ -1878,9 +2040,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1878 goto out_bad_inode; 2040 goto out_bad_inode;
1879 } 2041 }
1880 if (old_format_only(sb)) { 2042 if (old_format_only(sb)) {
2043 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1881 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { 2044 if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) {
1882 pathrelse(&path_to_key); 2045 pathrelse(&path_to_key);
1883 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1884 err = -EINVAL; 2046 err = -EINVAL;
1885 goto out_bad_inode; 2047 goto out_bad_inode;
1886 } 2048 }
@@ -1888,9 +2050,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1888 } else { 2050 } else {
1889 inode2sd(&sd, inode, inode->i_size); 2051 inode2sd(&sd, inode, inode->i_size);
1890 } 2052 }
1891 // store in in-core inode the key of stat data and version all 2053 /*
1892 // object items will have (directory items will have old offset 2054 * store in in-core inode the key of stat data and version all
1893 // format, other new objects will consist of new items) 2055 * object items will have (directory items will have old offset
2056 * format, other new objects will consist of new items)
2057 */
1894 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) 2058 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1895 set_inode_item_key_version(inode, KEY_FORMAT_3_5); 2059 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1896 else 2060 else
@@ -1975,10 +2139,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1975 2139
1976 return 0; 2140 return 0;
1977 2141
1978/* it looks like you can easily compress these two goto targets into
1979 * one. Keeping it like this doesn't actually hurt anything, and they
1980 * are place holders for what the quota code actually needs.
1981 */
1982 out_bad_inode: 2142 out_bad_inode:
1983 /* Invalidate the object, nothing was inserted yet */ 2143 /* Invalidate the object, nothing was inserted yet */
1984 INODE_PKEY(inode)->k_objectid = 0; 2144 INODE_PKEY(inode)->k_objectid = 0;
@@ -1990,7 +2150,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1990 2150
1991 out_end_trans: 2151 out_end_trans:
1992 journal_end(th, th->t_super, th->t_blocks_allocated); 2152 journal_end(th, th->t_super, th->t_blocks_allocated);
1993 /* Drop can be outside and it needs more credits so it's better to have it outside */ 2153 /*
2154 * Drop can be outside and it needs more credits so it's better
2155 * to have it outside
2156 */
1994 depth = reiserfs_write_unlock_nested(inode->i_sb); 2157 depth = reiserfs_write_unlock_nested(inode->i_sb);
1995 dquot_drop(inode); 2158 dquot_drop(inode);
1996 reiserfs_write_lock_nested(inode->i_sb, depth); 2159 reiserfs_write_lock_nested(inode->i_sb, depth);
@@ -2006,25 +2169,26 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
2006} 2169}
2007 2170
2008/* 2171/*
2009** finds the tail page in the page cache, 2172 * finds the tail page in the page cache,
2010** reads the last block in. 2173 * reads the last block in.
2011** 2174 *
2012** On success, page_result is set to a locked, pinned page, and bh_result 2175 * On success, page_result is set to a locked, pinned page, and bh_result
2013** is set to an up to date buffer for the last block in the file. returns 0. 2176 * is set to an up to date buffer for the last block in the file. returns 0.
2014** 2177 *
2015** tail conversion is not done, so bh_result might not be valid for writing 2178 * tail conversion is not done, so bh_result might not be valid for writing
2016** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before 2179 * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
2017** trying to write the block. 2180 * trying to write the block.
2018** 2181 *
2019** on failure, nonzero is returned, page_result and bh_result are untouched. 2182 * on failure, nonzero is returned, page_result and bh_result are untouched.
2020*/ 2183 */
2021static int grab_tail_page(struct inode *inode, 2184static int grab_tail_page(struct inode *inode,
2022 struct page **page_result, 2185 struct page **page_result,
2023 struct buffer_head **bh_result) 2186 struct buffer_head **bh_result)
2024{ 2187{
2025 2188
2026 /* we want the page with the last byte in the file, 2189 /*
2027 ** not the page that will hold the next byte for appending 2190 * we want the page with the last byte in the file,
2191 * not the page that will hold the next byte for appending
2028 */ 2192 */
2029 unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; 2193 unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
2030 unsigned long pos = 0; 2194 unsigned long pos = 0;
@@ -2036,10 +2200,11 @@ static int grab_tail_page(struct inode *inode,
2036 struct page *page; 2200 struct page *page;
2037 int error; 2201 int error;
2038 2202
2039 /* we know that we are only called with inode->i_size > 0. 2203 /*
2040 ** we also know that a file tail can never be as big as a block 2204 * we know that we are only called with inode->i_size > 0.
2041 ** If i_size % blocksize == 0, our file is currently block aligned 2205 * we also know that a file tail can never be as big as a block
2042 ** and it won't need converting or zeroing after a truncate. 2206 * If i_size % blocksize == 0, our file is currently block aligned
2207 * and it won't need converting or zeroing after a truncate.
2043 */ 2208 */
2044 if ((offset & (blocksize - 1)) == 0) { 2209 if ((offset & (blocksize - 1)) == 0) {
2045 return -ENOENT; 2210 return -ENOENT;
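Two invariants in grab_tail_page are worth a worked example, assuming 4096-byte pages and blocks (PAGE_CACHE_SHIFT == 12; the file size is hypothetical):

	/*
	 * i_size = 4096:
	 *
	 *   index = (4096 - 1) >> 12 = 0, the page holding the LAST
	 *   byte; i_size >> 12 would give 1, the page an append would
	 *   touch NEXT
	 *
	 *   i_size & (blocksize - 1) = 4096 & 4095 = 0, so the file is
	 *   block aligned, no tail needs converting or zeroing, and
	 *   grab_tail_page returns -ENOENT
	 */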
@@ -2068,10 +2233,11 @@ static int grab_tail_page(struct inode *inode,
2068 } while (bh != head); 2233 } while (bh != head);
2069 2234
2070 if (!buffer_uptodate(bh)) { 2235 if (!buffer_uptodate(bh)) {
2071 /* note, this should never happen, prepare_write should 2236 /*
2072 ** be taking care of this for us. If the buffer isn't up to date, 2237 * note, this should never happen, prepare_write should be
2073 ** I've screwed up the code to find the buffer, or the code to 2238 * taking care of this for us. If the buffer isn't up to
2074 ** call prepare_write 2239 * date, I've screwed up the code to find the buffer, or the
2240 * code to call prepare_write
2075 */ 2241 */
2076 reiserfs_error(inode->i_sb, "clm-6000", 2242 reiserfs_error(inode->i_sb, "clm-6000",
2077 "error reading block %lu", bh->b_blocknr); 2243 "error reading block %lu", bh->b_blocknr);
@@ -2091,11 +2257,11 @@ static int grab_tail_page(struct inode *inode,
2091} 2257}
2092 2258
2093/* 2259/*
2094** vfs version of truncate file. Must NOT be called with 2260 * vfs version of truncate file. Must NOT be called with
2095** a transaction already started. 2261 * a transaction already started.
2096** 2262 *
2097** some code taken from block_truncate_page 2263 * some code taken from block_truncate_page
2098*/ 2264 */
2099int reiserfs_truncate_file(struct inode *inode, int update_timestamps) 2265int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2100{ 2266{
2101 struct reiserfs_transaction_handle th; 2267 struct reiserfs_transaction_handle th;
@@ -2113,9 +2279,11 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2113 if (inode->i_size > 0) { 2279 if (inode->i_size > 0) {
2114 error = grab_tail_page(inode, &page, &bh); 2280 error = grab_tail_page(inode, &page, &bh);
2115 if (error) { 2281 if (error) {
2116 // -ENOENT means we truncated past the end of the file, 2282 /*
2117 // and get_block_create_0 could not find a block to read in, 2283 * -ENOENT means we truncated past the end of the
2118 // which is ok. 2284 * file, and get_block_create_0 could not find a
2285 * block to read in, which is ok.
2286 */
2119 if (error != -ENOENT) 2287 if (error != -ENOENT)
2120 reiserfs_error(inode->i_sb, "clm-6001", 2288 reiserfs_error(inode->i_sb, "clm-6001",
2121 "grab_tail_page failed %d", 2289 "grab_tail_page failed %d",
@@ -2125,25 +2293,30 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2125 } 2293 }
2126 } 2294 }
2127 2295
2128 /* so, if page != NULL, we have a buffer head for the offset at 2296 /*
2129 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, 2297 * so, if page != NULL, we have a buffer head for the offset at
2130 ** then we have an unformatted node. Otherwise, we have a direct item, 2298 * the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2131 ** and no zeroing is required on disk. We zero after the truncate, 2299 * then we have an unformatted node. Otherwise, we have a direct item,
2132 ** because the truncate might pack the item anyway 2300 * and no zeroing is required on disk. We zero after the truncate,
2133 ** (it will unmap bh if it packs). 2301 * because the truncate might pack the item anyway
2302 * (it will unmap bh if it packs).
2303 *
2304 * it is enough to reserve space in transaction for 2 balancings:
2305 * one for "save" link adding and another for the first
2306 * cut_from_item. 1 is for update_sd
2134 */ 2307 */
2135 /* it is enough to reserve space in transaction for 2 balancings:
2136 one for "save" link adding and another for the first
2137 cut_from_item. 1 is for update_sd */
2138 error = journal_begin(&th, inode->i_sb, 2308 error = journal_begin(&th, inode->i_sb,
2139 JOURNAL_PER_BALANCE_CNT * 2 + 1); 2309 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2140 if (error) 2310 if (error)
2141 goto out; 2311 goto out;
2142 reiserfs_update_inode_transaction(inode); 2312 reiserfs_update_inode_transaction(inode);
2143 if (update_timestamps) 2313 if (update_timestamps)
2144 /* we are doing real truncate: if the system crashes before the last 2314 /*
2145 transaction of truncating gets committed - on reboot the file 2315 * we are doing real truncate: if the system crashes
2146 either appears truncated properly or not truncated at all */ 2316 * before the last transaction of truncating gets committed
2317 * - on reboot the file either appears truncated properly
2318 * or not truncated at all
2319 */
2147 add_save_link(&th, inode, 1); 2320 add_save_link(&th, inode, 1);
2148 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); 2321 err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2149 error = 2322 error =
@@ -2212,7 +2385,10 @@ static int map_block_for_writepage(struct inode *inode,
2212 int copy_size; 2385 int copy_size;
2213 int trans_running = 0; 2386 int trans_running = 0;
2214 2387
2215 /* catch places below that try to log something without starting a trans */ 2388 /*
2389 * catch places below that try to log something without
2390 * starting a trans
2391 */
2216 th.t_trans_id = 0; 2392 th.t_trans_id = 0;
2217 2393
2218 if (!buffer_uptodate(bh_result)) { 2394 if (!buffer_uptodate(bh_result)) {
@@ -2331,7 +2507,8 @@ static int map_block_for_writepage(struct inode *inode,
2331 kunmap(bh_result->b_page); 2507 kunmap(bh_result->b_page);
2332 2508
2333 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { 2509 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2334 /* we've copied data from the page into the direct item, so the 2510 /*
2511 * we've copied data from the page into the direct item, so the
2335 * buffer in the page is now clean, mark it to reflect that. 2512 * buffer in the page is now clean, mark it to reflect that.
2336 */ 2513 */
2337 lock_buffer(bh_result); 2514 lock_buffer(bh_result);
@@ -2370,7 +2547,8 @@ static int reiserfs_write_full_page(struct page *page,
2370 return 0; 2547 return 0;
2371 } 2548 }
2372 2549
2373 /* The page dirty bit is cleared before writepage is called, which 2550 /*
2551 * The page dirty bit is cleared before writepage is called, which
2374 * means we have to tell create_empty_buffers to make dirty buffers 2552 * means we have to tell create_empty_buffers to make dirty buffers
2375 * The page really should be up to date at this point, so tossing 2553 * The page really should be up to date at this point, so tossing
2376 * in the BH_Uptodate is just a sanity check. 2554 * in the BH_Uptodate is just a sanity check.
@@ -2381,8 +2559,9 @@ static int reiserfs_write_full_page(struct page *page,
2381 } 2559 }
2382 head = page_buffers(page); 2560 head = page_buffers(page);
2383 2561
2384 /* last page in the file, zero out any contents past the 2562 /*
2385 ** last byte in the file 2563 * last page in the file, zero out any contents past the
2564 * last byte in the file
2386 */ 2565 */
2387 if (page->index >= end_index) { 2566 if (page->index >= end_index) {
2388 unsigned last_offset; 2567 unsigned last_offset;
@@ -2412,7 +2591,8 @@ static int reiserfs_write_full_page(struct page *page,
2412 (!buffer_mapped(bh) || (buffer_mapped(bh) 2591 (!buffer_mapped(bh) || (buffer_mapped(bh)
2413 && bh->b_blocknr == 2592 && bh->b_blocknr ==
2414 0))) { 2593 0))) {
2415 /* not mapped yet, or it points to a direct item, search 2594 /*
2595 * not mapped yet, or it points to a direct item, search
2416 * the btree for the mapping info, and log any direct 2596 * the btree for the mapping info, and log any direct
2417 * items found 2597 * items found
2418 */ 2598 */
@@ -2453,7 +2633,8 @@ static int reiserfs_write_full_page(struct page *page,
2453 journal_mark_dirty(&th, s, bh); 2633 journal_mark_dirty(&th, s, bh);
2454 continue; 2634 continue;
2455 } 2635 }
2456 /* from this point on, we know the buffer is mapped to a 2636 /*
2637 * from this point on, we know the buffer is mapped to a
2457 * real block and not a direct item 2638 * real block and not a direct item
2458 */ 2639 */
2459 if (wbc->sync_mode != WB_SYNC_NONE) { 2640 if (wbc->sync_mode != WB_SYNC_NONE) {
@@ -2520,7 +2701,8 @@ static int reiserfs_write_full_page(struct page *page,
2520 return error; 2701 return error;
2521 2702
2522 fail: 2703 fail:
2523 /* catches various errors, we need to make sure any valid dirty blocks 2704 /*
2705 * catches various errors, we need to make sure any valid dirty blocks
2524 * get to the media. The page is currently locked and not marked for 2706 * get to the media. The page is currently locked and not marked for
2525 * writeback 2707 * writeback
2526 */ 2708 */
@@ -2533,8 +2715,8 @@ static int reiserfs_write_full_page(struct page *page,
2533 mark_buffer_async_write(bh); 2715 mark_buffer_async_write(bh);
2534 } else { 2716 } else {
2535 /* 2717 /*
2536 * clear any dirty bits that might have come from getting 2718 * clear any dirty bits that might have come from
2537 * attached to a dirty page 2719 * getting attached to a dirty page
2538 */ 2720 */
2539 clear_buffer_dirty(bh); 2721 clear_buffer_dirty(bh);
2540 } 2722 }
@@ -2614,15 +2796,18 @@ static int reiserfs_write_begin(struct file *file,
2614 ret = __block_write_begin(page, pos, len, reiserfs_get_block); 2796 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2615 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2797 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2616 struct reiserfs_transaction_handle *th = current->journal_info; 2798 struct reiserfs_transaction_handle *th = current->journal_info;
2617 /* this gets a little ugly. If reiserfs_get_block returned an 2799 /*
2618 * error and left a transacstion running, we've got to close it, 2800 * this gets a little ugly. If reiserfs_get_block returned an
 2619 * and we've got to free handle if it was a persistent transaction. 2801 * error and left a transaction running, we've got to close
 2802 * it, and we've got to free the handle if it was a persistent
2803 * transaction.
2620 * 2804 *
2621 * But, if we had nested into an existing transaction, we need 2805 * But, if we had nested into an existing transaction, we need
2622 * to just drop the ref count on the handle. 2806 * to just drop the ref count on the handle.
2623 * 2807 *
2624 * If old_ref == 0, the transaction is from reiserfs_get_block, 2808 * If old_ref == 0, the transaction is from reiserfs_get_block,
2625 * and it was a persistent trans. Otherwise, it was nested above. 2809 * and it was a persistent trans. Otherwise, it was nested
2810 * above.
2626 */ 2811 */
2627 if (th->t_refcount > old_ref) { 2812 if (th->t_refcount > old_ref) {
2628 if (old_ref) 2813 if (old_ref)
@@ -2671,15 +2856,18 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2671 ret = __block_write_begin(page, from, len, reiserfs_get_block); 2856 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2672 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2857 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2673 struct reiserfs_transaction_handle *th = current->journal_info; 2858 struct reiserfs_transaction_handle *th = current->journal_info;
2674 /* this gets a little ugly. If reiserfs_get_block returned an 2859 /*
2675 * error and left a transacstion running, we've got to close it, 2860 * this gets a little ugly. If reiserfs_get_block returned an
2676 * and we've got to free handle if it was a persistent transaction. 2861 * error and left a transacstion running, we've got to close
2862 * it, and we've got to free handle if it was a persistent
2863 * transaction.
2677 * 2864 *
2678 * But, if we had nested into an existing transaction, we need 2865 * But, if we had nested into an existing transaction, we need
2679 * to just drop the ref count on the handle. 2866 * to just drop the ref count on the handle.
2680 * 2867 *
2681 * If old_ref == 0, the transaction is from reiserfs_get_block, 2868 * If old_ref == 0, the transaction is from reiserfs_get_block,
2682 * and it was a persistent trans. Otherwise, it was nested above. 2869 * and it was a persistent trans. Otherwise, it was nested
2870 * above.
2683 */ 2871 */
2684 if (th->t_refcount > old_ref) { 2872 if (th->t_refcount > old_ref) {
2685 if (old_ref) 2873 if (old_ref)
@@ -2734,17 +2922,20 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2734 2922
2735 reiserfs_commit_page(inode, page, start, start + copied); 2923 reiserfs_commit_page(inode, page, start, start + copied);
2736 2924
2737 /* generic_commit_write does this for us, but does not update the 2925 /*
2738 ** transaction tracking stuff when the size changes. So, we have 2926 * generic_commit_write does this for us, but does not update the
2739 ** to do the i_size updates here. 2927 * transaction tracking stuff when the size changes. So, we have
2928 * to do the i_size updates here.
2740 */ 2929 */
2741 if (pos + copied > inode->i_size) { 2930 if (pos + copied > inode->i_size) {
2742 struct reiserfs_transaction_handle myth; 2931 struct reiserfs_transaction_handle myth;
2743 reiserfs_write_lock(inode->i_sb); 2932 reiserfs_write_lock(inode->i_sb);
2744 locked = true; 2933 locked = true;
2745 /* If the file have grown beyond the border where it 2934 /*
 2746 can have a tail, unmark it as needing a tail 2935 * If the file has grown beyond the border where it
2747 packing */ 2936 * can have a tail, unmark it as needing a tail
2937 * packing
2938 */
2748 if ((have_large_tails(inode->i_sb) 2939 if ((have_large_tails(inode->i_sb)
2749 && inode->i_size > i_block_size(inode) * 4) 2940 && inode->i_size > i_block_size(inode) * 4)
2750 || (have_small_tails(inode->i_sb) 2941 || (have_small_tails(inode->i_sb)
@@ -2759,8 +2950,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2759 inode->i_size = pos + copied; 2950 inode->i_size = pos + copied;
2760 /* 2951 /*
2761 * this will just nest into our transaction. It's important 2952 * this will just nest into our transaction. It's important
2762 * to use mark_inode_dirty so the inode gets pushed around on the 2953 * to use mark_inode_dirty so the inode gets pushed around on
2763 * dirty lists, and so that O_SYNC works as expected 2954 * the dirty lists, and so that O_SYNC works as expected
2764 */ 2955 */
2765 mark_inode_dirty(inode); 2956 mark_inode_dirty(inode);
2766 reiserfs_update_sd(&myth, inode); 2957 reiserfs_update_sd(&myth, inode);
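The thresholds in the condition above work out to concrete sizes; a sketch assuming a 4096-byte block size (which limit applies depends on how tail packing was configured at mount time):

	/*
	 * large tails: tail allowed until i_size > 4 * 4096 = 16384
	 * small tails: tail allowed until i_size > 1 * 4096 =  4096
	 *
	 * past the matching limit the file can no longer end in a
	 * direct item, so the pack-on-close hint is dropped and later
	 * closes will not attempt a tail conversion.
	 */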
@@ -2822,15 +3013,18 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2822 } 3013 }
2823 reiserfs_commit_page(inode, page, from, to); 3014 reiserfs_commit_page(inode, page, from, to);
2824 3015
2825 /* generic_commit_write does this for us, but does not update the 3016 /*
2826 ** transaction tracking stuff when the size changes. So, we have 3017 * generic_commit_write does this for us, but does not update the
2827 ** to do the i_size updates here. 3018 * transaction tracking stuff when the size changes. So, we have
3019 * to do the i_size updates here.
2828 */ 3020 */
2829 if (pos > inode->i_size) { 3021 if (pos > inode->i_size) {
2830 struct reiserfs_transaction_handle myth; 3022 struct reiserfs_transaction_handle myth;
2831 /* If the file have grown beyond the border where it 3023 /*
2832 can have a tail, unmark it as needing a tail 3024 * If the file have grown beyond the border where it
2833 packing */ 3025 * can have a tail, unmark it as needing a tail
3026 * packing
3027 */
2834 if ((have_large_tails(inode->i_sb) 3028 if ((have_large_tails(inode->i_sb)
2835 && inode->i_size > i_block_size(inode) * 4) 3029 && inode->i_size > i_block_size(inode) * 4)
2836 || (have_small_tails(inode->i_sb) 3030 || (have_small_tails(inode->i_sb)
@@ -2845,8 +3039,8 @@ int reiserfs_commit_write(struct file *f, struct page *page,
2845 inode->i_size = pos; 3039 inode->i_size = pos;
2846 /* 3040 /*
2847 * this will just nest into our transaction. It's important 3041 * this will just nest into our transaction. It's important
2848 * to use mark_inode_dirty so the inode gets pushed around on the 3042 * to use mark_inode_dirty so the inode gets pushed around
2849 * dirty lists, and so that O_SYNC works as expected 3043 * on the dirty lists, and so that O_SYNC works as expected
2850 */ 3044 */
2851 mark_inode_dirty(inode); 3045 mark_inode_dirty(inode);
2852 reiserfs_update_sd(&myth, inode); 3046 reiserfs_update_sd(&myth, inode);
@@ -2924,9 +3118,10 @@ void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2924 } 3118 }
2925} 3119}
2926 3120
2927/* decide if this buffer needs to stay around for data logging or ordered 3121/*
2928** write purposes 3122 * decide if this buffer needs to stay around for data logging or ordered
2929*/ 3123 * write purposes
3124 */
2930static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) 3125static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2931{ 3126{
2932 int ret = 1; 3127 int ret = 1;
@@ -2937,7 +3132,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2937 if (!buffer_mapped(bh)) { 3132 if (!buffer_mapped(bh)) {
2938 goto free_jh; 3133 goto free_jh;
2939 } 3134 }
2940 /* the page is locked, and the only places that log a data buffer 3135 /*
3136 * the page is locked, and the only places that log a data buffer
2941 * also lock the page. 3137 * also lock the page.
2942 */ 3138 */
2943 if (reiserfs_file_data_log(inode)) { 3139 if (reiserfs_file_data_log(inode)) {
@@ -2952,7 +3148,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2952 struct reiserfs_journal_list *jl; 3148 struct reiserfs_journal_list *jl;
2953 struct reiserfs_jh *jh = bh->b_private; 3149 struct reiserfs_jh *jh = bh->b_private;
2954 3150
2955 /* why is this safe? 3151 /*
3152 * why is this safe?
2956 * reiserfs_setattr updates i_size in the on disk 3153 * reiserfs_setattr updates i_size in the on disk
2957 * stat data before allowing vmtruncate to be called. 3154 * stat data before allowing vmtruncate to be called.
2958 * 3155 *
@@ -3080,8 +3277,10 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3080 return ret; 3277 return ret;
3081} 3278}
3082 3279
3083/* We thank Mingming Cao for helping us understand in great detail what 3280/*
3084 to do in this section of the code. */ 3281 * We thank Mingming Cao for helping us understand in great detail what
3282 * to do in this section of the code.
3283 */
3085static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 3284static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3086 const struct iovec *iov, loff_t offset, 3285 const struct iovec *iov, loff_t offset,
3087 unsigned long nr_segs) 3286 unsigned long nr_segs)
@@ -3127,8 +3326,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3127 dquot_initialize(inode); 3326 dquot_initialize(inode);
3128 reiserfs_write_lock(inode->i_sb); 3327 reiserfs_write_lock(inode->i_sb);
3129 if (attr->ia_valid & ATTR_SIZE) { 3328 if (attr->ia_valid & ATTR_SIZE) {
3130 /* version 2 items will be caught by the s_maxbytes check 3329 /*
3131 ** done for us in vmtruncate 3330 * version 2 items will be caught by the s_maxbytes check
3331 * done for us in vmtruncate
3132 */ 3332 */
3133 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && 3333 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3134 attr->ia_size > MAX_NON_LFS) { 3334 attr->ia_size > MAX_NON_LFS) {
@@ -3189,7 +3389,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3189 if (error) 3389 if (error)
3190 return error; 3390 return error;
3191 3391
3192 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 3392 /*
3393 * (user+group)*(old+new) structure - we count quota
 3394 * info and inode write (sb, inode)
3395 */
3193 reiserfs_write_lock(inode->i_sb); 3396 reiserfs_write_lock(inode->i_sb);
3194 error = journal_begin(&th, inode->i_sb, jbegin_count); 3397 error = journal_begin(&th, inode->i_sb, jbegin_count);
3195 reiserfs_write_unlock(inode->i_sb); 3398 reiserfs_write_unlock(inode->i_sb);
@@ -3203,8 +3406,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3203 goto out; 3406 goto out;
3204 } 3407 }
3205 3408
3206 /* Update corresponding info in inode so that everything is in 3409 /*
3207 * one transaction */ 3410 * Update corresponding info in inode so that everything
3411 * is in one transaction
3412 */
3208 if (attr->ia_valid & ATTR_UID) 3413 if (attr->ia_valid & ATTR_UID)
3209 inode->i_uid = attr->ia_uid; 3414 inode->i_uid = attr->ia_uid;
3210 if (attr->ia_valid & ATTR_GID) 3415 if (attr->ia_valid & ATTR_GID)
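A condensed sketch of the transaction shape this comment asks for, keeping the quota transfer and the inode update in one transaction (dquot_transfer() is the generic VFS quota helper; the error handling here is abbreviated and partly assumed):

	error = journal_begin(&th, inode->i_sb, jbegin_count);
	if (error)
		goto out;
	/* move the charged usage from the old owner to the new one */
	error = dquot_transfer(inode, attr);
	if (!error) {
		if (attr->ia_valid & ATTR_UID)
			inode->i_uid = attr->ia_uid;
		if (attr->ia_valid & ATTR_GID)
			inode->i_gid = attr->ia_gid;
		mark_inode_dirty(inode);
	}
	error = journal_end(&th, inode->i_sb, jbegin_count);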
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 946ccbf5b5a1..a4197c3240b9 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -15,7 +15,8 @@
15 * reiserfs_ioctl - handler for ioctl for inode 15 * reiserfs_ioctl - handler for ioctl for inode
16 * supported commands: 16 * supported commands:
17 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect 17 * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect
 18 * and prevent packing file (argument arg has to be non-zero) 18 * and prevent packing file (argument arg has to
19 * be non-zero)
19 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 20 * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
20 * 3) That's all for a while ... 21 * 3) That's all for a while ...
21 */ 22 */
@@ -132,7 +133,10 @@ setversion_out:
132long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, 133long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
133 unsigned long arg) 134 unsigned long arg)
134{ 135{
135 /* These are just misnamed, they actually get/put from/to user an int */ 136 /*
 137 * These are just misnamed; they actually
138 * get/put from/to user an int
139 */
136 switch (cmd) { 140 switch (cmd) {
137 case REISERFS_IOC32_UNPACK: 141 case REISERFS_IOC32_UNPACK:
138 cmd = REISERFS_IOC_UNPACK; 142 cmd = REISERFS_IOC_UNPACK;
@@ -160,10 +164,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
160int reiserfs_commit_write(struct file *f, struct page *page, 164int reiserfs_commit_write(struct file *f, struct page *page,
161 unsigned from, unsigned to); 165 unsigned from, unsigned to);
162/* 166/*
163** reiserfs_unpack 167 * reiserfs_unpack
164** Function try to convert tail from direct item into indirect. 168 * Function tries to convert tail from direct item into indirect.
165** It set up nopack attribute in the REISERFS_I(inode)->nopack 169 * It sets the nopack attribute in REISERFS_I(inode)->nopack
166*/ 170 */
167int reiserfs_unpack(struct inode *inode, struct file *filp) 171int reiserfs_unpack(struct inode *inode, struct file *filp)
168{ 172{
169 int retval = 0; 173 int retval = 0;
@@ -194,9 +198,10 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
194 goto out; 198 goto out;
195 } 199 }
196 200
197 /* we unpack by finding the page with the tail, and calling 201 /*
198 ** __reiserfs_write_begin on that page. This will force a 202 * we unpack by finding the page with the tail, and calling
199 ** reiserfs_get_block to unpack the tail for us. 203 * __reiserfs_write_begin on that page. This will force a
204 * reiserfs_get_block to unpack the tail for us.
200 */ 205 */
201 index = inode->i_size >> PAGE_CACHE_SHIFT; 206 index = inode->i_size >> PAGE_CACHE_SHIFT;
202 mapping = inode->i_mapping; 207 mapping = inode->i_mapping;
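(Sketch of the unpack flow the comment above describes, reconstructed from context rather than quoted verbatim from the file: find the page holding the tail, then let __reiserfs_write_begin() force reiserfs_get_block() to do the conversion.)

/* Hedged sketch of the flow inside reiserfs_unpack(). */
struct page *page;
pgoff_t index = inode->i_size >> PAGE_CACHE_SHIFT;	/* page with the tail */
unsigned write_from = inode->i_size & (PAGE_CACHE_SIZE - 1);

page = grab_cache_page(inode->i_mapping, index);
if (page) {
	/* forces reiserfs_get_block() to unpack the tail for us */
	retval = __reiserfs_write_begin(page, write_from, 0);
	unlock_page(page);
	page_cache_release(page);
}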
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index c9f136527386..cb6b826ca5e9 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -5,15 +5,17 @@
5#include <linux/time.h> 5#include <linux/time.h>
6#include "reiserfs.h" 6#include "reiserfs.h"
7 7
8// this contains item handlers for old item types: sd, direct, 8/*
9// indirect, directory 9 * this contains item handlers for old item types: sd, direct,
10 * indirect, directory
11 */
10 12
11/* and where are the comments? how about saying where we can find an 13/*
12 explanation of each item handler method? -Hans */ 14 * and where are the comments? how about saying where we can find an
15 * explanation of each item handler method? -Hans
16 */
13 17
14////////////////////////////////////////////////////////////////////////////// 18/* stat data functions */
15// stat data functions
16//
17static int sd_bytes_number(struct item_head *ih, int block_size) 19static int sd_bytes_number(struct item_head *ih, int block_size)
18{ 20{
19 return 0; 21 return 0;
@@ -60,7 +62,7 @@ static void sd_print_item(struct item_head *ih, char *item)
60 62
61static void sd_check_item(struct item_head *ih, char *item) 63static void sd_check_item(struct item_head *ih, char *item)
62{ 64{
63 // FIXME: type something here! 65 /* unused */
64} 66}
65 67
66static int sd_create_vi(struct virtual_node *vn, 68static int sd_create_vi(struct virtual_node *vn,
@@ -68,7 +70,6 @@ static int sd_create_vi(struct virtual_node *vn,
68 int is_affected, int insert_size) 70 int is_affected, int insert_size)
69{ 71{
70 vi->vi_index = TYPE_STAT_DATA; 72 vi->vi_index = TYPE_STAT_DATA;
71 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
72 return 0; 73 return 0;
73} 74}
74 75
@@ -117,15 +118,13 @@ static struct item_operations stat_data_ops = {
117 .print_vi = sd_print_vi 118 .print_vi = sd_print_vi
118}; 119};
119 120
120////////////////////////////////////////////////////////////////////////////// 121/* direct item functions */
121// direct item functions
122//
123static int direct_bytes_number(struct item_head *ih, int block_size) 122static int direct_bytes_number(struct item_head *ih, int block_size)
124{ 123{
125 return ih_item_len(ih); 124 return ih_item_len(ih);
126} 125}
127 126
128// FIXME: this should probably switch to indirect as well 127/* FIXME: this should probably switch to indirect as well */
129static void direct_decrement_key(struct cpu_key *key) 128static void direct_decrement_key(struct cpu_key *key)
130{ 129{
131 cpu_key_k_offset_dec(key); 130 cpu_key_k_offset_dec(key);
@@ -144,7 +143,7 @@ static void direct_print_item(struct item_head *ih, char *item)
144{ 143{
145 int j = 0; 144 int j = 0;
146 145
147// return; 146/* return; */
148 printk("\""); 147 printk("\"");
149 while (j < ih_item_len(ih)) 148 while (j < ih_item_len(ih))
150 printk("%c", item[j++]); 149 printk("%c", item[j++]);
@@ -153,7 +152,7 @@ static void direct_print_item(struct item_head *ih, char *item)
153 152
154static void direct_check_item(struct item_head *ih, char *item) 153static void direct_check_item(struct item_head *ih, char *item)
155{ 154{
156 // FIXME: type something here! 155 /* unused */
157} 156}
158 157
159static int direct_create_vi(struct virtual_node *vn, 158static int direct_create_vi(struct virtual_node *vn,
@@ -161,7 +160,6 @@ static int direct_create_vi(struct virtual_node *vn,
161 int is_affected, int insert_size) 160 int is_affected, int insert_size)
162{ 161{
163 vi->vi_index = TYPE_DIRECT; 162 vi->vi_index = TYPE_DIRECT;
164 //vi->vi_type |= VI_TYPE_DIRECT;
165 return 0; 163 return 0;
166} 164}
167 165
@@ -211,16 +209,13 @@ static struct item_operations direct_ops = {
211 .print_vi = direct_print_vi 209 .print_vi = direct_print_vi
212}; 210};
213 211
214////////////////////////////////////////////////////////////////////////////// 212/* indirect item functions */
215// indirect item functions
216//
217
218static int indirect_bytes_number(struct item_head *ih, int block_size) 213static int indirect_bytes_number(struct item_head *ih, int block_size)
219{ 214{
220 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); 215 return ih_item_len(ih) / UNFM_P_SIZE * block_size;
221} 216}
222 217
223// decrease offset, if it becomes 0, change type to stat data 218/* decrease offset, if it becomes 0, change type to stat data */
224static void indirect_decrement_key(struct cpu_key *key) 219static void indirect_decrement_key(struct cpu_key *key)
225{ 220{
226 cpu_key_k_offset_dec(key); 221 cpu_key_k_offset_dec(key);
@@ -228,7 +223,7 @@ static void indirect_decrement_key(struct cpu_key *key)
228 set_cpu_key_k_type(key, TYPE_STAT_DATA); 223 set_cpu_key_k_type(key, TYPE_STAT_DATA);
229} 224}
230 225
231// if it is not first item of the body, then it is mergeable 226/* if it is not first item of the body, then it is mergeable */
232static int indirect_is_left_mergeable(struct reiserfs_key *key, 227static int indirect_is_left_mergeable(struct reiserfs_key *key,
233 unsigned long bsize) 228 unsigned long bsize)
234{ 229{
@@ -236,7 +231,7 @@ static int indirect_is_left_mergeable(struct reiserfs_key *key,
236 return (le_key_k_offset(version, key) != 1); 231 return (le_key_k_offset(version, key) != 1);
237} 232}
238 233
239// printing of indirect item 234/* printing of indirect item */
240static void start_new_sequence(__u32 * start, int *len, __u32 new) 235static void start_new_sequence(__u32 * start, int *len, __u32 new)
241{ 236{
242 *start = new; 237 *start = new;
@@ -295,7 +290,7 @@ static void indirect_print_item(struct item_head *ih, char *item)
295 290
296static void indirect_check_item(struct item_head *ih, char *item) 291static void indirect_check_item(struct item_head *ih, char *item)
297{ 292{
298 // FIXME: type something here! 293 /* unused */
299} 294}
300 295
301static int indirect_create_vi(struct virtual_node *vn, 296static int indirect_create_vi(struct virtual_node *vn,
@@ -303,7 +298,6 @@ static int indirect_create_vi(struct virtual_node *vn,
303 int is_affected, int insert_size) 298 int is_affected, int insert_size)
304{ 299{
305 vi->vi_index = TYPE_INDIRECT; 300 vi->vi_index = TYPE_INDIRECT;
306 //vi->vi_type |= VI_TYPE_INDIRECT;
307 return 0; 301 return 0;
308} 302}
309 303
@@ -321,16 +315,19 @@ static int indirect_check_right(struct virtual_item *vi, int free)
321 return indirect_check_left(vi, free, 0, 0); 315 return indirect_check_left(vi, free, 0, 0);
322} 316}
323 317
324// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) 318/*
319 * return size in bytes of 'units' units. If first == 0 - calculate
320 * from the head (left), otherwise - from tail (right)
321 */
325static int indirect_part_size(struct virtual_item *vi, int first, int units) 322static int indirect_part_size(struct virtual_item *vi, int first, int units)
326{ 323{
327 // unit of indirect item is byte (yet) 324 /* unit of indirect item is byte (yet) */
328 return units; 325 return units;
329} 326}
330 327
331static int indirect_unit_num(struct virtual_item *vi) 328static int indirect_unit_num(struct virtual_item *vi)
332{ 329{
333 // unit of indirect item is byte (yet) 330 /* unit of indirect item is byte (yet) */
334 return vi->vi_item_len - IH_SIZE; 331 return vi->vi_item_len - IH_SIZE;
335} 332}
336 333
@@ -356,10 +353,7 @@ static struct item_operations indirect_ops = {
356 .print_vi = indirect_print_vi 353 .print_vi = indirect_print_vi
357}; 354};
358 355
359////////////////////////////////////////////////////////////////////////////// 356/* direntry functions */
360// direntry functions
361//
362
363static int direntry_bytes_number(struct item_head *ih, int block_size) 357static int direntry_bytes_number(struct item_head *ih, int block_size)
364{ 358{
365 reiserfs_warning(NULL, "vs-16090", 359 reiserfs_warning(NULL, "vs-16090",
@@ -428,7 +422,7 @@ static void direntry_check_item(struct item_head *ih, char *item)
428 int i; 422 int i;
429 struct reiserfs_de_head *deh; 423 struct reiserfs_de_head *deh;
430 424
431 // FIXME: type something here! 425 /* unused */
432 deh = (struct reiserfs_de_head *)item; 426 deh = (struct reiserfs_de_head *)item;
433 for (i = 0; i < ih_entry_count(ih); i++, deh++) { 427 for (i = 0; i < ih_entry_count(ih); i++, deh++) {
434 ; 428 ;
@@ -439,7 +433,8 @@ static void direntry_check_item(struct item_head *ih, char *item)
439 433
440/* 434/*
441 * function returns old entry number in directory item in real node 435 * function returns old entry number in directory item in real node
442 * using new entry number in virtual item in virtual node */ 436 * using new entry number in virtual item in virtual node
437 */
443static inline int old_entry_num(int is_affected, int virtual_entry_num, 438static inline int old_entry_num(int is_affected, int virtual_entry_num,
444 int pos_in_item, int mode) 439 int pos_in_item, int mode)
445{ 440{
@@ -463,9 +458,11 @@ static inline int old_entry_num(int is_affected, int virtual_entry_num,
463 return virtual_entry_num - 1; 458 return virtual_entry_num - 1;
464} 459}
465 460
466/* Create an array of sizes of directory entries for virtual 461/*
467 item. Return space used by an item. FIXME: no control over 462 * Create an array of sizes of directory entries for virtual
468 consuming of space used by this item handler */ 463 * item. Return space used by an item. FIXME: no control over
464 * consuming of space used by this item handler
465 */
469static int direntry_create_vi(struct virtual_node *vn, 466static int direntry_create_vi(struct virtual_node *vn,
470 struct virtual_item *vi, 467 struct virtual_item *vi,
471 int is_affected, int insert_size) 468 int is_affected, int insert_size)
@@ -529,10 +526,10 @@ static int direntry_create_vi(struct virtual_node *vn,
529 526
530} 527}
531 528
532// 529/*
533// return number of entries which may fit into specified amount of 530 * return number of entries which may fit into specified amount of
534// free space, or -1 if free space is not enough even for 1 entry 531 * free space, or -1 if free space is not enough even for 1 entry
535// 532 */
536static int direntry_check_left(struct virtual_item *vi, int free, 533static int direntry_check_left(struct virtual_item *vi, int free,
537 int start_skip, int end_skip) 534 int start_skip, int end_skip)
538{ 535{
@@ -541,8 +538,8 @@ static int direntry_check_left(struct virtual_item *vi, int free,
541 struct direntry_uarea *dir_u = vi->vi_uarea; 538 struct direntry_uarea *dir_u = vi->vi_uarea;
542 539
543 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { 540 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
541 /* i-th entry doesn't fit into the remaining free space */
544 if (dir_u->entry_sizes[i] > free) 542 if (dir_u->entry_sizes[i] > free)
545 /* i-th entry doesn't fit into the remaining free space */
546 break; 543 break;
547 544
548 free -= dir_u->entry_sizes[i]; 545 free -= dir_u->entry_sizes[i];
@@ -570,8 +567,8 @@ static int direntry_check_right(struct virtual_item *vi, int free)
570 struct direntry_uarea *dir_u = vi->vi_uarea; 567 struct direntry_uarea *dir_u = vi->vi_uarea;
571 568
572 for (i = dir_u->entry_count - 1; i >= 0; i--) { 569 for (i = dir_u->entry_count - 1; i >= 0; i--) {
570 /* i-th entry doesn't fit into the remaining free space */
573 if (dir_u->entry_sizes[i] > free) 571 if (dir_u->entry_sizes[i] > free)
574 /* i-th entry doesn't fit into the remaining free space */
575 break; 572 break;
576 573
577 free -= dir_u->entry_sizes[i]; 574 free -= dir_u->entry_sizes[i];
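(The two hunks above share one counting pattern; a self-contained sketch with assumed names, matching the description "number of entries which fit, or -1 if not even one fits":)

/* Illustrative sketch of the direntry_check_left()/_right() loop. */
static int entries_that_fit(const int *entry_sizes, int count, int free)
{
	int i;

	for (i = 0; i < count; i++) {
		if (entry_sizes[i] > free)
			break;		/* i-th entry doesn't fit */
		free -= entry_sizes[i];
	}
	return i ? i : -1;		/* -1: not even one entry fits */
}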
@@ -643,9 +640,7 @@ static struct item_operations direntry_ops = {
643 .print_vi = direntry_print_vi 640 .print_vi = direntry_print_vi
644}; 641};
645 642
646////////////////////////////////////////////////////////////////////////////// 643/* Error catching functions to catch errors caused by incorrect item types. */
647// Error catching functions to catch errors caused by incorrect item types.
648//
649static int errcatch_bytes_number(struct item_head *ih, int block_size) 644static int errcatch_bytes_number(struct item_head *ih, int block_size)
650{ 645{
651 reiserfs_warning(NULL, "green-16001", 646 reiserfs_warning(NULL, "green-16001",
@@ -685,8 +680,12 @@ static int errcatch_create_vi(struct virtual_node *vn,
685{ 680{
686 reiserfs_warning(NULL, "green-16006", 681 reiserfs_warning(NULL, "green-16006",
687 "Invalid item type observed, run fsck ASAP"); 682 "Invalid item type observed, run fsck ASAP");
688 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where 683 /*
689 // this operation is called from is of return type void. 684 * We might return -1 here as well, but it won't help as
685 * create_virtual_node() from where this operation is called
686 * from is of return type void.
687 */
688 return 0;
690} 689}
691 690
692static int errcatch_check_left(struct virtual_item *vi, int free, 691static int errcatch_check_left(struct virtual_item *vi, int free,
@@ -739,9 +738,6 @@ static struct item_operations errcatch_ops = {
739 errcatch_print_vi 738 errcatch_print_vi
740}; 739};
741 740
742//////////////////////////////////////////////////////////////////////////////
743//
744//
745#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) 741#if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
746#error Item types must use disk-format assigned values. 742#error Item types must use disk-format assigned values.
747#endif 743#endif
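(Why the #error above matters, sketched from context: the handlers are dispatched through a table indexed directly by the on-disk item type value, so the constants cannot be renumbered. The table below is an assumed simplification, not the file's actual initializer.)

/* Hedged sketch: dispatch keyed by on-disk type value. */
static struct item_operations *example_ops[] = {
	[TYPE_STAT_DATA] = &stat_data_ops,	/* 0 */
	[TYPE_INDIRECT]  = &indirect_ops,	/* 1 */
	[TYPE_DIRECT]    = &direct_ops,		/* 2 */
	[TYPE_DIRENTRY]  = &direntry_ops,	/* 3 */
};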
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 225921126455..48f03e5d16ef 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,38 +1,38 @@
1/* 1/*
2** Write ahead logging implementation copyright Chris Mason 2000 2 * Write ahead logging implementation copyright Chris Mason 2000
3** 3 *
4** The background commits make this code very interrelated, and 4 * The background commits make this code very interrelated, and
5** overly complex. I need to rethink things a bit....The major players: 5 * overly complex. I need to rethink things a bit....The major players:
6** 6 *
7** journal_begin -- call with the number of blocks you expect to log. 7 * journal_begin -- call with the number of blocks you expect to log.
8** If the current transaction is too 8 * If the current transaction is too
9** old, it will block until the current transaction is 9 * old, it will block until the current transaction is
10** finished, and then start a new one. 10 * finished, and then start a new one.
11** Usually, your transaction will get joined in with 11 * Usually, your transaction will get joined in with
12** previous ones for speed. 12 * previous ones for speed.
13** 13 *
14** journal_join -- same as journal_begin, but won't block on the current 14 * journal_join -- same as journal_begin, but won't block on the current
15** transaction regardless of age. Don't ever call 15 * transaction regardless of age. Don't ever call
16** this. Ever. There are only two places it should be 16 * this. Ever. There are only two places it should be
17** called from, and they are both inside this file. 17 * called from, and they are both inside this file.
18** 18 *
19** journal_mark_dirty -- adds blocks into this transaction. clears any flags 19 * journal_mark_dirty -- adds blocks into this transaction. clears any flags
20** that might make them get sent to disk 20 * that might make them get sent to disk
21** and then marks them BH_JDirty. Puts the buffer head 21 * and then marks them BH_JDirty. Puts the buffer head
22** into the current transaction hash. 22 * into the current transaction hash.
23** 23 *
24** journal_end -- if the current transaction is batchable, it does nothing 24 * journal_end -- if the current transaction is batchable, it does nothing
25** otherwise, it could do an async/synchronous commit, or 25 * otherwise, it could do an async/synchronous commit, or
26** a full flush of all log and real blocks in the 26 * a full flush of all log and real blocks in the
27** transaction. 27 * transaction.
28** 28 *
29** flush_old_commits -- if the current transaction is too old, it is ended and 29 * flush_old_commits -- if the current transaction is too old, it is ended and
30** commit blocks are sent to disk. Forces commit blocks 30 * commit blocks are sent to disk. Forces commit blocks
31** to disk for all backgrounded commits that have been 31 * to disk for all backgrounded commits that have been
32** around too long. 32 * around too long.
33** -- Note, if you call this as an immediate flush from 33 * -- Note, if you call this as an immediate flush from
34** from within kupdate, it will ignore the immediate flag 34 * within kupdate, it will ignore the immediate flag
35*/ 35 */
36 36
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/semaphore.h> 38#include <linux/semaphore.h>
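(The header comment above describes the journal entry points; a minimal sketch of the calling pattern, simplified from context. Real callers compute jbegin_count carefully and check every return code.)

/* Hedged sketch of a typical transaction, not taken from the file. */
struct reiserfs_transaction_handle th;
int err;

err = journal_begin(&th, sb, jbegin_count);	/* may join an old transaction */
if (!err) {
	/* log this buffer: clears write-out flags, marks BH_JDirty */
	journal_mark_dirty(&th, sb, bh);
	/* batchable transactions return immediately here */
	err = journal_end(&th, sb, jbegin_count);
}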
@@ -58,16 +58,19 @@
58#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 58#define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
59 j_working_list)) 59 j_working_list))
60 60
61#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 61/* must be correct to keep the desc and commit structs at 4k */
62 structs at 4k */ 62#define JOURNAL_TRANS_HALF 1018
63#define BUFNR 64 /*read ahead */ 63#define BUFNR 64 /*read ahead */
64 64
65/* cnode stat bits. Move these into reiserfs_fs.h */ 65/* cnode stat bits. Move these into reiserfs_fs.h */
66 66
67#define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 67/* this block was freed, and can't be written. */
68#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 68#define BLOCK_FREED 2
69/* this block was freed during this transaction, and can't be written */
70#define BLOCK_FREED_HOLDER 3
69 71
70#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 72/* used in flush_journal_list */
73#define BLOCK_NEEDS_FLUSH 4
71#define BLOCK_DIRTIED 5 74#define BLOCK_DIRTIED 5
72 75
73/* journal list state bits */ 76/* journal list state bits */
@@ -100,8 +103,10 @@ static void queue_log_writer(struct super_block *s);
100/* values for join in do_journal_begin_r */ 103/* values for join in do_journal_begin_r */
101enum { 104enum {
102 JBEGIN_REG = 0, /* regular journal begin */ 105 JBEGIN_REG = 0, /* regular journal begin */
103 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 106 /* join the running transaction if at all possible */
104 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 107 JBEGIN_JOIN = 1,
108 /* called from cleanup code, ignores aborted flag */
109 JBEGIN_ABORT = 2,
105}; 110};
106 111
107static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 112static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
@@ -116,10 +121,11 @@ static void init_journal_hash(struct super_block *sb)
116} 121}
117 122
118/* 123/*
119** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to 124 * clears BH_Dirty and sticks the buffer on the clean list. Called because
120** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 125 * I can't allow refile_buffer to make schedule happen after I've freed a
121** more details. 126 * block. Look at remove_from_transaction and journal_mark_freed for
122*/ 127 * more details.
128 */
123static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) 129static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
124{ 130{
125 if (bh) { 131 if (bh) {
@@ -197,7 +203,8 @@ static void allocate_bitmap_nodes(struct super_block *sb)
197 list_add(&bn->list, &journal->j_bitmap_nodes); 203 list_add(&bn->list, &journal->j_bitmap_nodes);
198 journal->j_free_bitmap_nodes++; 204 journal->j_free_bitmap_nodes++;
199 } else { 205 } else {
200 break; /* this is ok, we'll try again when more are needed */ 206 /* this is ok, we'll try again when more are needed */
207 break;
201 } 208 }
202 } 209 }
203} 210}
@@ -232,8 +239,8 @@ static void cleanup_bitmap_list(struct super_block *sb,
232} 239}
233 240
234/* 241/*
235** only call this on FS unmount. 242 * only call this on FS unmount.
236*/ 243 */
237static int free_list_bitmaps(struct super_block *sb, 244static int free_list_bitmaps(struct super_block *sb,
238 struct reiserfs_list_bitmap *jb_array) 245 struct reiserfs_list_bitmap *jb_array)
239{ 246{
@@ -268,9 +275,9 @@ static int free_bitmap_nodes(struct super_block *sb)
268} 275}
269 276
270/* 277/*
271** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. 278 * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
272** jb_array is the array to be filled in. 279 * jb_array is the array to be filled in.
273*/ 280 */
274int reiserfs_allocate_list_bitmaps(struct super_block *sb, 281int reiserfs_allocate_list_bitmaps(struct super_block *sb,
275 struct reiserfs_list_bitmap *jb_array, 282 struct reiserfs_list_bitmap *jb_array,
276 unsigned int bmap_nr) 283 unsigned int bmap_nr)
@@ -299,9 +306,9 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb,
299} 306}
300 307
301/* 308/*
302** find an available list bitmap. If you can't find one, flush a commit list 309 * find an available list bitmap. If you can't find one, flush a commit list
303** and try again 310 * and try again
304*/ 311 */
305static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, 312static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
306 struct reiserfs_journal_list 313 struct reiserfs_journal_list
307 *jl) 314 *jl)
@@ -325,18 +332,18 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb,
325 break; 332 break;
326 } 333 }
327 } 334 }
328 if (jb->journal_list) { /* double check to make sure if flushed correctly */ 335 /* double check to make sure if flushed correctly */
336 if (jb->journal_list)
329 return NULL; 337 return NULL;
330 }
331 jb->journal_list = jl; 338 jb->journal_list = jl;
332 return jb; 339 return jb;
333} 340}
334 341
335/* 342/*
336** allocates a new chunk of X nodes, and links them all together as a list. 343 * allocates a new chunk of X nodes, and links them all together as a list.
337** Uses the cnode->next and cnode->prev pointers 344 * Uses the cnode->next and cnode->prev pointers
338** returns NULL on failure 345 * returns NULL on failure
339*/ 346 */
340static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) 347static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
341{ 348{
342 struct reiserfs_journal_cnode *head; 349 struct reiserfs_journal_cnode *head;
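(The allocation scheme the comment above names, sketched close to the real allocate_cnodes() but reproduced from context rather than the file: one vzalloc'd chunk, threaded into a doubly linked free list.)

/* Hedged sketch of allocate_cnodes() internals. */
struct reiserfs_journal_cnode *head;
int i;

if (num_cnodes <= 0)
	return NULL;
head = vzalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
if (!head)
	return NULL;
head[0].prev = NULL;
head[0].next = head + 1;
for (i = 1; i < num_cnodes - 1; i++) {
	head[i].prev = head + (i - 1);
	head[i].next = head + (i + 1);
}
head[num_cnodes - 1].next = NULL;	/* terminate the free list */
return head;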
@@ -358,9 +365,7 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
358 return head; 365 return head;
359} 366}
360 367
361/* 368/* pulls a cnode off the free list, or returns NULL on failure */
362** pulls a cnode off the free list, or returns NULL on failure
363*/
364static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) 369static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
365{ 370{
366 struct reiserfs_journal_cnode *cn; 371 struct reiserfs_journal_cnode *cn;
@@ -386,8 +391,8 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb)
386} 391}
387 392
388/* 393/*
389** returns a cnode to the free list 394 * returns a cnode to the free list
390*/ 395 */
391static void free_cnode(struct super_block *sb, 396static void free_cnode(struct super_block *sb,
392 struct reiserfs_journal_cnode *cn) 397 struct reiserfs_journal_cnode *cn)
393{ 398{
@@ -412,7 +417,10 @@ static void clear_prepared_bits(struct buffer_head *bh)
412 clear_buffer_journal_restore_dirty(bh); 417 clear_buffer_journal_restore_dirty(bh);
413} 418}
414 419
415/* return a cnode with same dev, block number and size in table, or null if not found */ 420/*
421 * return a cnode with same dev, block number and size in table,
422 * or null if not found
423 */
416static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct 424static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
417 super_block 425 super_block
418 *sb, 426 *sb,
@@ -432,23 +440,24 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
432} 440}
433 441
434/* 442/*
435** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated 443 * this actually means 'can this block be reallocated yet?'. If you set
436** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever 444 * search_all, a block can only be allocated if it is not in the current
437** being overwritten by a replay after crashing. 445 * transaction, was not freed by the current transaction, and has no chance
438** 446 * of ever being overwritten by a replay after crashing.
439** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting 447 *
440** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make 448 * If you don't set search_all, a block can only be allocated if it is not
441** sure you never write the block without logging it. 449 * in the current transaction. Since deleting a block removes it from the
442** 450 * current transaction, this case should never happen. If you don't set
443** next_zero_bit is a suggestion about the next block to try for find_forward. 451 * search_all, make sure you never write the block without logging it.
444** when bl is rejected because it is set in a journal list bitmap, we search 452 *
445** for the next zero bit in the bitmap that rejected bl. Then, we return that 453 * next_zero_bit is a suggestion about the next block to try for find_forward.
446** through next_zero_bit for find_forward to try. 454 * when bl is rejected because it is set in a journal list bitmap, we search
447** 455 * for the next zero bit in the bitmap that rejected bl. Then, we return
448** Just because we return something in next_zero_bit does not mean we won't 456 * that through next_zero_bit for find_forward to try.
449** reject it on the next call to reiserfs_in_journal 457 *
450** 458 * Just because we return something in next_zero_bit does not mean we won't
451*/ 459 * reject it on the next call to reiserfs_in_journal
460 */
452int reiserfs_in_journal(struct super_block *sb, 461int reiserfs_in_journal(struct super_block *sb,
453 unsigned int bmap_nr, int bit_nr, int search_all, 462 unsigned int bmap_nr, int bit_nr, int search_all,
454 b_blocknr_t * next_zero_bit) 463 b_blocknr_t * next_zero_bit)
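(A hedged sketch of how an allocator might consume the next_zero_bit suggestion described above; the caller shown here is assumed, the real user lives in the bitmap scanner.)

/* Illustrative retry loop around reiserfs_in_journal(). */
b_blocknr_t next_zero = 0;

while (reiserfs_in_journal(sb, bmap_nr, bit_nr, 1, &next_zero)) {
	/* bit_nr was rejected; retry at the suggested zero bit, if any */
	if (next_zero <= (b_blocknr_t)bit_nr)
		break;			/* no forward suggestion; give up here */
	bit_nr = next_zero;
}
/* note: a suggestion may still be rejected on the next call */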
@@ -462,9 +471,11 @@ int reiserfs_in_journal(struct super_block *sb,
462 *next_zero_bit = 0; /* always start this at zero. */ 471 *next_zero_bit = 0; /* always start this at zero. */
463 472
464 PROC_INFO_INC(sb, journal.in_journal); 473 PROC_INFO_INC(sb, journal.in_journal);
465 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 474 /*
466 ** if we crash before the transaction that freed it commits, this transaction won't 475 * If we aren't doing a search_all, this is a metablock, and it
467 ** have committed either, and the block will never be written 476 * will be logged before use. if we crash before the transaction
477 * that freed it commits, this transaction won't have committed
478 * either, and the block will never be written
468 */ 479 */
469 if (search_all) { 480 if (search_all) {
470 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { 481 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
@@ -504,8 +515,7 @@ int reiserfs_in_journal(struct super_block *sb,
504 return 0; 515 return 0;
505} 516}
506 517
507/* insert cn into table 518/* insert cn into table */
508*/
509static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, 519static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
510 struct reiserfs_journal_cnode *cn) 520 struct reiserfs_journal_cnode *cn)
511{ 521{
@@ -551,10 +561,10 @@ static inline void put_journal_list(struct super_block *s,
551} 561}
552 562
553/* 563/*
554** this used to be much more involved, and I'm keeping it just in case things get ugly again. 564 * this used to be much more involved, and I'm keeping it just in case
555 ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 565 * things get ugly again. It gets called by flush_commit_list, and
556** transaction. 566 * cleans up any data stored about blocks freed during a transaction.
557*/ 567 */
558static void cleanup_freed_for_journal_list(struct super_block *sb, 568static void cleanup_freed_for_journal_list(struct super_block *sb,
559 struct reiserfs_journal_list *jl) 569 struct reiserfs_journal_list *jl)
560{ 570{
@@ -753,7 +763,8 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
753 get_bh(bh); 763 get_bh(bh);
754 jh = alloc_jh(); 764 jh = alloc_jh();
755 spin_lock(&j->j_dirty_buffers_lock); 765 spin_lock(&j->j_dirty_buffers_lock);
756 /* buffer must be locked for __add_jh, should be able to have 766 /*
767 * buffer must be locked for __add_jh, should be able to have
757 * two adds at the same time 768 * two adds at the same time
758 */ 769 */
759 BUG_ON(bh->b_private); 770 BUG_ON(bh->b_private);
@@ -811,7 +822,8 @@ static int write_ordered_buffers(spinlock_t * lock,
811 spin_lock(lock); 822 spin_lock(lock);
812 goto loop_next; 823 goto loop_next;
813 } 824 }
814 /* in theory, dirty non-uptodate buffers should never get here, 825 /*
826 * in theory, dirty non-uptodate buffers should never get here,
815 * but the upper layer io error paths still have a few quirks. 827 * but the upper layer io error paths still have a few quirks.
816 * Handle them here as gracefully as we can 828 * Handle them here as gracefully as we can
817 */ 829 */
@@ -849,13 +861,14 @@ static int write_ordered_buffers(spinlock_t * lock,
849 if (!buffer_uptodate(bh)) { 861 if (!buffer_uptodate(bh)) {
850 ret = -EIO; 862 ret = -EIO;
851 } 863 }
852 /* ugly interaction with invalidatepage here. 864 /*
853 * reiserfs_invalidate_page will pin any buffer that has a valid 865 * ugly interaction with invalidatepage here.
854 * journal head from an older transaction. If someone else sets 866 * reiserfs_invalidate_page will pin any buffer that has a
855 * our buffer dirty after we write it in the first loop, and 867 * valid journal head from an older transaction. If someone
856 * then someone truncates the page away, nobody will ever write 868 * else sets our buffer dirty after we write it in the first
857 * the buffer. We're safe if we write the page one last time 869 * loop, and then someone truncates the page away, nobody
858 * after freeing the journal header. 870 * will ever write the buffer. We're safe if we write the
871 * page one last time after freeing the journal header.
859 */ 872 */
860 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { 873 if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
861 spin_unlock(lock); 874 spin_unlock(lock);
@@ -916,9 +929,11 @@ static int flush_older_commits(struct super_block *s,
916 if (!journal_list_still_alive(s, trans_id)) 929 if (!journal_list_still_alive(s, trans_id))
917 return 1; 930 return 1;
918 931
919 /* the one we just flushed is gone, this means all 932 /*
920 * older lists are also gone, so first_jl is no longer 933 * the one we just flushed is gone, this means
921 * valid either. Go back to the beginning. 934 * all older lists are also gone, so first_jl
935 * is no longer valid either. Go back to the
936 * beginning.
922 */ 937 */
923 if (!journal_list_still_alive 938 if (!journal_list_still_alive
924 (s, other_trans_id)) { 939 (s, other_trans_id)) {
@@ -951,12 +966,12 @@ static int reiserfs_async_progress_wait(struct super_block *s)
951} 966}
952 967
953/* 968/*
954** if this journal list still has commit blocks unflushed, send them to disk. 969 * if this journal list still has commit blocks unflushed, send them to disk.
955** 970 *
956** log areas must be flushed in order (transaction 2 can't commit before transaction 1) 971 * log areas must be flushed in order (transaction 2 can't commit before
957** Before the commit block can by written, every other log block must be safely on disk 972 * transaction 1) Before the commit block can by written, every other log
958** 973 * block must be safely on disk
959*/ 974 */
960static int flush_commit_list(struct super_block *s, 975static int flush_commit_list(struct super_block *s,
961 struct reiserfs_journal_list *jl, int flushall) 976 struct reiserfs_journal_list *jl, int flushall)
962{ 977{
@@ -975,8 +990,9 @@ static int flush_commit_list(struct super_block *s,
975 return 0; 990 return 0;
976 } 991 }
977 992
978 /* before we can put our commit blocks on disk, we have to make sure everyone older than 993 /*
979 ** us is on disk too 994 * before we can put our commit blocks on disk, we have to make
995 * sure everyone older than us is on disk too
980 */ 996 */
981 BUG_ON(jl->j_len <= 0); 997 BUG_ON(jl->j_len <= 0);
982 BUG_ON(trans_id == journal->j_trans_id); 998 BUG_ON(trans_id == journal->j_trans_id);
@@ -984,7 +1000,10 @@ static int flush_commit_list(struct super_block *s,
984 get_journal_list(jl); 1000 get_journal_list(jl);
985 if (flushall) { 1001 if (flushall) {
986 if (flush_older_commits(s, jl) == 1) { 1002 if (flush_older_commits(s, jl) == 1) {
987 /* list disappeared during flush_older_commits. return */ 1003 /*
1004 * list disappeared during flush_older_commits.
1005 * return
1006 */
988 goto put_jl; 1007 goto put_jl;
989 } 1008 }
990 } 1009 }
@@ -1056,9 +1075,10 @@ static int flush_commit_list(struct super_block *s,
1056 depth = reiserfs_write_unlock_nested(s); 1075 depth = reiserfs_write_unlock_nested(s);
1057 __wait_on_buffer(tbh); 1076 __wait_on_buffer(tbh);
1058 reiserfs_write_lock_nested(s, depth); 1077 reiserfs_write_lock_nested(s, depth);
1059 // since we're using ll_rw_blk above, it might have skipped over 1078 /*
1060 // a locked buffer. Double check here 1079 * since we're using ll_rw_blk above, it might have skipped
1061 // 1080 * over a locked buffer. Double check here
1081 */
1062 /* redundant, sync_dirty_buffer() checks */ 1082 /* redundant, sync_dirty_buffer() checks */
1063 if (buffer_dirty(tbh)) { 1083 if (buffer_dirty(tbh)) {
1064 depth = reiserfs_write_unlock_nested(s); 1084 depth = reiserfs_write_unlock_nested(s);
@@ -1072,17 +1092,21 @@ static int flush_commit_list(struct super_block *s,
1072#endif 1092#endif
1073 retval = -EIO; 1093 retval = -EIO;
1074 } 1094 }
1075 put_bh(tbh); /* once for journal_find_get_block */ 1095 /* once for journal_find_get_block */
1076 put_bh(tbh); /* once due to original getblk in do_journal_end */ 1096 put_bh(tbh);
1097 /* once due to original getblk in do_journal_end */
1098 put_bh(tbh);
1077 atomic_dec(&(jl->j_commit_left)); 1099 atomic_dec(&(jl->j_commit_left));
1078 } 1100 }
1079 1101
1080 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); 1102 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
1081 1103
1082 /* If there was a write error in the journal - we can't commit 1104 /*
1105 * If there was a write error in the journal - we can't commit
1083 * this transaction - it will be invalid and, if successful, 1106 * this transaction - it will be invalid and, if successful,
1084 * will just end up propagating the write error out to 1107 * will just end up propagating the write error out to
1085 * the file system. */ 1108 * the file system.
1109 */
1086 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { 1110 if (likely(!retval && !reiserfs_is_journal_aborted (journal))) {
1087 if (buffer_dirty(jl->j_commit_bh)) 1111 if (buffer_dirty(jl->j_commit_bh))
1088 BUG(); 1112 BUG();
@@ -1095,9 +1119,11 @@ static int flush_commit_list(struct super_block *s,
1095 reiserfs_write_lock_nested(s, depth); 1119 reiserfs_write_lock_nested(s, depth);
1096 } 1120 }
1097 1121
1098 /* If there was a write error in the journal - we can't commit this 1122 /*
1123 * If there was a write error in the journal - we can't commit this
1099 * transaction - it will be invalid and, if successful, will just end 1124 * transaction - it will be invalid and, if successful, will just end
1100 * up propagating the write error out to the filesystem. */ 1125 * up propagating the write error out to the filesystem.
1126 */
1101 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { 1127 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1102#ifdef CONFIG_REISERFS_CHECK 1128#ifdef CONFIG_REISERFS_CHECK
1103 reiserfs_warning(s, "journal-615", "buffer write failed"); 1129 reiserfs_warning(s, "journal-615", "buffer write failed");
@@ -1112,7 +1138,10 @@ static int flush_commit_list(struct super_block *s,
1112 } 1138 }
1113 journal->j_last_commit_id = jl->j_trans_id; 1139 journal->j_last_commit_id = jl->j_trans_id;
1114 1140
1115 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 1141 /*
1142 * now, every commit block is on the disk. It is safe to allow
1143 * blocks freed during this transaction to be reallocated
1144 */
1116 cleanup_freed_for_journal_list(s, jl); 1145 cleanup_freed_for_journal_list(s, jl);
1117 1146
1118 retval = retval ? retval : journal->j_errno; 1147 retval = retval ? retval : journal->j_errno;
@@ -1136,9 +1165,9 @@ static int flush_commit_list(struct super_block *s,
1136} 1165}
1137 1166
1138/* 1167/*
1139** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1168 * flush_journal_list frequently needs to find a newer transaction for a
1140** returns NULL if it can't find anything 1169 * given block. This does that, or returns NULL if it can't find anything
1141*/ 1170 */
1142static struct reiserfs_journal_list *find_newer_jl_for_cn(struct 1171static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1143 reiserfs_journal_cnode 1172 reiserfs_journal_cnode
1144 *cn) 1173 *cn)
@@ -1162,10 +1191,11 @@ static void remove_journal_hash(struct super_block *,
1162 int); 1191 int);
1163 1192
1164/* 1193/*
1165** once all the real blocks have been flushed, it is safe to remove them from the 1194 * once all the real blocks have been flushed, it is safe to remove them
1166** journal list for this transaction. Aside from freeing the cnode, this also allows the 1195 * from the journal list for this transaction. Aside from freeing the
1167** block to be reallocated for data blocks if it had been deleted. 1196 * cnode, this also allows the block to be reallocated for data blocks
1168*/ 1197 * if it had been deleted.
1198 */
1169static void remove_all_from_journal_list(struct super_block *sb, 1199static void remove_all_from_journal_list(struct super_block *sb,
1170 struct reiserfs_journal_list *jl, 1200 struct reiserfs_journal_list *jl,
1171 int debug) 1201 int debug)
@@ -1174,8 +1204,9 @@ static void remove_all_from_journal_list(struct super_block *sb,
1174 struct reiserfs_journal_cnode *cn, *last; 1204 struct reiserfs_journal_cnode *cn, *last;
1175 cn = jl->j_realblock; 1205 cn = jl->j_realblock;
1176 1206
1177 /* which is better, to lock once around the whole loop, or 1207 /*
1178 ** to lock for each call to remove_journal_hash? 1208 * which is better, to lock once around the whole loop, or
1209 * to lock for each call to remove_journal_hash?
1179 */ 1210 */
1180 while (cn) { 1211 while (cn) {
1181 if (cn->blocknr != 0) { 1212 if (cn->blocknr != 0) {
@@ -1197,12 +1228,13 @@ static void remove_all_from_journal_list(struct super_block *sb,
1197} 1228}
1198 1229
1199/* 1230/*
1200** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. 1231 * if this timestamp is greater than the timestamp we wrote last to the
1201** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start 1232 * header block, write it to the header block. once this is done, I can
1202** releasing blocks in this transaction for reuse as data blocks. 1233 * safely say the log area for this transaction won't ever be replayed,
1203** called by flush_journal_list, before it calls remove_all_from_journal_list 1234 * and I can start releasing blocks in this transaction for reuse as data
1204** 1235 * blocks. called by flush_journal_list, before it calls
1205*/ 1236 * remove_all_from_journal_list
1237 */
1206static int _update_journal_header_block(struct super_block *sb, 1238static int _update_journal_header_block(struct super_block *sb,
1207 unsigned long offset, 1239 unsigned long offset,
1208 unsigned int trans_id) 1240 unsigned int trans_id)
@@ -1272,7 +1304,8 @@ static int flush_older_journal_lists(struct super_block *sb,
1272 struct reiserfs_journal *journal = SB_JOURNAL(sb); 1304 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1273 unsigned int trans_id = jl->j_trans_id; 1305 unsigned int trans_id = jl->j_trans_id;
1274 1306
1275 /* we know we are the only ones flushing things, no extra race 1307 /*
1308 * we know we are the only ones flushing things, no extra race
1276 * protection is required. 1309 * protection is required.
1277 */ 1310 */
1278 restart: 1311 restart:
@@ -1302,15 +1335,16 @@ static void del_from_work_list(struct super_block *s,
1302 } 1335 }
1303} 1336}
1304 1337
1305/* flush a journal list, both commit and real blocks 1338/*
1306** 1339 * flush a journal list, both commit and real blocks
1307** always set flushall to 1, unless you are calling from inside 1340 *
1308** flush_journal_list 1341 * always set flushall to 1, unless you are calling from inside
1309** 1342 * flush_journal_list
1310** IMPORTANT. This can only be called while there are no journal writers, 1343 *
1311** and the journal is locked. That means it can only be called from 1344 * IMPORTANT. This can only be called while there are no journal writers,
1312** do_journal_end, or by journal_release 1345 * and the journal is locked. That means it can only be called from
1313*/ 1346 * do_journal_end, or by journal_release
1347 */
1314static int flush_journal_list(struct super_block *s, 1348static int flush_journal_list(struct super_block *s,
1315 struct reiserfs_journal_list *jl, int flushall) 1349 struct reiserfs_journal_list *jl, int flushall)
1316{ 1350{
@@ -1352,8 +1386,9 @@ static int flush_journal_list(struct super_block *s,
1352 goto flush_older_and_return; 1386 goto flush_older_and_return;
1353 } 1387 }
1354 1388
1355 /* start by putting the commit list on disk. This will also flush 1389 /*
1356 ** the commit lists of any olders transactions 1390 * start by putting the commit list on disk. This will also flush
1356 ** the commit lists of any olders transactions 1391 * the commit lists of any older transactions
1357 */ 1392 */
1358 flush_commit_list(s, jl, 1); 1393 flush_commit_list(s, jl, 1);
1359 1394
@@ -1367,8 +1402,9 @@ static int flush_journal_list(struct super_block *s,
1367 goto flush_older_and_return; 1402 goto flush_older_and_return;
1368 } 1403 }
1369 1404
1370 /* loop through each cnode, see if we need to write it, 1405 /*
1371 ** or wait on a more recent transaction, or just ignore it 1406 * loop through each cnode, see if we need to write it,
1407 * or wait on a more recent transaction, or just ignore it
1372 */ 1408 */
1373 if (atomic_read(&(journal->j_wcount)) != 0) { 1409 if (atomic_read(&(journal->j_wcount)) != 0) {
1374 reiserfs_panic(s, "journal-844", "journal list is flushing, " 1410 reiserfs_panic(s, "journal-844", "journal list is flushing, "
@@ -1384,20 +1420,25 @@ static int flush_journal_list(struct super_block *s,
1384 goto free_cnode; 1420 goto free_cnode;
1385 } 1421 }
1386 1422
1387 /* This transaction failed commit. Don't write out to the disk */ 1423 /*
1424 * This transaction failed commit.
1425 * Don't write out to the disk
1426 */
1388 if (!(jl->j_state & LIST_DIRTY)) 1427 if (!(jl->j_state & LIST_DIRTY))
1389 goto free_cnode; 1428 goto free_cnode;
1390 1429
1391 pjl = find_newer_jl_for_cn(cn); 1430 pjl = find_newer_jl_for_cn(cn);
1392 /* the order is important here. We check pjl to make sure we 1431 /*
1393 ** don't clear BH_JDirty_wait if we aren't the one writing this 1432 * the order is important here. We check pjl to make sure we
1394 ** block to disk 1433 * don't clear BH_JDirty_wait if we aren't the one writing this
1434 * block to disk
1395 */ 1435 */
1396 if (!pjl && cn->bh) { 1436 if (!pjl && cn->bh) {
1397 saved_bh = cn->bh; 1437 saved_bh = cn->bh;
1398 1438
1399 /* we do this to make sure nobody releases the buffer while 1439 /*
1400 ** we are working with it 1440 * we do this to make sure nobody releases the
1441 * buffer while we are working with it
1401 */ 1442 */
1402 get_bh(saved_bh); 1443 get_bh(saved_bh);
1403 1444
@@ -1406,13 +1447,17 @@ static int flush_journal_list(struct super_block *s,
1406 was_jwait = 1; 1447 was_jwait = 1;
1407 was_dirty = 1; 1448 was_dirty = 1;
1408 } else if (can_dirty(cn)) { 1449 } else if (can_dirty(cn)) {
1409 /* everything with !pjl && jwait should be writable */ 1450 /*
1451 * everything with !pjl && jwait
1452 * should be writable
1453 */
1410 BUG(); 1454 BUG();
1411 } 1455 }
1412 } 1456 }
1413 1457
1414 /* if someone has this block in a newer transaction, just make 1458 /*
1415 ** sure they are committed, and don't try writing it to disk 1459 * if someone has this block in a newer transaction, just make
1460 * sure they are committed, and don't try writing it to disk
1416 */ 1461 */
1417 if (pjl) { 1462 if (pjl) {
1418 if (atomic_read(&pjl->j_commit_left)) 1463 if (atomic_read(&pjl->j_commit_left))
@@ -1420,16 +1465,18 @@ static int flush_journal_list(struct super_block *s,
1420 goto free_cnode; 1465 goto free_cnode;
1421 } 1466 }
1422 1467
1423 /* bh == NULL when the block got to disk on its own, OR, 1468 /*
1424 ** the block got freed in a future transaction 1469 * bh == NULL when the block got to disk on its own, OR,
1470 * the block got freed in a future transaction
1425 */ 1471 */
1426 if (saved_bh == NULL) { 1472 if (saved_bh == NULL) {
1427 goto free_cnode; 1473 goto free_cnode;
1428 } 1474 }
1429 1475
1430 /* this should never happen. kupdate_one_transaction has this list 1476 /*
1431 ** locked while it works, so we should never see a buffer here that 1477 * this should never happen. kupdate_one_transaction has
1432 ** is not marked JDirty_wait 1478 * this list locked while it works, so we should never see a
1479 * buffer here that is not marked JDirty_wait
1433 */ 1480 */
1434 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1481 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1435 reiserfs_warning(s, "journal-813", 1482 reiserfs_warning(s, "journal-813",
@@ -1440,7 +1487,10 @@ static int flush_journal_list(struct super_block *s,
1440 was_jwait ? ' ' : '!'); 1487 was_jwait ? ' ' : '!');
1441 } 1488 }
1442 if (was_dirty) { 1489 if (was_dirty) {
1443 /* we inc again because saved_bh gets decremented at free_cnode */ 1490 /*
1491 * we inc again because saved_bh gets decremented
1492 * at free_cnode
1493 */
1444 get_bh(saved_bh); 1494 get_bh(saved_bh);
1445 set_bit(BLOCK_NEEDS_FLUSH, &cn->state); 1495 set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
1446 lock_buffer(saved_bh); 1496 lock_buffer(saved_bh);
@@ -1460,7 +1510,10 @@ static int flush_journal_list(struct super_block *s,
1460 last = cn; 1510 last = cn;
1461 cn = cn->next; 1511 cn = cn->next;
1462 if (saved_bh) { 1512 if (saved_bh) {
1463 /* we incremented this to keep others from taking the buffer head away */ 1513 /*
1514 * we incremented this to keep others from
1515 * taking the buffer head away
1516 */
1464 put_bh(saved_bh); 1517 put_bh(saved_bh);
1465 if (atomic_read(&(saved_bh->b_count)) < 0) { 1518 if (atomic_read(&(saved_bh->b_count)) < 0) {
1466 reiserfs_warning(s, "journal-945", 1519 reiserfs_warning(s, "journal-945",
@@ -1492,8 +1545,10 @@ static int flush_journal_list(struct super_block *s,
1492#endif 1545#endif
1493 err = -EIO; 1546 err = -EIO;
1494 } 1547 }
1495 /* note, we must clear the JDirty_wait bit after the up to date 1548 /*
1496 ** check, otherwise we race against our flushpage routine 1549 * note, we must clear the JDirty_wait bit
1550 * after the up to date check, otherwise we
1551 * race against our flushpage routine
1497 */ 1552 */
1498 BUG_ON(!test_clear_buffer_journal_dirty 1553 BUG_ON(!test_clear_buffer_journal_dirty
1499 (cn->bh)); 1554 (cn->bh));
@@ -1513,23 +1568,25 @@ static int flush_journal_list(struct super_block *s,
1513 __func__); 1568 __func__);
1514 flush_older_and_return: 1569 flush_older_and_return:
1515 1570
1516 /* before we can update the journal header block, we _must_ flush all 1571 /*
1517 ** real blocks from all older transactions to disk. This is because 1572 * before we can update the journal header block, we _must_ flush all
1518 ** once the header block is updated, this transaction will not be 1573 * real blocks from all older transactions to disk. This is because
1519 ** replayed after a crash 1574 * once the header block is updated, this transaction will not be
1575 * replayed after a crash
1520 */ 1576 */
1521 if (flushall) { 1577 if (flushall) {
1522 flush_older_journal_lists(s, jl); 1578 flush_older_journal_lists(s, jl);
1523 } 1579 }
1524 1580
1525 err = journal->j_errno; 1581 err = journal->j_errno;
1526 /* before we can remove everything from the hash tables for this 1582 /*
1527 ** transaction, we must make sure it can never be replayed 1583 * before we can remove everything from the hash tables for this
1528 ** 1584 * transaction, we must make sure it can never be replayed
1529 ** since we are only called from do_journal_end, we know for sure there 1585 *
1530 ** are no allocations going on while we are flushing journal lists. So, 1586 * since we are only called from do_journal_end, we know for sure there
1531 ** we only need to update the journal header block for the last list 1587 * are no allocations going on while we are flushing journal lists. So,
1532 ** being flushed 1588 * we only need to update the journal header block for the last list
1589 * being flushed
1533 */ 1590 */
1534 if (!err && flushall) { 1591 if (!err && flushall) {
1535 err = 1592 err =
@@ -1554,7 +1611,8 @@ static int flush_journal_list(struct super_block *s,
1554 } 1611 }
1555 journal->j_last_flush_id = jl->j_trans_id; 1612 journal->j_last_flush_id = jl->j_trans_id;
1556 1613
1557 /* not strictly required since we are freeing the list, but it should 1614 /*
1615 * not strictly required since we are freeing the list, but it should
1558 * help find code using dead lists later on 1616 * help find code using dead lists later on
1559 */ 1617 */
1560 jl->j_len = 0; 1618 jl->j_len = 0;
@@ -1585,15 +1643,17 @@ static int write_one_transaction(struct super_block *s,
1585 1643
1586 cn = jl->j_realblock; 1644 cn = jl->j_realblock;
1587 while (cn) { 1645 while (cn) {
1588 /* if the blocknr == 0, this has been cleared from the hash, 1646 /*
1589 ** skip it 1647 * if the blocknr == 0, this has been cleared from the hash,
1648 * skip it
1590 */ 1649 */
1591 if (cn->blocknr == 0) { 1650 if (cn->blocknr == 0) {
1592 goto next; 1651 goto next;
1593 } 1652 }
1594 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 1653 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
1595 struct buffer_head *tmp_bh; 1654 struct buffer_head *tmp_bh;
1596 /* we can race against journal_mark_freed when we try 1655 /*
1656 * we can race against journal_mark_freed when we try
1597 * to lock_buffer(cn->bh), so we have to inc the buffer 1657 * to lock_buffer(cn->bh), so we have to inc the buffer
1598 * count, and recheck things after locking 1658 * count, and recheck things after locking
1599 */ 1659 */
@@ -1630,15 +1690,17 @@ static int dirty_one_transaction(struct super_block *s,
1630 jl->j_state |= LIST_DIRTY; 1690 jl->j_state |= LIST_DIRTY;
1631 cn = jl->j_realblock; 1691 cn = jl->j_realblock;
1632 while (cn) { 1692 while (cn) {
1633 /* look for a more recent transaction that logged this 1693 /*
1634 ** buffer. Only the most recent transaction with a buffer in 1694 * look for a more recent transaction that logged this
1635 ** it is allowed to send that buffer to disk 1695 * buffer. Only the most recent transaction with a buffer in
1696 * it is allowed to send that buffer to disk
1636 */ 1697 */
1637 pjl = find_newer_jl_for_cn(cn); 1698 pjl = find_newer_jl_for_cn(cn);
1638 if (!pjl && cn->blocknr && cn->bh 1699 if (!pjl && cn->blocknr && cn->bh
1639 && buffer_journal_dirty(cn->bh)) { 1700 && buffer_journal_dirty(cn->bh)) {
1640 BUG_ON(!can_dirty(cn)); 1701 BUG_ON(!can_dirty(cn));
1641 /* if the buffer is prepared, it will either be logged 1702 /*
1703 * if the buffer is prepared, it will either be logged
1642 * or restored. If restored, we need to make sure 1704 * or restored. If restored, we need to make sure
1643 * it actually gets marked dirty 1705 * it actually gets marked dirty
1644 */ 1706 */
@@ -1675,7 +1737,8 @@ static int kupdate_transactions(struct super_block *s,
1675 goto done; 1737 goto done;
1676 } 1738 }
1677 1739
1678 /* we've got j_flush_mutex held, nobody is going to delete any 1740 /*
1741 * we've got j_flush_mutex held, nobody is going to delete any
1679 * of these lists out from underneath us 1742 * of these lists out from underneath us
1680 */ 1743 */
1681 while ((num_trans && transactions_flushed < num_trans) || 1744 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1714,15 +1777,16 @@ static int kupdate_transactions(struct super_block *s,
1714 return ret; 1777 return ret;
1715} 1778}
1716 1779
1717/* for o_sync and fsync heavy applications, they tend to use 1780/*
1718** all the journa list slots with tiny transactions. These 1781 * o_sync- and fsync-heavy applications tend to use
1719** trigger lots and lots of calls to update the header block, which 1782 * all the journal list slots with tiny transactions. These
1720** adds seeks and slows things down. 1783 * trigger lots and lots of calls to update the header block, which
1721** 1784 * adds seeks and slows things down.
1722** This function tries to clear out a large chunk of the journal lists 1785 *
1723** at once, which makes everything faster since only the newest journal 1786 * This function tries to clear out a large chunk of the journal lists
1724** list updates the header block 1787 * at once, which makes everything faster since only the newest journal
1725*/ 1788 * list updates the header block
1789 */
1726static int flush_used_journal_lists(struct super_block *s, 1790static int flush_used_journal_lists(struct super_block *s,
1727 struct reiserfs_journal_list *jl) 1791 struct reiserfs_journal_list *jl)
1728{ 1792{
@@ -1759,9 +1823,11 @@ static int flush_used_journal_lists(struct super_block *s,
1759 } 1823 }
1760 get_journal_list(jl); 1824 get_journal_list(jl);
1761 get_journal_list(flush_jl); 1825 get_journal_list(flush_jl);
1762 /* try to find a group of blocks we can flush across all the 1826
1763 ** transactions, but only bother if we've actually spanned 1827 /*
1764 ** across multiple lists 1828 * try to find a group of blocks we can flush across all the
1829 * transactions, but only bother if we've actually spanned
1830 * across multiple lists
1765 */ 1831 */
1766 if (flush_jl != jl) { 1832 if (flush_jl != jl) {
1767 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1833 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
@@ -1773,9 +1839,9 @@ static int flush_used_journal_lists(struct super_block *s,
1773} 1839}
1774 1840
1775/* 1841/*
1776** removes any nodes in table with name block and dev as bh. 1842 * removes any nodes in table with the same block and dev as bh.
1777** only touchs the hnext and hprev pointers. 1843 * only touches the hnext and hprev pointers.
1778*/ 1844 */
1779void remove_journal_hash(struct super_block *sb, 1845void remove_journal_hash(struct super_block *sb,
1780 struct reiserfs_journal_cnode **table, 1846 struct reiserfs_journal_cnode **table,
1781 struct reiserfs_journal_list *jl, 1847 struct reiserfs_journal_list *jl,
@@ -1804,7 +1870,11 @@ void remove_journal_hash(struct super_block *sb,
1804 cur->blocknr = 0; 1870 cur->blocknr = 0;
1805 cur->sb = NULL; 1871 cur->sb = NULL;
1806 cur->state = 0; 1872 cur->state = 0;
1807 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 1873 /*
1874 * anybody who clears the cur->bh will also
1875 * dec the nonzerolen
1876 */
1877 if (cur->bh && cur->jlist)
1808 atomic_dec(&(cur->jlist->j_nonzerolen)); 1878 atomic_dec(&(cur->jlist->j_nonzerolen));
1809 cur->bh = NULL; 1879 cur->bh = NULL;
1810 cur->jlist = NULL; 1880 cur->jlist = NULL;
@@ -1825,17 +1895,18 @@ static void free_journal_ram(struct super_block *sb)
1825 if (journal->j_header_bh) { 1895 if (journal->j_header_bh) {
1826 brelse(journal->j_header_bh); 1896 brelse(journal->j_header_bh);
1827 } 1897 }
1828 /* j_header_bh is on the journal dev, make sure not to release the journal 1898 /*
1829 * dev until we brelse j_header_bh 1899 * j_header_bh is on the journal dev, make sure
1900 * not to release the journal dev until we brelse j_header_bh
1830 */ 1901 */
1831 release_journal_dev(sb, journal); 1902 release_journal_dev(sb, journal);
1832 vfree(journal); 1903 vfree(journal);
1833} 1904}
1834 1905
1835/* 1906/*
1836** call on unmount. Only set error to 1 if you haven't made your way out 1907 * call on unmount. Only set error to 1 if you haven't made your way out
1837** of read_super() yet. Any other caller must keep error at 0. 1908 * of read_super() yet. Any other caller must keep error at 0.
1838*/ 1909 */
1839static int do_journal_release(struct reiserfs_transaction_handle *th, 1910static int do_journal_release(struct reiserfs_transaction_handle *th,
1840 struct super_block *sb, int error) 1911 struct super_block *sb, int error)
1841{ 1912{
@@ -1843,14 +1914,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1843 int flushed = 0; 1914 int flushed = 0;
1844 struct reiserfs_journal *journal = SB_JOURNAL(sb); 1915 struct reiserfs_journal *journal = SB_JOURNAL(sb);
1845 1916
1846 /* we only want to flush out transactions if we were called with error == 0 1917 /*
1918 * we only want to flush out transactions if we were
1919 * called with error == 0
1847 */ 1920 */
1848 if (!error && !(sb->s_flags & MS_RDONLY)) { 1921 if (!error && !(sb->s_flags & MS_RDONLY)) {
1849 /* end the current trans */ 1922 /* end the current trans */
1850 BUG_ON(!th->t_trans_id); 1923 BUG_ON(!th->t_trans_id);
1851 do_journal_end(th, sb, 10, FLUSH_ALL); 1924 do_journal_end(th, sb, 10, FLUSH_ALL);
1852 1925
1853 /* make sure something gets logged to force our way into the flush code */ 1926 /*
1927 * make sure something gets logged to force
1928 * our way into the flush code
1929 */
1854 if (!journal_join(&myth, sb, 1)) { 1930 if (!journal_join(&myth, sb, 1)) {
1855 reiserfs_prepare_for_journal(sb, 1931 reiserfs_prepare_for_journal(sb,
1856 SB_BUFFER_WITH_SB(sb), 1932 SB_BUFFER_WITH_SB(sb),
@@ -1894,25 +1970,24 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
1894 return 0; 1970 return 0;
1895} 1971}
1896 1972
1897/* 1973/* call on unmount. flush all journal trans, release all alloc'd ram */
1898** call on unmount. flush all journal trans, release all alloc'd ram
1899*/
1900int journal_release(struct reiserfs_transaction_handle *th, 1974int journal_release(struct reiserfs_transaction_handle *th,
1901 struct super_block *sb) 1975 struct super_block *sb)
1902{ 1976{
1903 return do_journal_release(th, sb, 0); 1977 return do_journal_release(th, sb, 0);
1904} 1978}
1905 1979
1906/* 1980/* only call from an error condition inside reiserfs_read_super! */
1907** only call from an error condition inside reiserfs_read_super!
1908*/
1909int journal_release_error(struct reiserfs_transaction_handle *th, 1981int journal_release_error(struct reiserfs_transaction_handle *th,
1910 struct super_block *sb) 1982 struct super_block *sb)
1911{ 1983{
1912 return do_journal_release(th, sb, 1); 1984 return do_journal_release(th, sb, 1);
1913} 1985}
1914 1986
1915/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1987/*
1988 * compares description block with commit block.
1989 * returns 1 if they differ, 0 if they are the same
1990 */
1916static int journal_compare_desc_commit(struct super_block *sb, 1991static int journal_compare_desc_commit(struct super_block *sb,
1917 struct reiserfs_journal_desc *desc, 1992 struct reiserfs_journal_desc *desc,
1918 struct reiserfs_journal_commit *commit) 1993 struct reiserfs_journal_commit *commit)
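The comparison described above reduces to matching the transaction id and length recorded in the description and commit blocks, plus a plausibility bound on the length. A minimal user-space sketch of such a check (the struct layouts and the zero/upper-bound tests are simplified assumptions, not the on-disk format):

#include <stdint.h>

/* simplified stand-ins for the on-disk desc/commit blocks */
struct jdesc   { uint32_t trans_id; uint32_t trans_len; };
struct jcommit { uint32_t trans_id; uint32_t trans_len; };

/* returns 1 if desc and commit disagree (torn/corrupt trans), 0 if they match */
static int desc_commit_mismatch(const struct jdesc *desc,
                                const struct jcommit *commit,
                                uint32_t trans_max)
{
        if (commit->trans_id != desc->trans_id)
                return 1;
        if (commit->trans_len != desc->trans_len)
                return 1;
        /* a length no single transaction could have is also bogus */
        if (commit->trans_len == 0 || commit->trans_len > trans_max)
                return 1;
        return 0;
}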
@@ -1926,11 +2001,12 @@ static int journal_compare_desc_commit(struct super_block *sb,
1926 return 0; 2001 return 0;
1927} 2002}
1928 2003
1929/* returns 0 if it did not find a description block 2004/*
1930** returns -1 if it found a corrupt commit block 2005 * returns 0 if it did not find a description block
1931** returns 1 if both desc and commit were valid 2006 * returns -1 if it found a corrupt commit block
1932** NOTE: only called during fs mount 2007 * returns 1 if both desc and commit were valid
1933*/ 2008 * NOTE: only called during fs mount
2009 */
1934static int journal_transaction_is_valid(struct super_block *sb, 2010static int journal_transaction_is_valid(struct super_block *sb,
1935 struct buffer_head *d_bh, 2011 struct buffer_head *d_bh,
1936 unsigned int *oldest_invalid_trans_id, 2012 unsigned int *oldest_invalid_trans_id,
@@ -1976,7 +2052,10 @@ static int journal_transaction_is_valid(struct super_block *sb,
1976 } 2052 }
1977 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2053 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
1978 2054
1979 /* ok, we have a journal description block, lets see if the transaction was valid */ 2055 /*
2056 * ok, we have a journal description block,
2057 * let's see if the transaction was valid
2058 */
1980 c_bh = 2059 c_bh =
1981 journal_bread(sb, 2060 journal_bread(sb,
1982 SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2061 SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
@@ -2028,11 +2107,11 @@ static void brelse_array(struct buffer_head **heads, int num)
2028} 2107}
2029 2108
2030/* 2109/*
2031** given the start, and values for the oldest acceptable transactions, 2110 * given the start, and values for the oldest acceptable transactions,
2032** this either reads in a replays a transaction, or returns because the 2111 * this either reads in and replays a transaction, or returns because the
2033** transaction is invalid, or too old. 2112 * transaction is invalid, or too old.
2034** NOTE: only called during fs mount 2113 * NOTE: only called during fs mount
2035*/ 2114 */
2036static int journal_read_transaction(struct super_block *sb, 2115static int journal_read_transaction(struct super_block *sb,
2037 unsigned long cur_dblock, 2116 unsigned long cur_dblock,
2038 unsigned long oldest_start, 2117 unsigned long oldest_start,
@@ -2106,7 +2185,10 @@ static int journal_read_transaction(struct super_block *sb,
2106 } 2185 }
2107 2186
2108 trans_id = get_desc_trans_id(desc); 2187 trans_id = get_desc_trans_id(desc);
2109 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2188 /*
2189 * now we know we've got a good transaction, and it was
2190 * inside the valid time ranges
2191 */
2110 log_blocks = kmalloc(get_desc_trans_len(desc) * 2192 log_blocks = kmalloc(get_desc_trans_len(desc) *
2111 sizeof(struct buffer_head *), GFP_NOFS); 2193 sizeof(struct buffer_head *), GFP_NOFS);
2112 real_blocks = kmalloc(get_desc_trans_len(desc) * 2194 real_blocks = kmalloc(get_desc_trans_len(desc) *
@@ -2213,7 +2295,10 @@ static int journal_read_transaction(struct super_block *sb,
2213 "journal-1095: setting journal " "start to offset %ld", 2295 "journal-1095: setting journal " "start to offset %ld",
2214 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); 2296 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb));
2215 2297
2216 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2298 /*
2299 * init starting values for the first transaction, in case
2300 * this is the last transaction to be replayed.
2301 */
2217 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); 2302 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb);
2218 journal->j_last_flush_trans_id = trans_id; 2303 journal->j_last_flush_trans_id = trans_id;
2219 journal->j_trans_id = trans_id + 1; 2304 journal->j_trans_id = trans_id + 1;
@@ -2227,12 +2312,14 @@ static int journal_read_transaction(struct super_block *sb,
2227 return 0; 2312 return 0;
2228} 2313}
2229 2314
2230/* This function reads blocks starting from block and to max_block of bufsize 2315/*
2231 size (but no more than BUFNR blocks at a time). This proved to improve 2316 * This function reads blocks starting from block up to max_block of bufsize
2232 mounting speed on self-rebuilding raid5 arrays at least. 2317 * size (but no more than BUFNR blocks at a time). This proved to improve
2233 Right now it is only used from journal code. But later we might use it 2318 * mounting speed on self-rebuilding raid5 arrays at least.
2234 from other places. 2319 * Right now it is only used from journal code. But later we might use it
2235 Note: Do not use journal_getblk/sb_getblk functions here! */ 2320 * from other places.
2321 * Note: Do not use journal_getblk/sb_getblk functions here!
2322 */
2236static struct buffer_head *reiserfs_breada(struct block_device *dev, 2323static struct buffer_head *reiserfs_breada(struct block_device *dev,
2237 b_blocknr_t block, int bufsize, 2324 b_blocknr_t block, int bufsize,
2238 b_blocknr_t max_block) 2325 b_blocknr_t max_block)
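The batching it performs, reading up to a fixed number of consecutive blocks per submission and never past max_block, can be modeled in user space with pread. A sketch only: BUFNR here is an arbitrary batch size and the buffer cache is ignored.

#include <stdlib.h>
#include <unistd.h>

#define BUFNR 8   /* batch size; stands in for the kernel's BUFNR */

/*
 * Read up to BUFNR consecutive blocks starting at 'block' (never past
 * max_block) in one pread and return the first one; caller frees.
 */
static char *breada_model(int fd, long block, int bufsize, long max_block)
{
        long nr = max_block - block;
        char *buf;

        if (nr <= 0)
                return NULL;
        if (nr > BUFNR)
                nr = BUFNR;
        buf = malloc((size_t)nr * (size_t)bufsize);
        if (!buf)
                return NULL;
        if (pread(fd, buf, (size_t)nr * (size_t)bufsize,
                  (off_t)block * bufsize) < bufsize) {
                free(buf);
                return NULL;
        }
        /* the kernel keeps the extra blocks cached; this model drops them */
        return buf;   /* first bufsize bytes are the requested block */
}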
@@ -2271,15 +2358,17 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
2271} 2358}
2272 2359
2273/* 2360/*
2274** read and replay the log 2361 * read and replay the log
2275** on a clean unmount, the journal header's next unflushed pointer will 2362 * on a clean unmount, the journal header's next unflushed pointer will be
2276** be to an invalid transaction. This tests that before finding all the 2363 * to an invalid transaction. This tests that before finding all the
2277** transactions in the log, which makes normal mount times fast. 2364 * transactions in the log, which makes normal mount times fast.
2278** After a crash, this starts with the next unflushed transaction, and 2365 *
2279** replays until it finds one too old, or invalid. 2366 * After a crash, this starts with the next unflushed transaction, and
2280** On exit, it sets things up so the first transaction will work correctly. 2367 * replays until it finds one too old, or invalid.
2281** NOTE: only called during fs mount 2368 *
2282*/ 2369 * On exit, it sets things up so the first transaction will work correctly.
2370 * NOTE: only called during fs mount
2371 */
2283static int journal_read(struct super_block *sb) 2372static int journal_read(struct super_block *sb)
2284{ 2373{
2285 struct reiserfs_journal *journal = SB_JOURNAL(sb); 2374 struct reiserfs_journal *journal = SB_JOURNAL(sb);
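The replay decision the comment describes can be shown as a standalone toy model: a clean unmount leaves an invalid transaction at the first-unflushed slot, so the loop never runs; after a crash, replay walks forward while each transaction is valid and carries the next expected id. The log layout and validity test below are simplified assumptions:

#include <stddef.h>

/* toy log slot: id == 0 marks an invalid/garbage transaction */
struct jtrans { unsigned id; size_t next; };

static int replay_model(const struct jtrans *log, size_t nslots,
                        size_t start, unsigned last_flushed_id)
{
        size_t off = start;                    /* first unflushed slot */
        unsigned expect = last_flushed_id + 1; /* next id worth replaying */
        int replayed = 0;

        /* clean unmount: the first slot already fails, loop never runs */
        while (off < nslots && log[off].id == expect) {
                /* "replay" this transaction, then follow it forward */
                replayed++;
                expect++;
                off = log[off].next;
        }
        return replayed;   /* 0 means a fast, replay-free mount */
}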
@@ -2303,9 +2392,10 @@ static int journal_read(struct super_block *sb)
2303 bdevname(journal->j_dev_bd, b)); 2392 bdevname(journal->j_dev_bd, b));
2304 start = get_seconds(); 2393 start = get_seconds();
2305 2394
2306 /* step 1, read in the journal header block. Check the transaction it says 2395 /*
2307 ** is the first unflushed, and if that transaction is not valid, 2396 * step 1, read in the journal header block. Check the transaction
2308 ** replay is done 2397 * it says is the first unflushed, and if that transaction is not
2398 * valid, replay is done
2309 */ 2399 */
2310 journal->j_header_bh = journal_bread(sb, 2400 journal->j_header_bh = journal_bread(sb,
2311 SB_ONDISK_JOURNAL_1st_BLOCK(sb) 2401 SB_ONDISK_JOURNAL_1st_BLOCK(sb)
@@ -2329,9 +2419,10 @@ static int journal_read(struct super_block *sb)
2329 le32_to_cpu(jh->j_last_flush_trans_id)); 2419 le32_to_cpu(jh->j_last_flush_trans_id));
2330 valid_journal_header = 1; 2420 valid_journal_header = 1;
2331 2421
2332 /* now, we try to read the first unflushed offset. If it is not valid, 2422 /*
2333 ** there is nothing more we can do, and it makes no sense to read 2423 * now, we try to read the first unflushed offset. If it
2334 ** through the whole log. 2424 * is not valid, there is nothing more we can do, and it
2425 * makes no sense to read through the whole log.
2335 */ 2426 */
2336 d_bh = 2427 d_bh =
2337 journal_bread(sb, 2428 journal_bread(sb,
@@ -2345,15 +2436,19 @@ static int journal_read(struct super_block *sb)
2345 goto start_log_replay; 2436 goto start_log_replay;
2346 } 2437 }
2347 2438
2348 /* ok, there are transactions that need to be replayed. start with the first log block, find 2439 /*
2349 ** all the valid transactions, and pick out the oldest. 2440 * ok, there are transactions that need to be replayed. start
2441 * with the first log block, find all the valid transactions, and
2442 * pick out the oldest.
2350 */ 2443 */
2351 while (continue_replay 2444 while (continue_replay
2352 && cur_dblock < 2445 && cur_dblock <
2353 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 2446 (SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
2354 SB_ONDISK_JOURNAL_SIZE(sb))) { 2447 SB_ONDISK_JOURNAL_SIZE(sb))) {
2355 /* Note that it is required for blocksize of primary fs device and journal 2448 /*
2356 device to be the same */ 2449 * Note that it is required for blocksize of primary fs
2450 * device and journal device to be the same
2451 */
2357 d_bh = 2452 d_bh =
2358 reiserfs_breada(journal->j_dev_bd, cur_dblock, 2453 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2359 sb->s_blocksize, 2454 sb->s_blocksize,
@@ -2431,9 +2526,11 @@ static int journal_read(struct super_block *sb)
2431 reiserfs_debug(sb, REISERFS_DEBUG_CODE, 2526 reiserfs_debug(sb, REISERFS_DEBUG_CODE,
2432 "journal-1225: No valid " "transactions found"); 2527 "journal-1225: No valid " "transactions found");
2433 } 2528 }
2434 /* j_start does not get set correctly if we don't replay any transactions. 2529 /*
2435 ** if we had a valid journal_header, set j_start to the first unflushed transaction value, 2530 * j_start does not get set correctly if we don't replay any
2436 ** copy the trans_id from the header 2531 * transactions. if we had a valid journal_header, set j_start
2532 * to the first unflushed transaction value, copy the trans_id
2533 * from the header
2437 */ 2534 */
2438 if (valid_journal_header && replay_count == 0) { 2535 if (valid_journal_header && replay_count == 0) {
2439 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); 2536 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
@@ -2462,8 +2559,9 @@ static int journal_read(struct super_block *sb)
2462 _update_journal_header_block(sb, journal->j_start, 2559 _update_journal_header_block(sb, journal->j_start,
2463 journal->j_last_flush_trans_id)) { 2560 journal->j_last_flush_trans_id)) {
2464 reiserfs_write_unlock(sb); 2561 reiserfs_write_unlock(sb);
2465 /* replay failed, caller must call free_journal_ram and abort 2562 /*
2466 ** the mount 2563 * replay failed, caller must call free_journal_ram and abort
2564 * the mount
2467 */ 2565 */
2468 return -1; 2566 return -1;
2469 } 2567 }
@@ -2556,7 +2654,7 @@ static int journal_init_dev(struct super_block *super,
2556 return 0; 2654 return 0;
2557} 2655}
2558 2656
2559/** 2657/*
2560 * When creating/tuning a file system user can assign some 2658 * When creating/tuning a file system user can assign some
2561 * journal params within boundaries which depend on the ratio 2659 * journal params within boundaries which depend on the ratio
2562 * blocksize/standard_blocksize. 2660 * blocksize/standard_blocksize.
@@ -2574,8 +2672,7 @@ static int check_advise_trans_params(struct super_block *sb,
2574 struct reiserfs_journal *journal) 2672 struct reiserfs_journal *journal)
2575{ 2673{
2576 if (journal->j_trans_max) { 2674 if (journal->j_trans_max) {
2577 /* Non-default journal params. 2675 /* Non-default journal params. Do sanity check for them. */
2578 Do sanity check for them. */
2579 int ratio = 1; 2676 int ratio = 1;
2580 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) 2677 if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE)
2581 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; 2678 ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize;
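The ratio visible in this hunk scales the allowed transaction-size window when the blocksize differs from the standard one. A standalone model of that boundary check; the numeric bounds are illustrative assumptions, not the kernel's constants:

#define STD_BLKSIZE   4096
#define TRANS_MAX_STD 1024   /* illustrative max trans len at STD_BLKSIZE */
#define TRANS_MIN_STD  256   /* illustrative min trans len at STD_BLKSIZE */

static int trans_max_ok(unsigned int blocksize, unsigned int trans_max)
{
        unsigned int ratio = 1;

        /* mirror the hunk above: the window is divided by
         * STD_BLKSIZE / blocksize when blocks are smaller */
        if (blocksize < STD_BLKSIZE)
                ratio = STD_BLKSIZE / blocksize;
        return trans_max >= TRANS_MIN_STD / ratio &&
               trans_max <= TRANS_MAX_STD / ratio;
}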
@@ -2597,10 +2694,12 @@ static int check_advise_trans_params(struct super_block *sb,
2597 return 1; 2694 return 1;
2598 } 2695 }
2599 } else { 2696 } else {
2600 /* Default journal params. 2697 /*
2601 The file system was created by old version 2698 * Default journal params.
2602 of mkreiserfs, so some fields contain zeros, 2699 * The file system was created by old version
2603 and we need to advise proper values for them */ 2700 * of mkreiserfs, so some fields contain zeros,
2701 * and we need to advise proper values for them
2702 */
2604 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { 2703 if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) {
2605 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", 2704 reiserfs_warning(sb, "sh-464", "bad blocksize (%u)",
2606 sb->s_blocksize); 2705 sb->s_blocksize);
@@ -2613,9 +2712,7 @@ static int check_advise_trans_params(struct super_block *sb,
2613 return 0; 2712 return 0;
2614} 2713}
2615 2714
2616/* 2715/* must be called once on fs mount. calls journal_read for you */
2617** must be called once on fs mount. calls journal_read for you
2618*/
2619int journal_init(struct super_block *sb, const char *j_dev_name, 2716int journal_init(struct super_block *sb, const char *j_dev_name,
2620 int old_format, unsigned int commit_max_age) 2717 int old_format, unsigned int commit_max_age)
2621{ 2718{
@@ -2654,8 +2751,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2654 REISERFS_DISK_OFFSET_IN_BYTES / 2751 REISERFS_DISK_OFFSET_IN_BYTES /
2655 sb->s_blocksize + 2); 2752 sb->s_blocksize + 2);
2656 2753
2657 /* Sanity check to see is the standard journal fitting within first bitmap 2754 /*
2658 (actual for small blocksizes) */ 2755 * Sanity check to see if the standard journal fits
2756 * within the first bitmap (relevant for small blocksizes)
2757 */
2659 if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2758 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2660 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2759 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
2661 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { 2760 SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) {
@@ -2803,10 +2902,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2803} 2902}
2804 2903
2805/* 2904/*
2806** test for a polite end of the current transaction. Used by file_write, and should 2905 * test for a polite end of the current transaction. Used by file_write,
2807** be used by delete to make sure they don't write more than can fit inside a single 2906 * and should be used by delete to make sure they don't write more than
2808** transaction 2907 * can fit inside a single transaction
2809*/ 2908 */
2810int journal_transaction_should_end(struct reiserfs_transaction_handle *th, 2909int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2811 int new_alloc) 2910 int new_alloc)
2812{ 2911{
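A toy model of the polite-end policy: before queuing more work, a writer asks whether the running transaction should be closed first. The two conditions below, batch budget and transaction age, are only the essence; the kernel checks more state than this:

#include <time.h>

struct jstate {
        unsigned long len_alloc;   /* blocks reserved so far */
        unsigned long max_batch;   /* batching limit in blocks */
        time_t start;              /* when this transaction began */
        time_t max_age;            /* seconds a transaction may stay open */
};

static int should_end_model(const struct jstate *j, unsigned long new_alloc)
{
        if (j->len_alloc + new_alloc >= j->max_batch)
                return 1;          /* would overflow the current batch */
        if (time(NULL) - j->start > j->max_age)
                return 1;          /* transaction has grown too old */
        return 0;                  /* polite to keep going */
}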
@@ -2829,8 +2928,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2829 return 0; 2928 return 0;
2830} 2929}
2831 2930
2832/* this must be called inside a transaction 2931/* this must be called inside a transaction */
2833*/
2834void reiserfs_block_writes(struct reiserfs_transaction_handle *th) 2932void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2835{ 2933{
2836 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); 2934 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
@@ -2840,8 +2938,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2840 return; 2938 return;
2841} 2939}
2842 2940
2843/* this must be called without a transaction started 2941/* this must be called without a transaction started */
2844*/
2845void reiserfs_allow_writes(struct super_block *s) 2942void reiserfs_allow_writes(struct super_block *s)
2846{ 2943{
2847 struct reiserfs_journal *journal = SB_JOURNAL(s); 2944 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2849,8 +2946,7 @@ void reiserfs_allow_writes(struct super_block *s)
2849 wake_up(&journal->j_join_wait); 2946 wake_up(&journal->j_join_wait);
2850} 2947}
2851 2948
2852/* this must be called without a transaction started 2949/* this must be called without a transaction started */
2853*/
2854void reiserfs_wait_on_write_block(struct super_block *s) 2950void reiserfs_wait_on_write_block(struct super_block *s)
2855{ 2951{
2856 struct reiserfs_journal *journal = SB_JOURNAL(s); 2952 struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2912,11 +3008,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id)
2912 } 3008 }
2913} 3009}
2914 3010
2915/* join == true if you must join an existing transaction. 3011/*
2916** join == false if you can deal with waiting for others to finish 3012 * join == true if you must join an existing transaction.
2917** 3013 * join == false if you can deal with waiting for others to finish
2918** this will block until the transaction is joinable. send the number of blocks you 3014 *
2919** expect to use in nblocks. 3015 * this will block until the transaction is joinable. send the number of
3016 * blocks you expect to use in nblocks.
2920*/ 3017*/
2921static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 3018static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2922 struct super_block *sb, unsigned long nblocks, 3019 struct super_block *sb, unsigned long nblocks,
@@ -2957,9 +3054,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2957 } 3054 }
2958 now = get_seconds(); 3055 now = get_seconds();
2959 3056
2960 /* if there is no room in the journal OR 3057 /*
2961 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 3058 * if there is no room in the journal OR
2962 ** we don't sleep if there aren't other writers 3059 * if this transaction is too old, and we weren't called joinable,
3060 * wait for it to finish before beginning. We don't sleep if there
3061 * aren't other writers
2963 */ 3062 */
2964 3063
2965 if ((!join && journal->j_must_wait > 0) || 3064 if ((!join && journal->j_must_wait > 0) ||
@@ -2973,7 +3072,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2973 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 3072 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
2974 3073
2975 old_trans_id = journal->j_trans_id; 3074 old_trans_id = journal->j_trans_id;
2976 unlock_journal(sb); /* allow others to finish this transaction */ 3075 /* allow others to finish this transaction */
3076 unlock_journal(sb);
2977 3077
2978 if (!join && (journal->j_len_alloc + nblocks + 2) >= 3078 if (!join && (journal->j_len_alloc + nblocks + 2) >=
2979 journal->j_max_batch && 3079 journal->j_max_batch &&
@@ -2985,8 +3085,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2985 goto relock; 3085 goto relock;
2986 } 3086 }
2987 } 3087 }
2988 /* don't mess with joining the transaction if all we have to do is 3088 /*
2989 * wait for someone else to do a commit 3089 * don't mess with joining the transaction if all we
3090 * have to do is wait for someone else to do a commit
2990 */ 3091 */
2991 if (atomic_read(&journal->j_jlock)) { 3092 if (atomic_read(&journal->j_jlock)) {
2992 while (journal->j_trans_id == old_trans_id && 3093 while (journal->j_trans_id == old_trans_id &&
@@ -3027,9 +3128,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
3027 3128
3028 out_fail: 3129 out_fail:
3029 memset(th, 0, sizeof(*th)); 3130 memset(th, 0, sizeof(*th));
3030 /* Re-set th->t_super, so we can properly keep track of how many 3131 /*
3132 * Re-set th->t_super, so we can properly keep track of how many
3031 * persistent transactions there are. We need to do this so if this 3133 * persistent transactions there are. We need to do this so if this
3032 * call is part of a failed restart_transaction, we can free it later */ 3134 * call is part of a failed restart_transaction, we can free it later
3135 */
3033 th->t_super = sb; 3136 th->t_super = sb;
3034 return retval; 3137 return retval;
3035} 3138}
@@ -3042,14 +3145,15 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
3042 int ret; 3145 int ret;
3043 struct reiserfs_transaction_handle *th; 3146 struct reiserfs_transaction_handle *th;
3044 3147
3045 /* if we're nesting into an existing transaction. It will be 3148 /*
3046 ** persistent on its own 3149 * if we're nesting into an existing transaction, it will be
3150 * persistent on its own
3047 */ 3151 */
3048 if (reiserfs_transaction_running(s)) { 3152 if (reiserfs_transaction_running(s)) {
3049 th = current->journal_info; 3153 th = current->journal_info;
3050 th->t_refcount++; 3154 th->t_refcount++;
3051 BUG_ON(th->t_refcount < 2); 3155 BUG_ON(th->t_refcount < 2);
3052 3156
3053 return th; 3157 return th;
3054 } 3158 }
3055 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); 3159 th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
@@ -3085,8 +3189,9 @@ static int journal_join(struct reiserfs_transaction_handle *th,
3085{ 3189{
3086 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3190 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3087 3191
3088 /* this keeps do_journal_end from NULLing out the current->journal_info 3192 /*
3089 ** pointer 3193 * this keeps do_journal_end from NULLing out the
3194 * current->journal_info pointer
3090 */ 3195 */
3091 th->t_handle_save = cur_th; 3196 th->t_handle_save = cur_th;
3092 BUG_ON(cur_th && cur_th->t_refcount > 1); 3197 BUG_ON(cur_th && cur_th->t_refcount > 1);
@@ -3098,8 +3203,9 @@ int journal_join_abort(struct reiserfs_transaction_handle *th,
3098{ 3203{
3099 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3204 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3100 3205
3101 /* this keeps do_journal_end from NULLing out the current->journal_info 3206 /*
3102 ** pointer 3207 * this keeps do_journal_end from NULLing out the
3208 * current->journal_info pointer
3103 */ 3209 */
3104 th->t_handle_save = cur_th; 3210 th->t_handle_save = cur_th;
3105 BUG_ON(cur_th && cur_th->t_refcount > 1); 3211 BUG_ON(cur_th && cur_th->t_refcount > 1);
@@ -3125,9 +3231,10 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3125 "journal_info != 0"); 3231 "journal_info != 0");
3126 return 0; 3232 return 0;
3127 } else { 3233 } else {
3128 /* we've ended up with a handle from a different filesystem. 3234 /*
3129 ** save it and restore on journal_end. This should never 3235 * we've ended up with a handle from a different
3130 ** really happen... 3236 * filesystem. save it and restore on journal_end.
3237 * This should never really happen...
3131 */ 3238 */
3132 reiserfs_warning(sb, "clm-2100", 3239 reiserfs_warning(sb, "clm-2100",
3133 "nesting info a different FS"); 3240 "nesting info a different FS");
@@ -3140,9 +3247,10 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3140 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); 3247 ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG);
3141 BUG_ON(current->journal_info != th); 3248 BUG_ON(current->journal_info != th);
3142 3249
3143 /* I guess this boils down to being the reciprocal of clm-2100 above. 3250 /*
3144 * If do_journal_begin_r fails, we need to put it back, since journal_end 3251 * I guess this boils down to being the reciprocal of clm-2100 above.
3145 * won't be called to do it. */ 3252 * If do_journal_begin_r fails, we need to put it back, since
3253 * journal_end won't be called to do it. */
3146 if (ret) 3254 if (ret)
3147 current->journal_info = th->t_handle_save; 3255 current->journal_info = th->t_handle_save;
3148 else 3256 else
@@ -3152,14 +3260,15 @@ int journal_begin(struct reiserfs_transaction_handle *th,
3152} 3260}
3153 3261
3154/* 3262/*
3155** puts bh into the current transaction. If it was already there, reorders removes the 3263 * puts bh into the current transaction. If it was already there, it reorders:
3156** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 3264 * removes the old pointers from the hash, and puts new ones in (to make
3157** 3265 * sure replay happens in the right order).
3158** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the 3266 *
3159** transaction is committed. 3267 * if it was dirty, cleans and files onto the clean list. I can't let it
3160** 3268 * be dirty again until the transaction is committed.
3161** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3269 *
3162*/ 3270 * if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
3271 */
3163int journal_mark_dirty(struct reiserfs_transaction_handle *th, 3272int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3164 struct super_block *sb, struct buffer_head *bh) 3273 struct super_block *sb, struct buffer_head *bh)
3165{ 3274{
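The last rule in the comment, pushing j_len_alloc to 10 + j_len whenever j_len outgrows it, as a one-line model (the slack constant is taken straight from the comment):

static void push_len_alloc(unsigned long *len_alloc, unsigned long len)
{
        /* keep the allocation estimate a little ahead of actual length */
        if (len > *len_alloc)
                *len_alloc = 10 + len;
}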
@@ -3184,9 +3293,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3184 return 0; 3293 return 0;
3185 } 3294 }
3186 3295
3187 /* this must be turned into a panic instead of a warning. We can't allow 3296 /*
3188 ** a dirty or journal_dirty or locked buffer to be logged, as some changes 3297 * this must be turned into a panic instead of a warning. We can't
3189 ** could get to disk too early. NOT GOOD. 3298 * allow a dirty or journal_dirty or locked buffer to be logged, as
3299 * some changes could get to disk too early. NOT GOOD.
3190 */ 3300 */
3191 if (!prepared || buffer_dirty(bh)) { 3301 if (!prepared || buffer_dirty(bh)) {
3192 reiserfs_warning(sb, "journal-1777", 3302 reiserfs_warning(sb, "journal-1777",
@@ -3205,8 +3315,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th,
3205 atomic_read(&(journal->j_wcount))); 3315 atomic_read(&(journal->j_wcount)));
3206 return 1; 3316 return 1;
3207 } 3317 }
3208 /* this error means I've screwed up, and we've overflowed the transaction. 3318 /*
3209 ** Nothing can be done here, except make the FS readonly or panic. 3319 * this error means I've screwed up, and we've overflowed
3320 * the transaction. Nothing can be done here, except make the
3321 * FS readonly or panic.
3210 */ 3322 */
3211 if (journal->j_len >= journal->j_trans_max) { 3323 if (journal->j_len >= journal->j_trans_max) {
3212 reiserfs_panic(th->t_super, "journal-1413", 3324 reiserfs_panic(th->t_super, "journal-1413",
@@ -3280,8 +3392,9 @@ int journal_end(struct reiserfs_transaction_handle *th,
3280 struct reiserfs_transaction_handle *cur_th = 3392 struct reiserfs_transaction_handle *cur_th =
3281 current->journal_info; 3393 current->journal_info;
3282 3394
3283 /* we aren't allowed to close a nested transaction on a different 3395 /*
3284 ** filesystem from the one in the task struct 3396 * we aren't allowed to close a nested transaction on a
3397 * different filesystem from the one in the task struct
3285 */ 3398 */
3286 BUG_ON(cur_th->t_super != th->t_super); 3399 BUG_ON(cur_th->t_super != th->t_super);
3287 3400
@@ -3295,13 +3408,14 @@ int journal_end(struct reiserfs_transaction_handle *th,
3295 } 3408 }
3296} 3409}
3297 3410
3298/* removes from the current transaction, relsing and descrementing any counters. 3411/*
3299** also files the removed buffer directly onto the clean list 3412 * removes from the current transaction, releasing and decrementing any counters.
3300** 3413 * also files the removed buffer directly onto the clean list
3301** called by journal_mark_freed when a block has been deleted 3414 *
3302** 3415 * called by journal_mark_freed when a block has been deleted
3303** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3416 *
3304*/ 3417 * returns 1 if it cleaned and released the buffer. 0 otherwise
3418 */
3305static int remove_from_transaction(struct super_block *sb, 3419static int remove_from_transaction(struct super_block *sb,
3306 b_blocknr_t blocknr, int already_cleaned) 3420 b_blocknr_t blocknr, int already_cleaned)
3307{ 3421{
@@ -3350,15 +3464,16 @@ static int remove_from_transaction(struct super_block *sb,
3350} 3464}
3351 3465
3352/* 3466/*
3353** for any cnode in a journal list, it can only be dirtied of all the 3467 * for any cnode in a journal list, it can only be dirtied if all the
3354** transactions that include it are committed to disk. 3468 * transactions that include it are committed to disk.
3355** this checks through each transaction, and returns 1 if you are allowed to dirty, 3469 * this checks through each transaction, and returns 1 if you are allowed
3356** and 0 if you aren't 3470 * to dirty, and 0 if you aren't
3357** 3471 *
3358** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log 3472 * it is called by dirty_journal_list, which is called after
3359** blocks for a given transaction on disk 3473 * flush_commit_list has gotten all the log blocks for a given
3360** 3474 * transaction on disk
3361*/ 3475 *
3476 */
3362static int can_dirty(struct reiserfs_journal_cnode *cn) 3477static int can_dirty(struct reiserfs_journal_cnode *cn)
3363{ 3478{
3364 struct super_block *sb = cn->sb; 3479 struct super_block *sb = cn->sb;
@@ -3366,9 +3481,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3366 struct reiserfs_journal_cnode *cur = cn->hprev; 3481 struct reiserfs_journal_cnode *cur = cn->hprev;
3367 int can_dirty = 1; 3482 int can_dirty = 1;
3368 3483
3369 /* first test hprev. These are all newer than cn, so any node here 3484 /*
3370 ** with the same block number and dev means this node can't be sent 3485 * first test hprev. These are all newer than cn, so any node here
3371 ** to disk right now. 3486 * with the same block number and dev means this node can't be sent
3487 * to disk right now.
3372 */ 3488 */
3373 while (cur && can_dirty) { 3489 while (cur && can_dirty) {
3374 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 3490 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
@@ -3377,8 +3493,9 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3377 } 3493 }
3378 cur = cur->hprev; 3494 cur = cur->hprev;
3379 } 3495 }
3380 /* then test hnext. These are all older than cn. As long as they 3496 /*
3381 ** are committed to the log, it is safe to write cn to disk 3497 * then test hnext. These are all older than cn. As long as they
3498 * are committed to the log, it is safe to write cn to disk
3382 */ 3499 */
3383 cur = cn->hnext; 3500 cur = cn->hnext;
3384 while (cur && can_dirty) { 3501 while (cur && can_dirty) {
@@ -3392,9 +3509,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn)
3392 return can_dirty; 3509 return can_dirty;
3393} 3510}
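The two walks can_dirty performs map onto a small standalone model: hprev leads to newer entries hashed to the same slot, hnext to older ones. Any newer transaction holding the same block vetoes the write, and every older one must already be committed. The fields below are simplifications of the real cnode:

struct cnode_model {
        unsigned long blocknr;
        int committed;                  /* stands in for "jlist flushed" */
        struct cnode_model *hprev;      /* newer same-hash entries */
        struct cnode_model *hnext;      /* older same-hash entries */
};

static int can_dirty_model(const struct cnode_model *cn)
{
        const struct cnode_model *cur;

        /* a newer transaction logging this block keeps it pinned */
        for (cur = cn->hprev; cur; cur = cur->hprev)
                if (cur->blocknr == cn->blocknr)
                        return 0;
        /* older transactions must have committed their copy to the log */
        for (cur = cn->hnext; cur; cur = cur->hnext)
                if (cur->blocknr == cn->blocknr && !cur->committed)
                        return 0;
        return 1;
}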
3394 3511
3395/* syncs the commit blocks, but does not force the real buffers to disk 3512/*
3396** will wait until the current transaction is done/committed before returning 3513 * syncs the commit blocks, but does not force the real buffers to disk;
3397*/ 3514 * will wait until the current transaction is done/committed before returning
3515 */
3398int journal_end_sync(struct reiserfs_transaction_handle *th, 3516int journal_end_sync(struct reiserfs_transaction_handle *th,
3399 struct super_block *sb, unsigned long nblocks) 3517 struct super_block *sb, unsigned long nblocks)
3400{ 3518{
@@ -3411,9 +3529,7 @@ int journal_end_sync(struct reiserfs_transaction_handle *th,
3411 return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); 3529 return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT);
3412} 3530}
3413 3531
3414/* 3532/* writeback the pending async commits to disk */
3415** writeback the pending async commits to disk
3416*/
3417static void flush_async_commits(struct work_struct *work) 3533static void flush_async_commits(struct work_struct *work)
3418{ 3534{
3419 struct reiserfs_journal *journal = 3535 struct reiserfs_journal *journal =
@@ -3433,9 +3549,9 @@ static void flush_async_commits(struct work_struct *work)
3433} 3549}
3434 3550
3435/* 3551/*
3436** flushes any old transactions to disk 3552 * flushes any old transactions to disk
3437** ends the current transaction if it is too old 3553 * ends the current transaction if it is too old
3438*/ 3554 */
3439void reiserfs_flush_old_commits(struct super_block *sb) 3555void reiserfs_flush_old_commits(struct super_block *sb)
3440{ 3556{
3441 time_t now; 3557 time_t now;
@@ -3443,13 +3559,15 @@ void reiserfs_flush_old_commits(struct super_block *sb)
3443 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3559 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3444 3560
3445 now = get_seconds(); 3561 now = get_seconds();
3446 /* safety check so we don't flush while we are replaying the log during 3562 /*
3563 * safety check so we don't flush while we are replaying the log during
3447 * mount 3564 * mount
3448 */ 3565 */
3449 if (list_empty(&journal->j_journal_list)) 3566 if (list_empty(&journal->j_journal_list))
3450 return; 3567 return;
3451 3568
3452 /* check the current transaction. If there are no writers, and it is 3569 /*
3570 * check the current transaction. If there are no writers, and it is
3453 * too old, finish it, and force the commit blocks to disk 3571 * too old, finish it, and force the commit blocks to disk
3454 */ 3572 */
3455 if (atomic_read(&journal->j_wcount) <= 0 && 3573 if (atomic_read(&journal->j_wcount) <= 0 &&
@@ -3463,8 +3581,10 @@ void reiserfs_flush_old_commits(struct super_block *sb)
3463 journal_mark_dirty(&th, sb, 3581 journal_mark_dirty(&th, sb,
3464 SB_BUFFER_WITH_SB(sb)); 3582 SB_BUFFER_WITH_SB(sb));
3465 3583
3466 /* we're only being called from kreiserfsd, it makes no sense to do 3584 /*
3467 ** an async commit so that kreiserfsd can do it later 3585 * we're only being called from kreiserfsd, it makes
3586 * no sense to do an async commit so that kreiserfsd
3587 * can do it later
3468 */ 3588 */
3469 do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); 3589 do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT);
3470 } 3590 }
@@ -3472,16 +3592,20 @@ void reiserfs_flush_old_commits(struct super_block *sb)
3472} 3592}
3473 3593
3474/* 3594/*
3475** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit 3595 * returns 0 if do_journal_end should return right away, returns 1 if
3476** 3596 * do_journal_end should finish the commit
3477** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all 3597 *
3478** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just 3598 * if the current transaction is too old, but still has writers, this will
3479** flushes the commit list and returns 0. 3599 * wait on j_join_wait until all the writers are done. By the time it
3480** 3600 * wakes up, the transaction it was called has already ended, so it just
3481** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. 3601 * flushes the commit list and returns 0.
3482** 3602 *
3483** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3603 * Won't batch when flush or commit_now is set. Also won't batch when
3484*/ 3604 * others are waiting on j_join_wait.
3605 *
3606 * Note, we can't allow the journal_end to proceed while there are still
3607 * writers in the log.
3608 */
3485static int check_journal_end(struct reiserfs_transaction_handle *th, 3609static int check_journal_end(struct reiserfs_transaction_handle *th,
3486 struct super_block *sb, unsigned long nblocks, 3610 struct super_block *sb, unsigned long nblocks,
3487 int flags) 3611 int flags)
@@ -3503,21 +3627,25 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3503 } 3627 }
3504 3628
3505 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); 3629 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
3506 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ 3630 /* <= 0 is allowed. unmounting might not call begin */
3631 if (atomic_read(&(journal->j_wcount)) > 0)
3507 atomic_dec(&(journal->j_wcount)); 3632 atomic_dec(&(journal->j_wcount));
3508 }
3509 3633
3510 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 3634 /*
3511 ** will be dealt with by next transaction that actually writes something, but should be taken 3635 * BUG, deal with the case where j_len is 0: blocks that people
3512 ** care of in this trans 3636 * previously freed still need to be released. This will be dealt
3637 * with by the next transaction that actually writes something,
3638 * but should be taken care of in this trans
3513 */ 3639 */
3514 BUG_ON(journal->j_len == 0); 3640 BUG_ON(journal->j_len == 0);
3515 3641
3516 /* if wcount > 0, and we are called to with flush or commit_now, 3642 /*
3517 ** we wait on j_join_wait. We will wake up when the last writer has 3643 * if wcount > 0, and we are called with flush or commit_now,
3518 ** finished the transaction, and started it on its way to the disk. 3644 * we wait on j_join_wait. We will wake up when the last writer has
3519 ** Then, we flush the commit or journal list, and just return 0 3645 * finished the transaction, and started it on its way to the disk.
3520 ** because the rest of journal end was already done for this transaction. 3646 * Then, we flush the commit or journal list, and just return 0
3647 * because the rest of journal end was already done for this
3648 * transaction.
3521 */ 3649 */
3522 if (atomic_read(&(journal->j_wcount)) > 0) { 3650 if (atomic_read(&(journal->j_wcount)) > 0) {
3523 if (flush || commit_now) { 3651 if (flush || commit_now) {
@@ -3533,7 +3661,10 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3533 } 3661 }
3534 unlock_journal(sb); 3662 unlock_journal(sb);
3535 3663
3536 /* sleep while the current transaction is still j_jlocked */ 3664 /*
3665 * sleep while the current transaction is
3666 * still j_jlocked
3667 */
3537 while (journal->j_trans_id == trans_id) { 3668 while (journal->j_trans_id == trans_id) {
3538 if (atomic_read(&journal->j_jlock)) { 3669 if (atomic_read(&journal->j_jlock)) {
3539 queue_log_writer(sb); 3670 queue_log_writer(sb);
@@ -3547,7 +3678,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3547 } 3678 }
3548 } 3679 }
3549 BUG_ON(journal->j_trans_id == trans_id); 3680 BUG_ON(journal->j_trans_id == trans_id);
3550 3681
3551 if (commit_now 3682 if (commit_now
3552 && journal_list_still_alive(sb, trans_id) 3683 && journal_list_still_alive(sb, trans_id)
3553 && wait_on_commit) { 3684 && wait_on_commit) {
@@ -3585,19 +3716,22 @@ static int check_journal_end(struct reiserfs_transaction_handle *th,
3585} 3716}
3586 3717
3587/* 3718/*
3588** Does all the work that makes deleting blocks safe. 3719 * Does all the work that makes deleting blocks safe.
3589** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. 3720 * when deleting a block marked BH_JNew, just remove it from the current
3590** 3721 * transaction, clean its buffer_head and move on.
3591** otherwise: 3722 *
3592** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes 3723 * otherwise:
3593** before this transaction has finished. 3724 * set a bit for the block in the journal bitmap. That will prevent it from
3594** 3725 * being allocated for unformatted nodes before this transaction has finished.
3595** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with 3726 *
3596** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, 3727 * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers.
3597** the block can't be reallocated yet. 3728 * That will prevent any old transactions with this block from trying to flush
3598** 3729 * to the real location. Since we aren't removing the cnode from the
3599** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3730 * journal_list_hash, *the block can't be reallocated yet.
3600*/ 3731 *
3732 * Then remove it from the current transaction, decrementing any counters and
3733 * filing it on the clean list.
3734 */
3601int journal_mark_freed(struct reiserfs_transaction_handle *th, 3735int journal_mark_freed(struct reiserfs_transaction_handle *th,
3602 struct super_block *sb, b_blocknr_t blocknr) 3736 struct super_block *sb, b_blocknr_t blocknr)
3603{ 3737{
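The branch structure the comment lays out, with BH_JNew blocks simply forgotten and everything else pinned in the journal bitmap and flagged BLOCK_FREED, in a compact model. The flags, bitmap layout, and helper below are illustrative assumptions:

enum { BF_JNEW = 0x1, BF_FREED = 0x2 };

struct blk_model {
        unsigned long nr;
        unsigned int flags;
};

/* returns 1 when the block was cleaned away, 0 when it must stay pinned */
static int mark_freed_model(struct blk_model *b, unsigned char *jbitmap)
{
        if (b->flags & BF_JNEW) {
                /* allocated and freed inside this trans: nothing on disk
                 * to protect, just drop it from the transaction */
                b->flags &= ~BF_JNEW;
                return 1;
        }
        /* pre-existing block: keep it unallocatable until this trans
         * commits, and stop older copies from flushing to it */
        jbitmap[b->nr >> 3] |= 1u << (b->nr & 7);
        b->flags |= BF_FREED;
        return 0;
}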
@@ -3620,7 +3754,10 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3620 reiserfs_clean_and_file_buffer(bh); 3754 reiserfs_clean_and_file_buffer(bh);
3621 cleaned = remove_from_transaction(sb, blocknr, cleaned); 3755 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3622 } else { 3756 } else {
3623 /* set the bit for this block in the journal bitmap for this transaction */ 3757 /*
3758 * set the bit for this block in the journal bitmap
3759 * for this transaction
3760 */
3624 jb = journal->j_current_jl->j_list_bitmap; 3761 jb = journal->j_current_jl->j_list_bitmap;
3625 if (!jb) { 3762 if (!jb) {
3626 reiserfs_panic(sb, "journal-1702", 3763 reiserfs_panic(sb, "journal-1702",
@@ -3636,17 +3773,22 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3636 } 3773 }
3637 cleaned = remove_from_transaction(sb, blocknr, cleaned); 3774 cleaned = remove_from_transaction(sb, blocknr, cleaned);
3638 3775
3639 /* find all older transactions with this block, make sure they don't try to write it out */ 3776 /*
3777 * find all older transactions with this block,
3778 * make sure they don't try to write it out
3779 */
3640 cn = get_journal_hash_dev(sb, journal->j_list_hash_table, 3780 cn = get_journal_hash_dev(sb, journal->j_list_hash_table,
3641 blocknr); 3781 blocknr);
3642 while (cn) { 3782 while (cn) {
3643 if (sb == cn->sb && blocknr == cn->blocknr) { 3783 if (sb == cn->sb && blocknr == cn->blocknr) {
3644 set_bit(BLOCK_FREED, &cn->state); 3784 set_bit(BLOCK_FREED, &cn->state);
3645 if (cn->bh) { 3785 if (cn->bh) {
3786 /*
3787 * remove_from_transaction will brelse
3788 * the buffer if it was in the current
3789 * trans
3790 */
3646 if (!cleaned) { 3791 if (!cleaned) {
3647 /* remove_from_transaction will brelse the buffer if it was
3648 ** in the current trans
3649 */
3650 clear_buffer_journal_dirty(cn-> 3792 clear_buffer_journal_dirty(cn->
3651 bh); 3793 bh);
3652 clear_buffer_dirty(cn->bh); 3794 clear_buffer_dirty(cn->bh);
@@ -3661,7 +3803,11 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th,
3661 "cn->bh->b_count < 0"); 3803 "cn->bh->b_count < 0");
3662 } 3804 }
3663 } 3805 }
3664 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3806 /*
3807 * since we are clearing the bh,
3808 * we MUST dec nonzerolen
3809 */
3810 if (cn->jlist) {
3665 atomic_dec(& 3811 atomic_dec(&
3666 (cn->jlist-> 3812 (cn->jlist->
3667 j_nonzerolen)); 3813 j_nonzerolen));
@@ -3697,10 +3843,16 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
3697 struct reiserfs_journal *journal = SB_JOURNAL(sb); 3843 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3698 int ret = 0; 3844 int ret = 0;
3699 3845
3700 /* is it from the current transaction, or from an unknown transaction? */ 3846 /*
3847 * is it from the current transaction,
3848 * or from an unknown transaction?
3849 */
3701 if (id == journal->j_trans_id) { 3850 if (id == journal->j_trans_id) {
3702 jl = journal->j_current_jl; 3851 jl = journal->j_current_jl;
3703 /* try to let other writers come in and grow this transaction */ 3852 /*
3853 * try to let other writers come in and
3854 * grow this transaction
3855 */
3704 let_transaction_grow(sb, id); 3856 let_transaction_grow(sb, id);
3705 if (journal->j_trans_id != id) { 3857 if (journal->j_trans_id != id) {
3706 goto flush_commit_only; 3858 goto flush_commit_only;
@@ -3724,7 +3876,8 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id,
3724 ret = 1; 3876 ret = 1;
3725 3877
3726 } else { 3878 } else {
3727 /* this gets tricky, we have to make sure the journal list in 3879 /*
3880 * this gets tricky, we have to make sure the journal list in
3728 * the inode still exists. We know the list is still around 3881 * the inode still exists. We know the list is still around
3729 * if we've got a larger transaction id than the oldest list 3882 * if we've got a larger transaction id than the oldest list
3730 */ 3883 */
@@ -3751,7 +3904,8 @@ int reiserfs_commit_for_inode(struct inode *inode)
3751 unsigned int id = REISERFS_I(inode)->i_trans_id; 3904 unsigned int id = REISERFS_I(inode)->i_trans_id;
3752 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 3905 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
3753 3906
3754 /* for the whole inode, assume unset id means it was 3907 /*
3908 * for the whole inode, assume unset id means it was
3755 * changed in the current transaction. More conservative 3909 * changed in the current transaction. More conservative
3756 */ 3910 */
3757 if (!id || !jl) { 3911 if (!id || !jl) {
@@ -3789,12 +3943,11 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb,
3789 3943
3790extern struct tree_balance *cur_tb; 3944extern struct tree_balance *cur_tb;
3791/* 3945/*
3792** before we can change a metadata block, we have to make sure it won't 3946 * before we can change a metadata block, we have to make sure it won't
3793** be written to disk while we are altering it. So, we must: 3947 * be written to disk while we are altering it. So, we must:
3794** clean it 3948 * clean it
3795** wait on it. 3949 * wait on it.
3796** 3950 */
3797*/
3798int reiserfs_prepare_for_journal(struct super_block *sb, 3951int reiserfs_prepare_for_journal(struct super_block *sb,
3799 struct buffer_head *bh, int wait) 3952 struct buffer_head *bh, int wait)
3800{ 3953{
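Clean it, wait on it translates to: take the buffer lock (optionally without blocking), clear the dirty bit so writeback cannot race the modification, then release the lock. A pthread-based stand-in for the buffer lock, a sketch rather than the kernel's buffer_head API:

#include <pthread.h>

struct buf_model {
        pthread_mutex_t lock;   /* stands in for the buffer lock */
        int dirty;
};

static int prepare_model(struct buf_model *b, int wait)
{
        if (pthread_mutex_trylock(&b->lock) != 0) {
                if (!wait)
                        return 0;             /* caller may retry later */
                pthread_mutex_lock(&b->lock); /* "wait on it" */
        }
        b->dirty = 0;                         /* "clean it" */
        pthread_mutex_unlock(&b->lock);
        return 1;
}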
@@ -3815,15 +3968,15 @@ int reiserfs_prepare_for_journal(struct super_block *sb,
3815} 3968}
3816 3969
3817/* 3970/*
3818** long and ugly. If flush, will not return until all commit 3971 * long and ugly. If flush, will not return until all commit
3819** blocks and all real buffers in the trans are on disk. 3972 * blocks and all real buffers in the trans are on disk.
3820** If no_async, won't return until all commit blocks are on disk. 3973 * If no_async, won't return until all commit blocks are on disk.
3821** 3974 *
3822** keep reading, there are comments as you go along 3975 * keep reading, there are comments as you go along
3823** 3976 *
3824** If the journal is aborted, we just clean up. Things like flushing 3977 * If the journal is aborted, we just clean up. Things like flushing
3825** journal lists, etc just won't happen. 3978 * journal lists, etc just won't happen.
3826*/ 3979 */
3827static int do_journal_end(struct reiserfs_transaction_handle *th, 3980static int do_journal_end(struct reiserfs_transaction_handle *th,
3828 struct super_block *sb, unsigned long nblocks, 3981 struct super_block *sb, unsigned long nblocks,
3829 int flags) 3982 int flags)
@@ -3850,8 +4003,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3850 BUG_ON(th->t_refcount > 1); 4003 BUG_ON(th->t_refcount > 1);
3851 BUG_ON(!th->t_trans_id); 4004 BUG_ON(!th->t_trans_id);
3852 4005
3853 /* protect flush_older_commits from doing mistakes if the 4006 /*
3854 transaction ID counter gets overflowed. */ 4007 * protect flush_older_commits from making mistakes if the
4008 * transaction ID counter gets overflowed.
4009 */
3855 if (th->t_trans_id == ~0U) 4010 if (th->t_trans_id == ~0U)
3856 flags |= FLUSH_ALL | COMMIT_NOW | WAIT; 4011 flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
3857 flush = flags & FLUSH_ALL; 4012 flush = flags & FLUSH_ALL;
@@ -3875,8 +4030,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3875 wait_on_commit = 1; 4030 wait_on_commit = 1;
3876 } 4031 }
3877 4032
3878 /* check_journal_end locks the journal, and unlocks if it does not return 1 4033 /*
3879 ** it tells us if we should continue with the journal_end, or just return 4034 * check_journal_end locks the journal, and unlocks if it does
4035 * not return 1. It tells us if we should continue with the
4036 * journal_end, or just return
3880 */ 4037 */
3881 if (!check_journal_end(th, sb, nblocks, flags)) { 4038 if (!check_journal_end(th, sb, nblocks, flags)) {
3882 reiserfs_schedule_old_flush(sb); 4039 reiserfs_schedule_old_flush(sb);
@@ -3891,19 +4048,23 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3891 } 4048 }
3892 4049
3893 /* 4050 /*
3894 ** j must wait means we have to flush the log blocks, and the real blocks for 4051 * j_must_wait means we have to flush the log blocks, and the
3895 ** this transaction 4052 * real blocks for this transaction
3896 */ 4053 */
3897 if (journal->j_must_wait > 0) { 4054 if (journal->j_must_wait > 0) {
3898 flush = 1; 4055 flush = 1;
3899 } 4056 }
3900#ifdef REISERFS_PREALLOCATE 4057#ifdef REISERFS_PREALLOCATE
3901 /* quota ops might need to nest, setup the journal_info pointer for them 4058 /*
3902 * and raise the refcount so that it is > 0. */ 4059 * quota ops might need to nest, setup the journal_info pointer
4060 * for them and raise the refcount so that it is > 0.
4061 */
3903 current->journal_info = th; 4062 current->journal_info = th;
3904 th->t_refcount++; 4063 th->t_refcount++;
3905 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 4064
3906 * the transaction */ 4065 /* it should not involve new blocks into the transaction */
4066 reiserfs_discard_all_prealloc(th);
4067
3907 th->t_refcount--; 4068 th->t_refcount--;
3908 current->journal_info = th->t_handle_save; 4069 current->journal_info = th->t_handle_save;
3909#endif 4070#endif
@@ -3919,7 +4080,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3919 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); 4080 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
3920 set_desc_trans_id(desc, journal->j_trans_id); 4081 set_desc_trans_id(desc, journal->j_trans_id);
3921 4082
3922 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ 4083 /*
4084 * setup commit block. Don't write (keep it clean too) this one
4085 * until after everyone else is written
4086 */
3923 c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + 4087 c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) +
3924 ((journal->j_start + journal->j_len + 4088 ((journal->j_start + journal->j_len +
3925 1) % SB_ONDISK_JOURNAL_SIZE(sb))); 4089 1) % SB_ONDISK_JOURNAL_SIZE(sb)));
@@ -3931,7 +4095,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3931 /* init this journal list */ 4095 /* init this journal list */
3932 jl = journal->j_current_jl; 4096 jl = journal->j_current_jl;
3933 4097
3934 /* we lock the commit before doing anything because 4098 /*
4099 * we lock the commit before doing anything because
3935 * we want to make sure nobody tries to run flush_commit_list until 4100 * we want to make sure nobody tries to run flush_commit_list until
3936 * the new transaction is fully setup, and we've already flushed the 4101 * the new transaction is fully setup, and we've already flushed the
3937 * ordered bh list 4102 * ordered bh list
@@ -3951,9 +4116,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3951 atomic_set(&jl->j_commit_left, journal->j_len + 2); 4116 atomic_set(&jl->j_commit_left, journal->j_len + 2);
3952 jl->j_realblock = NULL; 4117 jl->j_realblock = NULL;
3953 4118
3954 /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 4119 /*
3955 ** for each real block, add it to the journal list hash, 4120 * The ENTIRE FOR LOOP MUST not cause schedule to occur.
3956 ** copy into real block index array in the commit or desc block 4121 * For each real block, add it to the journal list hash and
 4122 * copy it into the real block index array in the commit or desc block
3957 */ 4123 */
3958 trans_half = journal_trans_half(sb->s_blocksize); 4124 trans_half = journal_trans_half(sb->s_blocksize);
3959 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { 4125 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
@@ -3972,9 +4138,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
3972 last_cn->next = jl_cn; 4138 last_cn->next = jl_cn;
3973 } 4139 }
3974 last_cn = jl_cn; 4140 last_cn = jl_cn;
3975 /* make sure the block we are trying to log is not a block 4141 /*
3976 of journal or reserved area */ 4142 * make sure the block we are trying to log
3977 4143 * is not a block of journal or reserved area
4144 */
3978 if (is_block_in_log_or_reserved_area 4145 if (is_block_in_log_or_reserved_area
3979 (sb, cn->bh->b_blocknr)) { 4146 (sb, cn->bh->b_blocknr)) {
3980 reiserfs_panic(sb, "journal-2332", 4147 reiserfs_panic(sb, "journal-2332",
@@ -4004,19 +4171,26 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4004 set_desc_trans_id(desc, journal->j_trans_id); 4171 set_desc_trans_id(desc, journal->j_trans_id);
4005 set_commit_trans_len(commit, journal->j_len); 4172 set_commit_trans_len(commit, journal->j_len);
4006 4173
4007 /* special check in case all buffers in the journal were marked for not logging */ 4174 /*
4175 * special check in case all buffers in the journal
4176 * were marked for not logging
4177 */
4008 BUG_ON(journal->j_len == 0); 4178 BUG_ON(journal->j_len == 0);
4009 4179
4010 /* we're about to dirty all the log blocks, mark the description block 4180 /*
4181 * we're about to dirty all the log blocks, mark the description block
4011 * dirty now too. Don't mark the commit block dirty until all the 4182 * dirty now too. Don't mark the commit block dirty until all the
4012 * others are on disk 4183 * others are on disk
4013 */ 4184 */
4014 mark_buffer_dirty(d_bh); 4185 mark_buffer_dirty(d_bh);
4015 4186
4016 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 4187 /*
4188 * first data block is j_start + 1, so add one to
4189 * cur_write_start wherever you use it
4190 */
4017 cur_write_start = journal->j_start; 4191 cur_write_start = journal->j_start;
4018 cn = journal->j_first; 4192 cn = journal->j_first;
4019 jindex = 1; /* start at one so we don't get the desc again */ 4193 jindex = 1; /* start at one so we don't get the desc again */
4020 while (cn) { 4194 while (cn) {
4021 clear_buffer_journal_new(cn->bh); 4195 clear_buffer_journal_new(cn->bh);
4022 /* copy all the real blocks into log area. dirty log blocks */ 4196 /* copy all the real blocks into log area. dirty log blocks */
@@ -4042,7 +4216,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4042 set_buffer_journal_dirty(cn->bh); 4216 set_buffer_journal_dirty(cn->bh);
4043 clear_buffer_journaled(cn->bh); 4217 clear_buffer_journaled(cn->bh);
4044 } else { 4218 } else {
4045 /* JDirty cleared sometime during transaction. don't log this one */ 4219 /*
4220 * JDirty cleared sometime during transaction.
4221 * don't log this one
4222 */
4046 reiserfs_warning(sb, "journal-2048", 4223 reiserfs_warning(sb, "journal-2048",
4047 "BAD, buffer in journal hash, " 4224 "BAD, buffer in journal hash, "
4048 "but not JDirty!"); 4225 "but not JDirty!");
@@ -4054,9 +4231,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4054 reiserfs_cond_resched(sb); 4231 reiserfs_cond_resched(sb);
4055 } 4232 }
4056 4233
4057 /* we are done with both the c_bh and d_bh, but 4234 /*
4058 ** c_bh must be written after all other commit blocks, 4235 * we are done with both the c_bh and d_bh, but
4059 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 4236 * c_bh must be written after all other commit blocks,
4237 * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
4060 */ 4238 */
4061 4239
4062 journal->j_current_jl = alloc_journal_list(sb); 4240 journal->j_current_jl = alloc_journal_list(sb);
@@ -4087,15 +4265,18 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4087 journal->j_next_async_flush = 0; 4265 journal->j_next_async_flush = 0;
4088 init_journal_hash(sb); 4266 init_journal_hash(sb);
4089 4267
4090 // make sure reiserfs_add_jh sees the new current_jl before we 4268 /*
4091 // write out the tails 4269 * make sure reiserfs_add_jh sees the new current_jl before we
4270 * write out the tails
4271 */
4092 smp_mb(); 4272 smp_mb();
4093 4273
4094 /* tail conversion targets have to hit the disk before we end the 4274 /*
4275 * tail conversion targets have to hit the disk before we end the
4095 * transaction. Otherwise a later transaction might repack the tail 4276 * transaction. Otherwise a later transaction might repack the tail
4096 * before this transaction commits, leaving the data block unflushed and 4277 * before this transaction commits, leaving the data block unflushed
4097 * clean, if we crash before the later transaction commits, the data block 4278 * and clean. If we crash before the later transaction commits, the
4098 * is lost. 4279 * data block is lost.
4099 */ 4280 */
4100 if (!list_empty(&jl->j_tail_bh_list)) { 4281 if (!list_empty(&jl->j_tail_bh_list)) {
4101 depth = reiserfs_write_unlock_nested(sb); 4282 depth = reiserfs_write_unlock_nested(sb);
@@ -4106,12 +4287,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4106 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4287 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4107 mutex_unlock(&jl->j_commit_mutex); 4288 mutex_unlock(&jl->j_commit_mutex);
4108 4289
4109 /* honor the flush wishes from the caller, simple commits can 4290 /*
4110 ** be done outside the journal lock, they are done below 4291 * honor the flush wishes from the caller, simple commits can
4111 ** be done outside the journal lock, they are done below 4292 * be done outside the journal lock; they are done below
4112 ** if we don't flush the commit list right now, we put it into 4293 *
4113 ** the work queue so the people waiting on the async progress work 4294 * if we don't flush the commit list right now, we put it into
4114 ** queue don't wait for this proc to flush journal lists and such. 4295 * the work queue so the people waiting on the async progress work
4296 * queue don't wait for this proc to flush journal lists and such.
4115 */ 4297 */
4116 if (flush) { 4298 if (flush) {
4117 flush_commit_list(sb, jl, 1); 4299 flush_commit_list(sb, jl, 1);
@@ -4120,9 +4302,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4120 queue_delayed_work(REISERFS_SB(sb)->commit_wq, 4302 queue_delayed_work(REISERFS_SB(sb)->commit_wq,
4121 &journal->j_work, HZ / 10); 4303 &journal->j_work, HZ / 10);
4122 4304
4123 /* if the next transaction has any chance of wrapping, flush 4305 /*
4124 ** transactions that might get overwritten. If any journal lists are very 4306 * if the next transaction has any chance of wrapping, flush
4125 ** old flush them as well. 4307 * transactions that might get overwritten. If any journal lists
4308 * are very old flush them as well.
4126 */ 4309 */
4127 first_jl: 4310 first_jl:
4128 list_for_each_safe(entry, safe, &journal->j_journal_list) { 4311 list_for_each_safe(entry, safe, &journal->j_journal_list) {
@@ -4135,8 +4318,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4135 } else if ((journal->j_start + 4318 } else if ((journal->j_start +
4136 journal->j_trans_max + 1) < 4319 journal->j_trans_max + 1) <
4137 SB_ONDISK_JOURNAL_SIZE(sb)) { 4320 SB_ONDISK_JOURNAL_SIZE(sb)) {
4138 /* if we don't cross into the next transaction and we don't 4321 /*
4139 * wrap, there is no way we can overlap any later transactions 4322 * if we don't cross into the next
4323 * transaction and we don't wrap, there is
4324 * no way we can overlap any later transactions
4140 * break now 4325 * break now
4141 */ 4326 */
4142 break; 4327 break;
@@ -4150,10 +4335,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4150 flush_used_journal_lists(sb, temp_jl); 4335 flush_used_journal_lists(sb, temp_jl);
4151 goto first_jl; 4336 goto first_jl;
4152 } else { 4337 } else {
4153 /* we don't overlap anything from out start to the end of the 4338 /*
4154 * log, and our wrapped portion doesn't overlap anything at 4339 * we don't overlap anything from our start
4155 * the start of the log. We can break 4340 * to the end of the log, and our wrapped
4156 */ 4341 * portion doesn't overlap anything at
4342 * the start of the log. We can break
4343 */
4157 break; 4344 break;
4158 } 4345 }
4159 } 4346 }
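The list walk above asks whether the next transaction could circularly overwrite blocks of an older journal list. A rough sketch of that interval reasoning, reduced to a single offset test; LOG_SIZE, would_overwrite and the simplified check are assumptions for the example (the kernel walks whole journal lists, not single offsets):

#include <stdbool.h>
#include <stdio.h>

#define LOG_SIZE 8192UL

/* would writing [start, start+len] (circularly) touch offset old_start? */
static bool would_overwrite(unsigned long start, unsigned long len,
			    unsigned long old_start)
{
	unsigned long end = start + len;

	if (end < LOG_SIZE)	/* no wrap: plain interval test */
		return old_start >= start && old_start <= end;

	/* wrapped: the tail of the log plus a prefix at the front */
	return old_start >= start || old_start <= end % LOG_SIZE;
}

int main(void)
{
	printf("%d\n", would_overwrite(8000, 500, 100)); /* 1: wrapped prefix */
	printf("%d\n", would_overwrite(100, 500, 4000)); /* 0: untouched */
	return 0;
}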
@@ -4181,9 +4368,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4181 reiserfs_check_lock_depth(sb, "journal end2"); 4368 reiserfs_check_lock_depth(sb, "journal end2");
4182 4369
4183 memset(th, 0, sizeof(*th)); 4370 memset(th, 0, sizeof(*th));
4184 /* Re-set th->t_super, so we can properly keep track of how many 4371 /*
4372 * Re-set th->t_super, so we can properly keep track of how many
4185 * persistent transactions there are. We need to do this so if this 4373 * persistent transactions there are. We need to do this so if this
4186 * call is part of a failed restart_transaction, we can free it later */ 4374 * call is part of a failed restart_transaction, we can free it later
4375 */
4187 th->t_super = sb; 4376 th->t_super = sb;
4188 4377
4189 return journal->j_errno; 4378 return journal->j_errno;
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index b46399d98f84..d48a9e7507a1 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -8,28 +8,21 @@
8#include "reiserfs.h" 8#include "reiserfs.h"
9#include <linux/buffer_head.h> 9#include <linux/buffer_head.h>
10 10
11/* these are used in do_balance.c */ 11/*
12 12 * copy copy_count entries from source directory item to dest buffer
13/* leaf_move_items 13 * (creating new item if needed)
14 leaf_shift_left 14 */
15 leaf_shift_right
16 leaf_delete_items
17 leaf_insert_into_buf
18 leaf_paste_in_buffer
19 leaf_cut_from_buffer
20 leaf_paste_entries
21 */
22
23/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
24static void leaf_copy_dir_entries(struct buffer_info *dest_bi, 15static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
25 struct buffer_head *source, int last_first, 16 struct buffer_head *source, int last_first,
26 int item_num, int from, int copy_count) 17 int item_num, int from, int copy_count)
27{ 18{
28 struct buffer_head *dest = dest_bi->bi_bh; 19 struct buffer_head *dest = dest_bi->bi_bh;
29 int item_num_in_dest; /* either the number of target item, 20 /*
30 or if we must create a new item, 21 * either the number of the target item or, if we must create a
31 the number of the item we will 22 * new item, the number of the item we will create it next to
32 create it next to */ 23 */
24 int item_num_in_dest;
25
33 struct item_head *ih; 26 struct item_head *ih;
34 struct reiserfs_de_head *deh; 27 struct reiserfs_de_head *deh;
35 int copy_records_len; /* length of all records in item to be copied */ 28 int copy_records_len; /* length of all records in item to be copied */
@@ -39,7 +32,10 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
39 32
40 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); 33 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
41 34
42 /* length of all record to be copied and first byte of the last of them */ 35 /*
 36 * length of all records to be copied and first byte of
37 * the last of them
38 */
43 deh = B_I_DEH(source, ih); 39 deh = B_I_DEH(source, ih);
44 if (copy_count) { 40 if (copy_count) {
45 copy_records_len = (from ? deh_location(&(deh[from - 1])) : 41 copy_records_len = (from ? deh_location(&(deh[from - 1])) :
@@ -59,7 +55,10 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
59 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) 55 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest)
60 - 1); 56 - 1);
61 57
62 /* if there are no items in dest or the first/last item in dest is not item of the same directory */ 58 /*
59 * if there are no items in dest or the first/last item in
 60 * dest is not an item of the same directory
61 */
63 if ((item_num_in_dest == -1) || 62 if ((item_num_in_dest == -1) ||
64 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || 63 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
65 (last_first == LAST_TO_FIRST 64 (last_first == LAST_TO_FIRST
@@ -83,11 +82,17 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
83 if (from < ih_entry_count(ih)) { 82 if (from < ih_entry_count(ih)) {
84 set_le_ih_k_offset(&new_ih, 83 set_le_ih_k_offset(&new_ih,
85 deh_offset(&(deh[from]))); 84 deh_offset(&(deh[from])));
86 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
87 } else { 85 } else {
88 /* no entries will be copied to this item in this function */ 86 /*
87 * no entries will be copied to this
88 * item in this function
89 */
89 set_le_ih_k_offset(&new_ih, U32_MAX); 90 set_le_ih_k_offset(&new_ih, U32_MAX);
90 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ 91 /*
92 * this item is not yet valid, but we
93 * want I_IS_DIRECTORY_ITEM to return 1
 94 * for it, so we use -1 (U32_MAX)
95 */
91 } 96 }
92 set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), 97 set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key),
93 TYPE_DIRENTRY); 98 TYPE_DIRENTRY);
@@ -119,30 +124,38 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
119 DEH_SIZE * copy_count + copy_records_len); 124 DEH_SIZE * copy_count + copy_records_len);
120} 125}
121 126
122/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or 127/*
123 part of it or nothing (see the return 0 below) from SOURCE to the end 128 * Copy the first (if last_first == FIRST_TO_LAST) or last
124 (if last_first) or beginning (!last_first) of the DEST */ 129 * (last_first == LAST_TO_FIRST) item or part of it or nothing
130 * (see the return 0 below) from SOURCE to the end (if last_first)
131 * or beginning (!last_first) of the DEST
132 */
125/* returns 1 if anything was copied, else 0 */ 133/* returns 1 if anything was copied, else 0 */
126static int leaf_copy_boundary_item(struct buffer_info *dest_bi, 134static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
127 struct buffer_head *src, int last_first, 135 struct buffer_head *src, int last_first,
128 int bytes_or_entries) 136 int bytes_or_entries)
129{ 137{
130 struct buffer_head *dest = dest_bi->bi_bh; 138 struct buffer_head *dest = dest_bi->bi_bh;
131 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ 139 /* number of items in the source and destination buffers */
140 int dest_nr_item, src_nr_item;
132 struct item_head *ih; 141 struct item_head *ih;
133 struct item_head *dih; 142 struct item_head *dih;
134 143
135 dest_nr_item = B_NR_ITEMS(dest); 144 dest_nr_item = B_NR_ITEMS(dest);
136 145
146 /*
147 * if ( DEST is empty or first item of SOURCE and last item of
148 * DEST are the items of different objects or of different types )
149 * then there is no need to treat this item differently from the
150 * other items that we copy, so we return
151 */
137 if (last_first == FIRST_TO_LAST) { 152 if (last_first == FIRST_TO_LAST) {
138 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects
139 or of different types ) then there is no need to treat this item differently from the other items
140 that we copy, so we return */
141 ih = item_head(src, 0); 153 ih = item_head(src, 0);
142 dih = item_head(dest, dest_nr_item - 1); 154 dih = item_head(dest, dest_nr_item - 1);
155
156 /* there is nothing to merge */
143 if (!dest_nr_item 157 if (!dest_nr_item
144 || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) 158 || (!op_is_left_mergeable(&(ih->ih_key), src->b_size)))
145 /* there is nothing to merge */
146 return 0; 159 return 0;
147 160
148 RFALSE(!ih_item_len(ih), 161 RFALSE(!ih_item_len(ih),
@@ -157,8 +170,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
157 return 1; 170 return 1;
158 } 171 }
159 172
160 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST 173 /*
161 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header 174 * copy part of the body of the first item of SOURCE
 175 * to the end of the body of the last item of the DEST;
 176 * the part is defined by 'bytes_or_entries'; if bytes_or_entries
177 * == -1 copy whole body; don't create new item header
162 */ 178 */
163 if (bytes_or_entries == -1) 179 if (bytes_or_entries == -1)
164 bytes_or_entries = ih_item_len(ih); 180 bytes_or_entries = ih_item_len(ih);
@@ -176,8 +192,10 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
176 } 192 }
177#endif 193#endif
178 194
179 /* merge first item (or its part) of src buffer with the last 195 /*
180 item of dest buffer. Both are of the same file */ 196 * merge first item (or its part) of src buffer with the last
197 * item of dest buffer. Both are of the same file
198 */
181 leaf_paste_in_buffer(dest_bi, 199 leaf_paste_in_buffer(dest_bi,
182 dest_nr_item - 1, ih_item_len(dih), 200 dest_nr_item - 1, ih_item_len(dih),
183 bytes_or_entries, ih_item_body(src, ih), 0); 201 bytes_or_entries, ih_item_body(src, ih), 0);
@@ -195,8 +213,9 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
195 213
196 /* copy boundary item to right (last_first == LAST_TO_FIRST) */ 214 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
197 215
198 /* ( DEST is empty or last item of SOURCE and first item of DEST 216 /*
199 are the items of different object or of different types ) 217 * (DEST is empty or last item of SOURCE and first item of DEST
199 are the items of different object or of different types) 218 * are the items of different objects or of different types)
200 */ 219 */
201 src_nr_item = B_NR_ITEMS(src); 220 src_nr_item = B_NR_ITEMS(src);
202 ih = item_head(src, src_nr_item - 1); 221 ih = item_head(src, src_nr_item - 1);
@@ -206,8 +225,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
206 return 0; 225 return 0;
207 226
208 if (is_direntry_le_ih(ih)) { 227 if (is_direntry_le_ih(ih)) {
228 /*
229 * bytes_or_entries = entries number in last
230 * item body of SOURCE
231 */
209 if (bytes_or_entries == -1) 232 if (bytes_or_entries == -1)
210 /* bytes_or_entries = entries number in last item body of SOURCE */
211 bytes_or_entries = ih_entry_count(ih); 233 bytes_or_entries = ih_entry_count(ih);
212 234
213 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 235 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
@@ -217,9 +239,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
217 return 1; 239 return 1;
218 } 240 }
219 241
220 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; 242 /*
221 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; 243 * copy part of the body of the last item of SOURCE to the
222 don't create new item header 244 * begin of the body of the first item of the DEST; part defined
 245 * by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body;
246 * change first item key of the DEST; don't create new item header
223 */ 247 */
224 248
225 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), 249 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
@@ -276,9 +300,12 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
276 return 1; 300 return 1;
277} 301}
278 302
279/* copy cpy_mun items from buffer src to buffer dest 303/*
280 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest 304 * copy cpy_num items from buffer src to buffer dest
281 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest 305 * last_first == FIRST_TO_LAST means that we copy cpy_num items beginning
 306 * from first-th item in src to tail of dest
 307 * last_first == LAST_TO_FIRST means that we copy cpy_num items beginning
308 * from first-th item in src to head of dest
282 */ 309 */
283static void leaf_copy_items_entirely(struct buffer_info *dest_bi, 310static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
284 struct buffer_head *src, int last_first, 311 struct buffer_head *src, int last_first,
@@ -311,7 +338,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
311 nr = blkh_nr_item(blkh); 338 nr = blkh_nr_item(blkh);
312 free_space = blkh_free_space(blkh); 339 free_space = blkh_free_space(blkh);
313 340
314 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ 341 /*
342 * we will insert items before 0-th or nr-th item in dest buffer.
 343 * It depends on the last_first parameter
344 */
315 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; 345 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
316 346
317 /* location of head of first new item */ 347 /* location of head of first new item */
@@ -377,8 +407,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
377 } 407 }
378} 408}
379 409
380/* This function splits the (liquid) item into two items (useful when 410/*
381 shifting part of an item into another node.) */ 411 * This function splits the (liquid) item into two items (useful when
412 * shifting part of an item into another node.)
413 */
382static void leaf_item_bottle(struct buffer_info *dest_bi, 414static void leaf_item_bottle(struct buffer_info *dest_bi,
383 struct buffer_head *src, int last_first, 415 struct buffer_head *src, int last_first,
384 int item_num, int cpy_bytes) 416 int item_num, int cpy_bytes)
@@ -390,7 +422,10 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
390 "vs-10170: bytes == - 1 means: do not split item"); 422 "vs-10170: bytes == - 1 means: do not split item");
391 423
392 if (last_first == FIRST_TO_LAST) { 424 if (last_first == FIRST_TO_LAST) {
393 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 425 /*
 426 * if ( item in position item_num in buffer SOURCE
 427 * is a directory item )
428 */
394 ih = item_head(src, item_num); 429 ih = item_head(src, item_num);
395 if (is_direntry_le_ih(ih)) 430 if (is_direntry_le_ih(ih))
396 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 431 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
@@ -398,9 +433,11 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
398 else { 433 else {
399 struct item_head n_ih; 434 struct item_head n_ih;
400 435
401 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST 436 /*
402 part defined by 'cpy_bytes'; create new item header; change old item_header (????); 437 * copy part of the body of the item number 'item_num'
403 n_ih = new item_header; 438 * of SOURCE to the end of the DEST part defined by
439 * 'cpy_bytes'; create new item header; change old
440 * item_header (????); n_ih = new item_header;
404 */ 441 */
405 memcpy(&n_ih, ih, IH_SIZE); 442 memcpy(&n_ih, ih, IH_SIZE);
406 put_ih_item_len(&n_ih, cpy_bytes); 443 put_ih_item_len(&n_ih, cpy_bytes);
@@ -419,7 +456,10 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
419 item_body(src, item_num), 0); 456 item_body(src, item_num), 0);
420 } 457 }
421 } else { 458 } else {
422 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 459 /*
 460 * if ( item in position item_num in buffer
 461 * SOURCE is a directory item )
462 */
423 ih = item_head(src, item_num); 463 ih = item_head(src, item_num);
424 if (is_direntry_le_ih(ih)) 464 if (is_direntry_le_ih(ih))
425 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, 465 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
@@ -429,13 +469,16 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
429 else { 469 else {
430 struct item_head n_ih; 470 struct item_head n_ih;
431 471
432 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST 472 /*
433 part defined by 'cpy_bytes'; create new item header; 473 * copy part of the body of the item number 'item_num'
434 n_ih = new item_header; 474 * of SOURCE to the begin of the DEST part defined by
475 * 'cpy_bytes'; create new item header;
476 * n_ih = new item_header;
435 */ 477 */
436 memcpy(&n_ih, ih, SHORT_KEY_SIZE); 478 memcpy(&n_ih, ih, SHORT_KEY_SIZE);
437 479
438 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 480 /* Endian safe, both le */
481 n_ih.ih_version = ih->ih_version;
439 482
440 if (is_direct_le_ih(ih)) { 483 if (is_direct_le_ih(ih)) {
441 set_le_ih_k_offset(&n_ih, 484 set_le_ih_k_offset(&n_ih,
@@ -459,7 +502,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
459 /* set item length */ 502 /* set item length */
460 put_ih_item_len(&n_ih, cpy_bytes); 503 put_ih_item_len(&n_ih, cpy_bytes);
461 504
462 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 505 /* Endian safe, both le */
506 n_ih.ih_version = ih->ih_version;
463 507
464 leaf_insert_into_buf(dest_bi, 0, &n_ih, 508 leaf_insert_into_buf(dest_bi, 0, &n_ih,
465 item_body(src, item_num) + 509 item_body(src, item_num) +
@@ -468,10 +512,12 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
468 } 512 }
469} 513}
470 514
471/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. 515/*
472 If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. 516 * If cpy_bytes equals minus one, then copy cpy_num whole items from SOURCE
473 From last item copy cpy_num bytes for regular item and cpy_num directory entries for 517 * to DEST. If cpy_bytes is not equal to minus one, then copy cpy_num-1 whole
474 directory item. */ 518 * items from SOURCE to DEST. From the last item copy cpy_bytes bytes for a
 519 * regular item and cpy_bytes directory entries for a directory item.
520 */
475static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, 521static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
476 int last_first, int cpy_num, int cpy_bytes) 522 int last_first, int cpy_num, int cpy_bytes)
477{ 523{
@@ -498,22 +544,34 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
498 else 544 else
499 bytes = -1; 545 bytes = -1;
500 546
501 /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ 547 /*
 548 * copy the first item or its part or nothing to the end of
549 * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes))
550 */
502 i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); 551 i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
503 cpy_num -= i; 552 cpy_num -= i;
504 if (cpy_num == 0) 553 if (cpy_num == 0)
505 return i; 554 return i;
506 pos += i; 555 pos += i;
507 if (cpy_bytes == -1) 556 if (cpy_bytes == -1)
508 /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ 557 /*
558 * copy first cpy_num items starting from position
559 * 'pos' of SOURCE to end of DEST
560 */
509 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, 561 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
510 pos, cpy_num); 562 pos, cpy_num);
511 else { 563 else {
512 /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ 564 /*
565 * copy first cpy_num-1 items starting from position
 566 * 'pos' of the SOURCE to the end of the DEST
567 */
513 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, 568 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
514 pos, cpy_num - 1); 569 pos, cpy_num - 1);
515 570
516 /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ 571 /*
 572 * copy part of the item whose number is
573 * cpy_num+pos-1 to the end of the DEST
574 */
517 leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, 575 leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
518 cpy_num + pos - 1, cpy_bytes); 576 cpy_num + pos - 1, cpy_bytes);
519 } 577 }
@@ -525,7 +583,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
525 else 583 else
526 bytes = -1; 584 bytes = -1;
527 585
528 /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ 586 /*
 587 * copy the last item or its part or nothing to the
 588 * beginning of the DEST
589 * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes));
590 */
529 i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); 591 i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);
530 592
531 cpy_num -= i; 593 cpy_num -= i;
@@ -534,15 +596,24 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
534 596
535 pos = src_nr_item - cpy_num - i; 597 pos = src_nr_item - cpy_num - i;
536 if (cpy_bytes == -1) { 598 if (cpy_bytes == -1) {
537 /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ 599 /*
600 * starting from position 'pos' copy last cpy_num
 601 * items of SOURCE to the beginning of DEST
602 */
538 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, 603 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
539 pos, cpy_num); 604 pos, cpy_num);
540 } else { 605 } else {
541 /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ 606 /*
607 * copy last cpy_num-1 items starting from position
 608 * 'pos+1' of the SOURCE to the beginning of the DEST;
609 */
542 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, 610 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
543 pos + 1, cpy_num - 1); 611 pos + 1, cpy_num - 1);
544 612
545 /* copy part of the item which number is pos to the begin of the DEST */ 613 /*
 614 * copy part of the item whose number is pos to
 615 * the beginning of the DEST
616 */
546 leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, 617 leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
547 cpy_bytes); 618 cpy_bytes);
548 } 619 }
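Throughout leaf_copy_items, cpy_bytes == -1 is a sentinel for "copy whole items only"; any other value means the last of the cpy_num items is copied only partially ("bottled"). A toy sketch of that dispatch, with items reduced to bare byte lengths and all names invented for the example:

#include <stdio.h>

static void copy_items(const int *item_len, int cpy_num, int cpy_bytes)
{
	int i, whole = (cpy_bytes == -1) ? cpy_num : cpy_num - 1;

	for (i = 0; i < whole; i++)
		printf("copy whole item %d (%d bytes)\n", i, item_len[i]);

	if (cpy_bytes != -1)	/* "bottle" part of the last item */
		printf("copy %d of %d bytes of item %d\n",
		       cpy_bytes, item_len[cpy_num - 1], cpy_num - 1);
}

int main(void)
{
	int lens[] = { 48, 128, 64 };

	copy_items(lens, 3, -1);	/* three whole items */
	copy_items(lens, 3, 16);	/* two whole, 16 bytes of the third */
	return 0;
}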
@@ -550,9 +621,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
550 return i; 621 return i;
551} 622}
552 623
553/* there are types of coping: from S[0] to L[0], from S[0] to R[0], 624/*
554 from R[0] to L[0]. for each of these we have to define parent and 625 * there are three types of copying: from S[0] to L[0], from S[0] to R[0],
555 positions of destination and source buffers */ 626 * from R[0] to L[0]. For each of these we have to define the parent and
627 * positions of destination and source buffers
628 */
556static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, 629static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
557 struct buffer_info *dest_bi, 630 struct buffer_info *dest_bi,
558 struct buffer_info *src_bi, 631 struct buffer_info *src_bi,
@@ -568,7 +641,9 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
568 src_bi->tb = tb; 641 src_bi->tb = tb;
569 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 642 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
570 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 643 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
571 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ 644
645 /* src->b_item_order */
646 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
572 dest_bi->tb = tb; 647 dest_bi->tb = tb;
573 dest_bi->bi_bh = tb->L[0]; 648 dest_bi->bi_bh = tb->L[0];
574 dest_bi->bi_parent = tb->FL[0]; 649 dest_bi->bi_parent = tb->FL[0];
@@ -633,8 +708,10 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
633 shift_mode, src_bi->bi_bh, dest_bi->bi_bh); 708 shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
634} 709}
635 710
636/* copy mov_num items and mov_bytes of the (mov_num-1)th item to 711/*
637 neighbor. Delete them from source */ 712 * copy mov_num items and mov_bytes of the (mov_num-1)th item to
713 * neighbor. Delete them from source
714 */
638int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, 715int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
639 int mov_bytes, struct buffer_head *Snew) 716 int mov_bytes, struct buffer_head *Snew)
640{ 717{
@@ -657,18 +734,24 @@ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
657 return ret_value; 734 return ret_value;
658} 735}
659 736
660/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) 737/*
661 from S[0] to L[0] and replace the delimiting key */ 738 * Shift shift_num items (and shift_bytes of last shifted item if
739 * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key
740 */
662int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) 741int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
663{ 742{
664 struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); 743 struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
665 int i; 744 int i;
666 745
667 /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ 746 /*
747 * move shift_num (and shift_bytes bytes) items from S[0]
748 * to left neighbor L[0]
749 */
668 i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); 750 i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
669 751
670 if (shift_num) { 752 if (shift_num) {
671 if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */ 753 /* number of items in S[0] == 0 */
754 if (B_NR_ITEMS(S0) == 0) {
672 755
673 RFALSE(shift_bytes != -1, 756 RFALSE(shift_bytes != -1,
674 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", 757 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
@@ -704,13 +787,18 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
704 787
705/* CLEANING STOPPED HERE */ 788/* CLEANING STOPPED HERE */
706 789
707/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ 790/*
791 * Shift shift_num (shift_bytes) items from S[0] to the right neighbor,
792 * and replace the delimiting key
793 */
708int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) 794int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
709{ 795{
710 // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
711 int ret_value; 796 int ret_value;
712 797
713 /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ 798 /*
799 * move shift_num (and shift_bytes) items from S[0] to
800 * right neighbor R[0]
801 */
714 ret_value = 802 ret_value =
715 leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); 803 leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);
716 804
@@ -725,12 +813,16 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
725 813
726static void leaf_delete_items_entirely(struct buffer_info *bi, 814static void leaf_delete_items_entirely(struct buffer_info *bi,
727 int first, int del_num); 815 int first, int del_num);
728/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. 816/*
729 If not. 817 * If del_bytes == -1, starting from position 'first' delete del_num
730 If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of 818 * whole items in buffer CUR.
731 the first item. Part defined by del_bytes. Don't delete first item header 819 * Otherwise:
732 If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of 820 * If last_first == 0, starting from position 'first' delete del_num-1
733 the last item . Part defined by del_bytes. Don't delete last item header. 821 * whole items. Delete part of the body of the first item; the part is
 822 * defined by del_bytes. Don't delete the first item header.
 823 * If last_first == 1, starting from position 'first+1' delete del_num-1
 824 * whole items. Delete part of the body of the last item; the part is
 825 * defined by del_bytes. Don't delete the last item header.
734*/ 826*/
735void leaf_delete_items(struct buffer_info *cur_bi, int last_first, 827void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
736 int first, int del_num, int del_bytes) 828 int first, int del_num, int del_bytes)
@@ -761,32 +853,43 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
761 leaf_delete_items_entirely(cur_bi, first, del_num); 853 leaf_delete_items_entirely(cur_bi, first, del_num);
762 else { 854 else {
763 if (last_first == FIRST_TO_LAST) { 855 if (last_first == FIRST_TO_LAST) {
764 /* delete del_num-1 items beginning from item in position first */ 856 /*
857 * delete del_num-1 items beginning from
858 * item in position first
859 */
765 leaf_delete_items_entirely(cur_bi, first, del_num - 1); 860 leaf_delete_items_entirely(cur_bi, first, del_num - 1);
766 861
767 /* delete the part of the first item of the bh 862 /*
768 do not delete item header 863 * delete the part of the first item of the bh;
 864 * do not delete the item header
769 */ 865 */
770 leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); 866 leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
771 } else { 867 } else {
772 struct item_head *ih; 868 struct item_head *ih;
773 int len; 869 int len;
774 870
775 /* delete del_num-1 items beginning from item in position first+1 */ 871 /*
872 * delete del_num-1 items beginning from
873 * item in position first+1
874 */
776 leaf_delete_items_entirely(cur_bi, first + 1, 875 leaf_delete_items_entirely(cur_bi, first + 1,
777 del_num - 1); 876 del_num - 1);
778 877
779 ih = item_head(bh, B_NR_ITEMS(bh) - 1); 878 ih = item_head(bh, B_NR_ITEMS(bh) - 1);
780 if (is_direntry_le_ih(ih)) 879 if (is_direntry_le_ih(ih))
781 /* the last item is directory */ 880 /* the last item is directory */
782 /* len = numbers of directory entries in this item */ 881 /*
 882 * len = number of directory entries
883 * in this item
884 */
783 len = ih_entry_count(ih); 885 len = ih_entry_count(ih);
784 else 886 else
785 /* len = body len of item */ 887 /* len = body len of item */
786 len = ih_item_len(ih); 888 len = ih_item_len(ih);
787 889
788 /* delete the part of the last item of the bh 890 /*
789 do not delete item header 891 * delete the part of the last item of the bh;
 892 * do not delete the item header
790 */ 893 */
791 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, 894 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
792 len - del_bytes, del_bytes); 895 len - del_bytes, del_bytes);
@@ -867,8 +970,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before,
867 } 970 }
868} 971}
869 972
870/* paste paste_size bytes to affected_item_num-th item. 973/*
871 When item is a directory, this only prepare space for new entries */ 974 * paste paste_size bytes to affected_item_num-th item.
 975 * When item is a directory, this only prepares space for new entries
976 */
872void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, 977void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
873 int pos_in_item, int paste_size, 978 int pos_in_item, int paste_size,
874 const char *body, int zeros_number) 979 const char *body, int zeros_number)
@@ -957,10 +1062,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
957 } 1062 }
958} 1063}
959 1064
960/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item 1065/*
961 does not have free space, so it moves DEHs and remaining records as 1066 * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
962 necessary. Return value is size of removed part of directory item 1067 * does not have free space, so it moves DEHs and remaining records as
963 in bytes. */ 1068 * necessary. Return value is size of removed part of directory item
1069 * in bytes.
1070 */
964static int leaf_cut_entries(struct buffer_head *bh, 1071static int leaf_cut_entries(struct buffer_head *bh,
965 struct item_head *ih, int from, int del_count) 1072 struct item_head *ih, int from, int del_count)
966{ 1073{
@@ -971,8 +1078,10 @@ static int leaf_cut_entries(struct buffer_head *bh,
971 int cut_records_len; /* length of all removed records */ 1078 int cut_records_len; /* length of all removed records */
972 int i; 1079 int i;
973 1080
974 /* make sure, that item is directory and there are enough entries to 1081 /*
975 remove */ 1082 * make sure that item is directory and there are enough entries to
1083 * remove
1084 */
976 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); 1085 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
977 RFALSE(ih_entry_count(ih) < from + del_count, 1086 RFALSE(ih_entry_count(ih) < from + del_count,
978 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", 1087 "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
@@ -987,8 +1096,10 @@ static int leaf_cut_entries(struct buffer_head *bh,
987 /* entry head array */ 1096 /* entry head array */
988 deh = B_I_DEH(bh, ih); 1097 deh = B_I_DEH(bh, ih);
989 1098
990 /* first byte of remaining entries, those are BEFORE cut entries 1099 /*
991 (prev_record) and length of all removed records (cut_records_len) */ 1100 * first byte of remaining entries, those are BEFORE cut entries
1101 * (prev_record) and length of all removed records (cut_records_len)
1102 */
992 prev_record_offset = 1103 prev_record_offset =
993 (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); 1104 (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih));
994 cut_records_len = prev_record_offset /*from_record */ - 1105 cut_records_len = prev_record_offset /*from_record */ -
@@ -1021,14 +1132,15 @@ static int leaf_cut_entries(struct buffer_head *bh,
1021 return DEH_SIZE * del_count + cut_records_len; 1132 return DEH_SIZE * del_count + cut_records_len;
1022} 1133}
1023 1134
1024/* when cut item is part of regular file 1135/*
1025 pos_in_item - first byte that must be cut 1136 * when cut item is part of regular file
1026 cut_size - number of bytes to be cut beginning from pos_in_item 1137 * pos_in_item - first byte that must be cut
1027 1138 * cut_size - number of bytes to be cut beginning from pos_in_item
1028 when cut item is part of directory 1139 *
1029 pos_in_item - number of first deleted entry 1140 * when cut item is part of directory
1030 cut_size - count of deleted entries 1141 * pos_in_item - number of first deleted entry
1031 */ 1142 * cut_size - count of deleted entries
1143 */
1032void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, 1144void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1033 int pos_in_item, int cut_size) 1145 int pos_in_item, int cut_size)
1034{ 1146{
@@ -1055,7 +1167,6 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
1055 cut_item_num); 1167 cut_item_num);
1056 /* change item key by key of first entry in the item */ 1168 /* change item key by key of first entry in the item */
1057 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); 1169 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
1058 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
1059 } 1170 }
1060 } else { 1171 } else {
1061 /* item is direct or indirect */ 1172 /* item is direct or indirect */
@@ -1195,7 +1306,10 @@ static void leaf_delete_items_entirely(struct buffer_info *bi,
1195 } 1306 }
1196} 1307}
1197 1308
1198/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ 1309/*
1310 * paste new_entry_count entries (new_dehs, records) into position
 1311 * 'before' of the item_num-th item
1312 */
1199void leaf_paste_entries(struct buffer_info *bi, 1313void leaf_paste_entries(struct buffer_info *bi,
1200 int item_num, 1314 int item_num,
1201 int before, 1315 int before,
@@ -1215,7 +1329,10 @@ void leaf_paste_entries(struct buffer_info *bi,
1215 1329
1216 ih = item_head(bh, item_num); 1330 ih = item_head(bh, item_num);
1217 1331
1218 /* make sure, that item is directory, and there are enough records in it */ 1332 /*
 1333 * make sure that item is a directory and there are enough
1334 * records in it
1335 */
1219 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); 1336 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
1220 RFALSE(ih_entry_count(ih) < before, 1337 RFALSE(ih_entry_count(ih) < before,
1221 "10230: there are no entry we paste entries before. entry_count = %d, before = %d", 1338 "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
@@ -1277,8 +1394,6 @@ void leaf_paste_entries(struct buffer_info *bi,
1277 /* change item key if necessary (when we paste before 0-th entry) */ 1394 /* change item key if necessary (when we paste before 0-th entry) */
1278 if (!before) { 1395 if (!before) {
1279 set_le_ih_k_offset(ih, deh_offset(new_dehs)); 1396 set_le_ih_k_offset(ih, deh_offset(new_dehs));
1280/* memcpy (&ih->ih_key.k_offset,
1281 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
1282 } 1397 }
1283#ifdef CONFIG_REISERFS_CHECK 1398#ifdef CONFIG_REISERFS_CHECK
1284 { 1399 {
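Several helpers in this file depend on the directory item layout: an array of entry heads at the front of the item body, with each name record packed toward the item's end at the offset stored in its head. A self-contained sketch of the cut-size arithmetic in leaf_cut_entries, where toy_deh, its location field and all sizes are assumptions for the example:

#include <stdio.h>

struct toy_deh {
	int location;	/* offset of this entry's record in the item */
};

/* bytes freed when cutting del_count entries starting at 'from' */
static int cut_size(const struct toy_deh *deh, int item_len,
		    int from, int del_count, int deh_size)
{
	/* first byte of the records that precede the cut */
	int prev_record = from ? deh[from - 1].location : item_len;
	/* records are contiguous, so the cut length is an offset difference */
	int cut_records_len = prev_record -
			      deh[from + del_count - 1].location;

	return deh_size * del_count + cut_records_len;
}

int main(void)
{
	/* three entries; records packed from the end of a 96-byte item */
	struct toy_deh deh[] = { { 84 }, { 72 }, { 56 } };

	printf("cut middle entry: %d bytes\n", cut_size(deh, 96, 1, 1, 8));
	return 0;
}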
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 1ce8fbea4749..6bc38de8357f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -22,8 +22,10 @@
22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } 22#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); }
23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); 23#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i);
24 24
25// directory item contains array of entry headers. This performs 25/*
26// binary search through that array 26 * directory item contains array of entry headers. This performs
27 * binary search through that array
28 */
27static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) 29static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
28{ 30{
29 struct item_head *ih = de->de_ih; 31 struct item_head *ih = de->de_ih;
@@ -43,7 +45,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
43 lbound = j + 1; 45 lbound = j + 1;
44 continue; 46 continue;
45 } 47 }
46 // this is not name found, but matched third key component 48 /* the name was not found, but the third key component matched */
47 de->de_entry_num = j; 49 de->de_entry_num = j;
48 return NAME_FOUND; 50 return NAME_FOUND;
49 } 51 }
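bin_search_in_dir_item, whose tail appears above, bisects the entry-head array on the third key component (the entry offset) before any name comparison is done. A simplified user-space rendition over a plain sorted array; the return values and the insertion-point convention on a miss are assumptions for the example:

#include <stdio.h>

#define NAME_NOT_FOUND 0
#define NAME_FOUND     1

static int bin_search_offsets(const unsigned *offsets, int count,
			      unsigned off, int *pos)
{
	int lbound = 0, rbound = count - 1, j;

	while (lbound <= rbound) {
		j = (lbound + rbound) / 2;
		if (off < offsets[j]) {
			rbound = j - 1;
		} else if (off > offsets[j]) {
			lbound = j + 1;
		} else {
			*pos = j;	/* third key component matched */
			return NAME_FOUND;
		}
	}
	*pos = lbound;	/* where the entry would be inserted */
	return NAME_NOT_FOUND;
}

int main(void)
{
	unsigned offs[] = { 1, 2, 300, 4096, 70000 };
	int pos;

	printf("%d at %d\n", bin_search_offsets(offs, 5, 4096, &pos), pos);
	printf("%d at %d\n", bin_search_offsets(offs, 5, 5000, &pos), pos);
	return 0;
}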
@@ -52,7 +54,9 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
52 return NAME_NOT_FOUND; 54 return NAME_NOT_FOUND;
53} 55}
54 56
55// comment? maybe something like set de to point to what the path points to? 57/*
58 * comment? maybe something like set de to point to what the path points to?
59 */
56static inline void set_de_item_location(struct reiserfs_dir_entry *de, 60static inline void set_de_item_location(struct reiserfs_dir_entry *de,
57 struct treepath *path) 61 struct treepath *path)
58{ 62{
@@ -62,7 +66,9 @@ static inline void set_de_item_location(struct reiserfs_dir_entry *de,
62 de->de_item_num = PATH_LAST_POSITION(path); 66 de->de_item_num = PATH_LAST_POSITION(path);
63} 67}
64 68
65// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set 69/*
70 * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
71 */
66inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) 72inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
67{ 73{
68 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; 74 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
@@ -76,7 +82,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
76 de->de_namelen = strlen(de->de_name); 82 de->de_namelen = strlen(de->de_name);
77} 83}
78 84
79// what entry points to 85/* what entry points to */
80static inline void set_de_object_key(struct reiserfs_dir_entry *de) 86static inline void set_de_object_key(struct reiserfs_dir_entry *de)
81{ 87{
82 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); 88 BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih));
@@ -100,17 +106,16 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
100 set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); 106 set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
101} 107}
102 108
103/* We assign a key to each directory item, and place multiple entries 109/*
104in a single directory item. A directory item has a key equal to the 110 * We assign a key to each directory item, and place multiple entries in a
105key of the first directory entry in it. 111 * single directory item. A directory item has a key equal to the key of
106 112 * the first directory entry in it.
107This function first calls search_by_key, then, if item whose first 113 *
108 entry matches is not found it looks for the entry inside directory 114 * This function first calls search_by_key, then, if the item whose first
109 item found by search_by_key. Fills the path to the entry, and to the 115 * entry matches is not found, it looks for the entry inside the directory
110 entry position in the item 116 * item found by search_by_key. Fills the path to the entry, and to the
111 117 * entry position in the item.
112*/ 118 */
113
114/* The function is NOT SCHEDULE-SAFE! */ 119/* The function is NOT SCHEDULE-SAFE! */
115int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, 120int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
116 struct treepath *path, struct reiserfs_dir_entry *de) 121 struct treepath *path, struct reiserfs_dir_entry *de)
@@ -152,12 +157,17 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
152 } 157 }
153#endif /* CONFIG_REISERFS_CHECK */ 158#endif /* CONFIG_REISERFS_CHECK */
154 159
155 /* binary search in directory item by third componen t of the 160 /*
156 key. sets de->de_entry_num of de */ 161 * binary search in directory item by third component of the
162 * key. sets de->de_entry_num of de
163 */
157 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); 164 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
158 path->pos_in_item = de->de_entry_num; 165 path->pos_in_item = de->de_entry_num;
159 if (retval != NAME_NOT_FOUND) { 166 if (retval != NAME_NOT_FOUND) {
160 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set 167 /*
168 * ugly, but rename needs de_bh, de_deh, de_name,
169 * de_namelen, de_objectid set
170 */
161 set_de_name_and_namelen(de); 171 set_de_name_and_namelen(de);
162 set_de_object_key(de); 172 set_de_object_key(de);
163 } 173 }
@@ -166,11 +176,12 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
166 176
167/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ 177/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
168 178
169/* The third component is hashed, and you can choose from more than 179/*
170 one hash function. Per directory hashes are not yet implemented 180 * The third component is hashed, and you can choose from more than
171 but are thought about. This function should be moved to hashes.c 181 * one hash function. Per directory hashes are not yet implemented
172 Jedi, please do so. -Hans */ 182 * but are thought about. This function should be moved to hashes.c
172 Jedi, please do so. -Hans */ 183 * but are thought about. This function should be moved to hashes.c.
184 */
174static __u32 get_third_component(struct super_block *s, 185static __u32 get_third_component(struct super_block *s,
175 const char *name, int len) 186 const char *name, int len)
176{ 187{
@@ -183,11 +194,13 @@ static __u32 get_third_component(struct super_block *s,
183 194
184 res = REISERFS_SB(s)->s_hash_function(name, len); 195 res = REISERFS_SB(s)->s_hash_function(name, len);
185 196
186 // take bits from 7-th to 30-th including both bounds 197 /* take bits from 7-th to 30-th including both bounds */
187 res = GET_HASH_VALUE(res); 198 res = GET_HASH_VALUE(res);
188 if (res == 0) 199 if (res == 0)
189 // needed to have no names before "." and ".." those have hash 200 /*
190 // value == 0 and generation conters 1 and 2 accordingly 201 * needed to have no names before "." and ".."; those have hash
 202 * value == 0 and generation counters 1 and 2 respectively
203 */
191 res = 128; 204 res = 128;
192 return res + MAX_GENERATION_NUMBER; 205 return res + MAX_GENERATION_NUMBER;
193} 206}
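get_third_component composes a directory entry offset from the hash and a generation counter. A sketch of that layout; the mask and MAX_GENERATION value are assumptions inferred from the comments above (hash in bits 7..30, generation in the low bits), not copied from reiserfs headers:

#include <stdio.h>

#define HASH_MASK       0x7fffff80u	/* assumed: bits 7..30 */
#define MAX_GENERATION  127u		/* assumed: low 7 bits */

static unsigned third_component(unsigned raw_hash)
{
	unsigned res = raw_hash & HASH_MASK;

	if (res == 0)	/* keep all real names after "." and ".." */
		res = 128;

	/* start at the highest generation so lookups can scan downward */
	return res + MAX_GENERATION;
}

int main(void)
{
	printf("0x%08x\n", third_component(0xdeadbeef));
	printf("0x%08x\n", third_component(0x0000007f)); /* hash bits all 0 */
	return 0;
}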
@@ -208,7 +221,7 @@ static int reiserfs_match(struct reiserfs_dir_entry *de,
208 221
209/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ 222/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
210 223
211 /* used when hash collisions exist */ 224/* used when hash collisions exist */
212 225
213static int linear_search_in_dir_item(struct cpu_key *key, 226static int linear_search_in_dir_item(struct cpu_key *key,
214 struct reiserfs_dir_entry *de, 227 struct reiserfs_dir_entry *de,
@@ -232,43 +245,50 @@ static int linear_search_in_dir_item(struct cpu_key *key,
232 deh += i; 245 deh += i;
233 246
234 for (; i >= 0; i--, deh--) { 247 for (; i >= 0; i--, deh--) {
248 /* hash value does not match, no need to check whole name */
235 if (GET_HASH_VALUE(deh_offset(deh)) != 249 if (GET_HASH_VALUE(deh_offset(deh)) !=
236 GET_HASH_VALUE(cpu_key_k_offset(key))) { 250 GET_HASH_VALUE(cpu_key_k_offset(key))) {
237 // hash value does not match, no need to check whole name
238 return NAME_NOT_FOUND; 251 return NAME_NOT_FOUND;
239 } 252 }
240 253
241 /* mark, that this generation number is used */ 254 /* mark that this generation number is used */
242 if (de->de_gen_number_bit_string) 255 if (de->de_gen_number_bit_string)
243 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), 256 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
244 de->de_gen_number_bit_string); 257 de->de_gen_number_bit_string);
245 258
246 // calculate pointer to name and namelen 259 /* calculate pointer to name and namelen */
247 de->de_entry_num = i; 260 de->de_entry_num = i;
248 set_de_name_and_namelen(de); 261 set_de_name_and_namelen(de);
249 262
263 /*
264 * de's de_name, de_namelen, de_recordlen are set.
265 * Fill the rest.
266 */
250 if ((retval = 267 if ((retval =
251 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { 268 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
252 // de's de_name, de_namelen, de_recordlen are set. Fill the rest:
253 269
254 // key of pointed object 270 /* key of pointed object */
255 set_de_object_key(de); 271 set_de_object_key(de);
256 272
257 store_de_entry_key(de); 273 store_de_entry_key(de);
258 274
259 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE 275 /* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */
260 return retval; 276 return retval;
261 } 277 }
262 } 278 }
263 279
264 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) 280 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
265 /* we have reached left most entry in the node. In common we 281 /*
266 have to go to the left neighbor, but if generation counter 282 * we have reached the leftmost entry in the node. Normally we
267 is 0 already, we know for sure, that there is no name with 283 * have to go to the left neighbor, but if the generation counter
268 the same hash value */ 284 * is 0 already, we know for sure that there is no name with
269 // FIXME: this work correctly only because hash value can not 285 * the same hash value
270 // be 0. Btw, in case of Yura's hash it is probably possible, 286 */
271 // so, this is a bug 287 /*
 288 * FIXME: this works correctly only because the hash value cannot
289 * be 0. Btw, in case of Yura's hash it is probably possible,
290 * so, this is a bug
291 */
272 return NAME_NOT_FOUND; 292 return NAME_NOT_FOUND;
273 293
274 RFALSE(de->de_item_num, 294 RFALSE(de->de_item_num,
@@ -277,8 +297,10 @@ static int linear_search_in_dir_item(struct cpu_key *key,
277 return GOTO_PREVIOUS_ITEM; 297 return GOTO_PREVIOUS_ITEM;
278} 298}
279 299
280// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND 300/*
281// FIXME: should add something like IOERROR 301 * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
302 * FIXME: should add something like IOERROR
303 */
282static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, 304static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
283 struct treepath *path_to_entry, 305 struct treepath *path_to_entry,
284 struct reiserfs_dir_entry *de) 306 struct reiserfs_dir_entry *de)
@@ -307,13 +329,19 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
307 retval = 329 retval =
308 linear_search_in_dir_item(&key_to_search, de, name, 330 linear_search_in_dir_item(&key_to_search, de, name,
309 namelen); 331 namelen);
332 /*
333 * there is no need to scan directory anymore.
334 * Given entry found or does not exist
335 */
310 if (retval != GOTO_PREVIOUS_ITEM) { 336 if (retval != GOTO_PREVIOUS_ITEM) {
311 /* there is no need to scan directory anymore. Given entry found or does not exist */
312 path_to_entry->pos_in_item = de->de_entry_num; 337 path_to_entry->pos_in_item = de->de_entry_num;
313 return retval; 338 return retval;
314 } 339 }
315 340
316 /* there is left neighboring item of this directory and given entry can be there */ 341 /*
342 * there is a left neighboring item of this directory
343 * and the given entry can be there
344 */
317 set_cpu_key_k_offset(&key_to_search, 345 set_cpu_key_k_offset(&key_to_search,
318 le_ih_k_offset(de->de_ih) - 1); 346 le_ih_k_offset(de->de_ih) - 1);
319 pathrelse(path_to_entry); 347 pathrelse(path_to_entry);
@@ -347,8 +375,10 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
347 return ERR_PTR(-EACCES); 375 return ERR_PTR(-EACCES);
348 } 376 }
349 377
350 /* Propagate the private flag so we know we're 378 /*
351 * in the priv tree */ 379 * Propagate the private flag so we know we're
380 * in the priv tree
381 */
352 if (IS_PRIVATE(dir)) 382 if (IS_PRIVATE(dir))
353 inode->i_flags |= S_PRIVATE; 383 inode->i_flags |= S_PRIVATE;
354 } 384 }
@@ -361,9 +391,9 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
361} 391}
362 392
363/* 393/*
364** looks up the dentry of the parent directory for child. 394 * looks up the dentry of the parent directory for child.
365** taken from ext2_get_parent 395 * taken from ext2_get_parent
366*/ 396 */
367struct dentry *reiserfs_get_parent(struct dentry *child) 397struct dentry *reiserfs_get_parent(struct dentry *child)
368{ 398{
369 int retval; 399 int retval;
@@ -406,8 +436,13 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
406 struct reiserfs_dir_entry de; 436 struct reiserfs_dir_entry de;
407 DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); 437 DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1);
408 int gen_number; 438 int gen_number;
409 char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc 439
410 if we create file with short name */ 440 /*
441 * 48 bytes now and we avoid kmalloc if we
442 * create a file with a short name
443 */
444 char small_buf[32 + DEH_SIZE];
445
411 char *buffer; 446 char *buffer;
412 int buflen, paste_size; 447 int buflen, paste_size;
413 int retval; 448 int retval;
@@ -439,21 +474,30 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
439 (get_inode_sd_version(dir) == 474 (get_inode_sd_version(dir) ==
440 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; 475 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
441 476
442 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ 477 /*
478 * fill buffer : directory entry head, name[, dir objectid | ,
479 * stat data | ,stat data, dir objectid ]
480 */
443 deh = (struct reiserfs_de_head *)buffer; 481 deh = (struct reiserfs_de_head *)buffer;
444 deh->deh_location = 0; /* JDM Endian safe if 0 */ 482 deh->deh_location = 0; /* JDM Endian safe if 0 */
445 put_deh_offset(deh, cpu_key_k_offset(&entry_key)); 483 put_deh_offset(deh, cpu_key_k_offset(&entry_key));
446 deh->deh_state = 0; /* JDM Endian safe if 0 */ 484 deh->deh_state = 0; /* JDM Endian safe if 0 */
447 /* put key (ino analog) to de */ 485 /* put key (ino analog) to de */
448 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ 486
449 deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ 487 /* safe: k_dir_id is le */
488 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id;
489 /* safe: k_objectid is le */
490 deh->deh_objectid = INODE_PKEY(inode)->k_objectid;
450 491
451 /* copy name */ 492 /* copy name */
452 memcpy((char *)(deh + 1), name, namelen); 493 memcpy((char *)(deh + 1), name, namelen);
453 /* pad with 0s to the 4 byte boundary */ 494 /* pad with 0s to the 4 byte boundary */
454 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); 495 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
455 496
456 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ 497 /*
498 * entry is ready to be pasted into tree, set 'visibility'
499 * and 'stat data in entry' attributes
500 */
457 mark_de_without_sd(deh); 501 mark_de_without_sd(deh);
458 visible ? mark_de_visible(deh) : mark_de_hidden(deh); 502 visible ? mark_de_visible(deh) : mark_de_hidden(deh);
459 503
@@ -499,7 +543,8 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
499 /* update max-hash-collisions counter in reiserfs_sb_info */ 543 /* update max-hash-collisions counter in reiserfs_sb_info */
500 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); 544 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
501 545
502 if (gen_number != 0) { /* we need to re-search for the insertion point */ 546 /* we need to re-search for the insertion point */
547 if (gen_number != 0) {
503 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != 548 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
504 NAME_NOT_FOUND) { 549 NAME_NOT_FOUND) {
505 reiserfs_warning(dir->i_sb, "vs-7032", 550 reiserfs_warning(dir->i_sb, "vs-7032",
@@ -527,18 +572,19 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
527 dir->i_size += paste_size; 572 dir->i_size += paste_size;
528 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 573 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
529 if (!S_ISDIR(inode->i_mode) && visible) 574 if (!S_ISDIR(inode->i_mode) && visible)
530 // reiserfs_mkdir or reiserfs_rename will do that by itself 575 /* reiserfs_mkdir or reiserfs_rename will do that by itself */
531 reiserfs_update_sd(th, dir); 576 reiserfs_update_sd(th, dir);
532 577
533 reiserfs_check_path(&path); 578 reiserfs_check_path(&path);
534 return 0; 579 return 0;
535} 580}
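For the gen_number handling above: reiserfs_find_entry marks every generation number already taken for this hash in bit_string, and the code then claims the first free one. A condensed sketch of that step, paraphrased from this function (find_first_zero_bit, put_deh_offset and SET_GENERATION_NUMBER are the helpers the real code uses):

	/* pick the lowest unused generation number for this hash value */
	gen_number = find_first_zero_bit(bit_string,
					 MAX_GENERATION_NUMBER + 1);
	if (gen_number > MAX_GENERATION_NUMBER)
		return -EBUSY;		/* all 128 generations are taken */

	/* splice the generation number into the low bits of the offset */
	put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh),
						  gen_number));

A non-zero gen_number means the entry key changed after the first lookup, which is exactly why the gen_number != 0 branch above must re-search for the insertion point.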
536 581
537/* quota utility function, call if you've had to abort after calling 582/*
538** new_inode_init, and have not called reiserfs_new_inode yet. 583 * quota utility function, call if you've had to abort after calling
539** This should only be called on inodes that do not have stat data 584 * new_inode_init, and have not called reiserfs_new_inode yet.
540** inserted into the tree yet. 585 * This should only be called on inodes that do not have stat data
541*/ 586 * inserted into the tree yet.
587 */
542static int drop_new_inode(struct inode *inode) 588static int drop_new_inode(struct inode *inode)
543{ 589{
544 dquot_drop(inode); 590 dquot_drop(inode);
@@ -548,18 +594,23 @@ static int drop_new_inode(struct inode *inode)
548 return 0; 594 return 0;
549} 595}
550 596
551/* utility function that does setup for reiserfs_new_inode. 597/*
552** dquot_initialize needs lots of credits so it's better to have it 598 * utility function that does setup for reiserfs_new_inode.
553** outside of a transaction, so we had to pull some bits of 599 * dquot_initialize needs lots of credits so it's better to have it
554** reiserfs_new_inode out into this func. 600 * outside of a transaction, so we had to pull some bits of
555*/ 601 * reiserfs_new_inode out into this func.
602 */
556static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) 603static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
557{ 604{
558 /* Make inode invalid - just in case we are going to drop it before 605 /*
559 * the initialization happens */ 606 * Make inode invalid - just in case we are going to drop it before
607 * the initialization happens
608 */
560 INODE_PKEY(inode)->k_objectid = 0; 609 INODE_PKEY(inode)->k_objectid = 0;
561 /* the quota init calls have to know who to charge the quota to, so 610
562 ** we have to set uid and gid here 611 /*
612 * the quota init calls have to know who to charge the quota to, so
613 * we have to set uid and gid here
563 */ 614 */
564 inode_init_owner(inode, dir, mode); 615 inode_init_owner(inode, dir, mode);
565 dquot_initialize(inode); 616 dquot_initialize(inode);
@@ -571,7 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
571{ 622{
572 int retval; 623 int retval;
573 struct inode *inode; 624 struct inode *inode;
574 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 625 /*
626 * We need blocks for transaction + (user+group)*(quotas
627 * for new inode + update of quota for directory owner)
628 */
575 int jbegin_count = 629 int jbegin_count =
576 JOURNAL_PER_BALANCE_CNT * 2 + 630 JOURNAL_PER_BALANCE_CNT * 2 +
577 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 631 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -644,7 +698,10 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
644 struct inode *inode; 698 struct inode *inode;
645 struct reiserfs_transaction_handle th; 699 struct reiserfs_transaction_handle th;
646 struct reiserfs_security_handle security; 700 struct reiserfs_security_handle security;
647 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 701 /*
702 * We need blocks for transaction + (user+group)*(quotas
703 * for new inode + update of quota for directory owner)
704 */
648 int jbegin_count = 705 int jbegin_count =
649 JOURNAL_PER_BALANCE_CNT * 3 + 706 JOURNAL_PER_BALANCE_CNT * 3 +
650 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 707 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -685,7 +742,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
685 inode->i_op = &reiserfs_special_inode_operations; 742 inode->i_op = &reiserfs_special_inode_operations;
686 init_special_inode(inode, inode->i_mode, rdev); 743 init_special_inode(inode, inode->i_mode, rdev);
687 744
688 //FIXME: needed for block and char devices only 745 /* FIXME: needed for block and char devices only */
689 reiserfs_update_sd(&th, inode); 746 reiserfs_update_sd(&th, inode);
690 747
691 reiserfs_update_inode_transaction(inode); 748 reiserfs_update_inode_transaction(inode);
@@ -721,7 +778,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
721 struct inode *inode; 778 struct inode *inode;
722 struct reiserfs_transaction_handle th; 779 struct reiserfs_transaction_handle th;
723 struct reiserfs_security_handle security; 780 struct reiserfs_security_handle security;
724 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 781 /*
782 * We need blocks for transaction + (user+group)*(quotas
783 * for new inode + update of quota for directory owner)
784 */
725 int jbegin_count = 785 int jbegin_count =
726 JOURNAL_PER_BALANCE_CNT * 3 + 786 JOURNAL_PER_BALANCE_CNT * 3 +
727 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + 787 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
@@ -730,7 +790,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
730 dquot_initialize(dir); 790 dquot_initialize(dir);
731 791
732#ifdef DISPLACE_NEW_PACKING_LOCALITIES 792#ifdef DISPLACE_NEW_PACKING_LOCALITIES
733 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ 793 /*
794 * set flag that a new packing locality was created and new blocks
795 * for the content of that directory are not displaced yet
796 */
734 REISERFS_I(dir)->new_packing_locality = 1; 797 REISERFS_I(dir)->new_packing_locality = 1;
735#endif 798#endif
736 mode = S_IFDIR | mode; 799 mode = S_IFDIR | mode;
@@ -754,8 +817,9 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
754 goto out_failed; 817 goto out_failed;
755 } 818 }
756 819
757 /* inc the link count now, so another writer doesn't overflow it while 820 /*
758 ** we sleep later on. 821 * inc the link count now, so another writer doesn't overflow
822 * it while we sleep later on.
759 */ 823 */
760 INC_DIR_INODE_NLINK(dir) 824 INC_DIR_INODE_NLINK(dir)
761 825
@@ -774,7 +838,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
774 inode->i_op = &reiserfs_dir_inode_operations; 838 inode->i_op = &reiserfs_dir_inode_operations;
775 inode->i_fop = &reiserfs_dir_operations; 839 inode->i_fop = &reiserfs_dir_operations;
776 840
777 // note, _this_ add_entry will not update dir's stat data 841 /* note, _this_ add_entry will not update dir's stat data */
778 retval = 842 retval =
779 reiserfs_add_entry(&th, dir, dentry->d_name.name, 843 reiserfs_add_entry(&th, dir, dentry->d_name.name,
780 dentry->d_name.len, inode, 1 /*visible */ ); 844 dentry->d_name.len, inode, 1 /*visible */ );
@@ -790,7 +854,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
790 iput(inode); 854 iput(inode);
791 goto out_failed; 855 goto out_failed;
792 } 856 }
793 // the above add_entry did not update dir's stat data 857 /* the above add_entry did not update dir's stat data */
794 reiserfs_update_sd(&th, dir); 858 reiserfs_update_sd(&th, dir);
795 859
796 unlock_new_inode(inode); 860 unlock_new_inode(inode);
@@ -803,10 +867,11 @@ out_failed:
803 867
804static inline int reiserfs_empty_dir(struct inode *inode) 868static inline int reiserfs_empty_dir(struct inode *inode)
805{ 869{
806 /* we can cheat because an old format dir cannot have 870 /*
807 ** EMPTY_DIR_SIZE, and a new format dir cannot have 871 * we can cheat because an old format dir cannot have
808 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, 872 * EMPTY_DIR_SIZE, and a new format dir cannot have
809 ** regardless of disk format version, the directory is empty. 873 * EMPTY_DIR_SIZE_V1. So, if the inode is either size,
874 * regardless of disk format version, the directory is empty.
810 */ 875 */
811 if (inode->i_size != EMPTY_DIR_SIZE && 876 if (inode->i_size != EMPTY_DIR_SIZE &&
812 inode->i_size != EMPTY_DIR_SIZE_V1) { 877 inode->i_size != EMPTY_DIR_SIZE_V1) {
@@ -824,10 +889,12 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
824 INITIALIZE_PATH(path); 889 INITIALIZE_PATH(path);
825 struct reiserfs_dir_entry de; 890 struct reiserfs_dir_entry de;
826 891
827 /* we will be doing 2 balancings and update 2 stat data, we change quotas 892 /*
828 * of the owner of the directory and of the owner of the parent directory. 893 * we will be doing 2 balancings and update 2 stat data, we
829 * The quota structure is possibly deleted only on last iput => outside 894 * change quotas of the owner of the directory and of the owner
830 * of this transaction */ 895 * of the parent directory. The quota structure is possibly
896 * deleted only on last iput => outside of this transaction
897 */
831 jbegin_count = 898 jbegin_count =
832 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 899 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
833 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 900 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -856,8 +923,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
856 reiserfs_update_inode_transaction(dir); 923 reiserfs_update_inode_transaction(dir);
857 924
858 if (de.de_objectid != inode->i_ino) { 925 if (de.de_objectid != inode->i_ino) {
859 // FIXME: compare key of an object and a key found in the 926 /*
860 // entry 927 * FIXME: compare key of an object and a key found in the entry
928 */
861 retval = -EIO; 929 retval = -EIO;
862 goto end_rmdir; 930 goto end_rmdir;
863 } 931 }
@@ -895,9 +963,11 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
895 return retval; 963 return retval;
896 964
897 end_rmdir: 965 end_rmdir:
898 /* we must release path, because we did not call 966 /*
899 reiserfs_cut_from_item, or reiserfs_cut_from_item does not 967 * we must release the path: either we did not call
900 release path if operation was not complete */ 968 * reiserfs_cut_from_item, or reiserfs_cut_from_item did not
969 * release the path because the operation was not complete
970 */
901 pathrelse(&path); 971 pathrelse(&path);
902 err = journal_end(&th, dir->i_sb, jbegin_count); 972 err = journal_end(&th, dir->i_sb, jbegin_count);
903 reiserfs_write_unlock(dir->i_sb); 973 reiserfs_write_unlock(dir->i_sb);
@@ -918,10 +988,13 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
918 988
919 inode = dentry->d_inode; 989 inode = dentry->d_inode;
920 990
921 /* in this transaction we can be doing at max two balancings and update 991 /*
922 * two stat datas, we change quotas of the owner of the directory and of 992 * in this transaction we can be doing at max two balancings and
923 * the owner of the parent directory. The quota structure is possibly 993 * update two stat datas, we change quotas of the owner of the
924 * deleted only on iput => outside of this transaction */ 994 * directory and of the owner of the parent directory. The quota
995 * structure is possibly deleted only on iput => outside of
996 * this transaction
997 */
925 jbegin_count = 998 jbegin_count =
926 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 999 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
927 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1000 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -946,8 +1019,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
946 reiserfs_update_inode_transaction(dir); 1019 reiserfs_update_inode_transaction(dir);
947 1020
948 if (de.de_objectid != inode->i_ino) { 1021 if (de.de_objectid != inode->i_ino) {
949 // FIXME: compare key of an object and a key found in the 1022 /*
950 // entry 1023 * FIXME: compare key of an object and a key found in the entry
1024 */
951 retval = -EIO; 1025 retval = -EIO;
952 goto end_unlink; 1026 goto end_unlink;
953 } 1027 }
@@ -1011,7 +1085,10 @@ static int reiserfs_symlink(struct inode *parent_dir,
1011 struct reiserfs_transaction_handle th; 1085 struct reiserfs_transaction_handle th;
1012 struct reiserfs_security_handle security; 1086 struct reiserfs_security_handle security;
1013 int mode = S_IFLNK | S_IRWXUGO; 1087 int mode = S_IFLNK | S_IRWXUGO;
1014 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 1088 /*
1089 * We need blocks for transaction + (user+group)*(quotas for
1090 * new inode + update of quota for directory owner)
1091 */
1015 int jbegin_count = 1092 int jbegin_count =
1016 JOURNAL_PER_BALANCE_CNT * 3 + 1093 JOURNAL_PER_BALANCE_CNT * 3 +
1017 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + 1094 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
@@ -1070,10 +1147,6 @@ static int reiserfs_symlink(struct inode *parent_dir,
1070 inode->i_op = &reiserfs_symlink_inode_operations; 1147 inode->i_op = &reiserfs_symlink_inode_operations;
1071 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1148 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1072 1149
1073 // must be sure this inode is written with this transaction
1074 //
1075 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1076
1077 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, 1150 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
1078 dentry->d_name.len, inode, 1 /*visible */ ); 1151 dentry->d_name.len, inode, 1 /*visible */ );
1079 if (retval) { 1152 if (retval) {
@@ -1102,7 +1175,10 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1102 int retval; 1175 int retval;
1103 struct inode *inode = old_dentry->d_inode; 1176 struct inode *inode = old_dentry->d_inode;
1104 struct reiserfs_transaction_handle th; 1177 struct reiserfs_transaction_handle th;
1105 /* We need blocks for transaction + update of quotas for the owners of the directory */ 1178 /*
1179 * We need blocks for transaction + update of quotas for
1180 * the owners of the directory
1181 */
1106 int jbegin_count = 1182 int jbegin_count =
1107 JOURNAL_PER_BALANCE_CNT * 3 + 1183 JOURNAL_PER_BALANCE_CNT * 3 +
1108 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1184 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
@@ -1111,7 +1187,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1111 1187
1112 reiserfs_write_lock(dir->i_sb); 1188 reiserfs_write_lock(dir->i_sb);
1113 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1189 if (inode->i_nlink >= REISERFS_LINK_MAX) {
1114 //FIXME: sd_nlink is 32 bit for new files 1190 /* FIXME: sd_nlink is 32 bit for new files */
1115 reiserfs_write_unlock(dir->i_sb); 1191 reiserfs_write_unlock(dir->i_sb);
1116 return -EMLINK; 1192 return -EMLINK;
1117 } 1193 }
@@ -1158,9 +1234,9 @@ static int de_still_valid(const char *name, int len,
1158{ 1234{
1159 struct reiserfs_dir_entry tmp = *de; 1235 struct reiserfs_dir_entry tmp = *de;
1160 1236
1161 // recalculate pointer to name and name length 1237 /* recalculate pointer to name and name length */
1162 set_de_name_and_namelen(&tmp); 1238 set_de_name_and_namelen(&tmp);
1163 // FIXME: could check more 1239 /* FIXME: could check more */
1164 if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) 1240 if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
1165 return 0; 1241 return 0;
1166 return 1; 1242 return 1;
@@ -1217,14 +1293,16 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1217 unsigned long savelink = 1; 1293 unsigned long savelink = 1;
1218 struct timespec ctime; 1294 struct timespec ctime;
1219 1295
1220 /* three balancings: (1) old name removal, (2) new name insertion 1296 /*
1221 and (3) maybe "save" link insertion 1297 * three balancings: (1) old name removal, (2) new name insertion
1222 stat data updates: (1) old directory, 1298 * and (3) maybe "save" link insertion
1223 (2) new directory and (3) maybe old object stat data (when it is 1299 * stat data updates: (1) old directory,
1224 directory) and (4) maybe stat data of object to which new entry 1300 * (2) new directory and (3) maybe old object stat data (when it is
1225 pointed initially and (5) maybe block containing ".." of 1301 * directory) and (4) maybe stat data of object to which new entry
1226 renamed directory 1302 * pointed initially and (5) maybe block containing ".." of
1227 quota updates: two parent directories */ 1303 * renamed directory
1304 * quota updates: two parent directories
1305 */
1228 jbegin_count = 1306 jbegin_count =
1229 JOURNAL_PER_BALANCE_CNT * 3 + 5 + 1307 JOURNAL_PER_BALANCE_CNT * 3 + 5 +
1230 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); 1308 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
@@ -1235,8 +1313,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1235 old_inode = old_dentry->d_inode; 1313 old_inode = old_dentry->d_inode;
1236 new_dentry_inode = new_dentry->d_inode; 1314 new_dentry_inode = new_dentry->d_inode;
1237 1315
1238 // make sure, that oldname still exists and points to an object we 1316 /*
1239 // are going to rename 1317 * make sure that oldname still exists and points to an object we
1318 * are going to rename
1319 */
1240 old_de.de_gen_number_bit_string = NULL; 1320 old_de.de_gen_number_bit_string = NULL;
1241 reiserfs_write_lock(old_dir->i_sb); 1321 reiserfs_write_lock(old_dir->i_sb);
1242 retval = 1322 retval =
@@ -1256,10 +1336,11 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1256 1336
1257 old_inode_mode = old_inode->i_mode; 1337 old_inode_mode = old_inode->i_mode;
1258 if (S_ISDIR(old_inode_mode)) { 1338 if (S_ISDIR(old_inode_mode)) {
1259 // make sure, that directory being renamed has correct ".." 1339 /*
1260 // and that its new parent directory has not too many links 1340 * make sure that directory being renamed has correct ".."
1261 // already 1341 * and that its new parent directory has not too many links
1262 1342 * already
1343 */
1263 if (new_dentry_inode) { 1344 if (new_dentry_inode) {
1264 if (!reiserfs_empty_dir(new_dentry_inode)) { 1345 if (!reiserfs_empty_dir(new_dentry_inode)) {
1265 reiserfs_write_unlock(old_dir->i_sb); 1346 reiserfs_write_unlock(old_dir->i_sb);
@@ -1267,8 +1348,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1267 } 1348 }
1268 } 1349 }
1269 1350
1270 /* directory is renamed, its parent directory will be changed, 1351 /*
1271 ** so find ".." entry 1352 * directory is renamed, its parent directory will be changed,
1353 * so find ".." entry
1272 */ 1354 */
1273 dot_dot_de.de_gen_number_bit_string = NULL; 1355 dot_dot_de.de_gen_number_bit_string = NULL;
1274 retval = 1356 retval =
@@ -1311,8 +1393,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1311 reiserfs_update_inode_transaction(old_dir); 1393 reiserfs_update_inode_transaction(old_dir);
1312 reiserfs_update_inode_transaction(new_dir); 1394 reiserfs_update_inode_transaction(new_dir);
1313 1395
1314 /* this makes it so an fsync on an open fd for the old name will 1396 /*
1315 ** commit the rename operation 1397 * this makes it so an fsync on an open fd for the old name will
1398 * commit the rename operation
1316 */ 1399 */
1317 reiserfs_update_inode_transaction(old_inode); 1400 reiserfs_update_inode_transaction(old_inode);
1318 1401
@@ -1320,7 +1403,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1320 reiserfs_update_inode_transaction(new_dentry_inode); 1403 reiserfs_update_inode_transaction(new_dentry_inode);
1321 1404
1322 while (1) { 1405 while (1) {
1323 // look for old name using corresponding entry key (found by reiserfs_find_entry) 1406 /*
1407 * look for old name using corresponding entry key
1408 * (found by reiserfs_find_entry)
1409 */
1324 if ((retval = 1410 if ((retval =
1325 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, 1411 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
1326 &old_entry_path, 1412 &old_entry_path,
@@ -1335,14 +1421,18 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1335 1421
1336 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); 1422 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
1337 1423
1338 // look for new name by reiserfs_find_entry 1424 /* look for new name by reiserfs_find_entry */
1339 new_de.de_gen_number_bit_string = NULL; 1425 new_de.de_gen_number_bit_string = NULL;
1340 retval = 1426 retval =
1341 reiserfs_find_entry(new_dir, new_dentry->d_name.name, 1427 reiserfs_find_entry(new_dir, new_dentry->d_name.name,
1342 new_dentry->d_name.len, &new_entry_path, 1428 new_dentry->d_name.len, &new_entry_path,
1343 &new_de); 1429 &new_de);
1344 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from 1430 /*
1345 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 1431 * reiserfs_add_entry should not return IO_ERROR,
1432 * because it is called with essentially same parameters from
1433 * reiserfs_add_entry above, and we'll catch any i/o errors
1434 * before we get here.
1435 */
1346 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { 1436 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
1347 pathrelse(&new_entry_path); 1437 pathrelse(&new_entry_path);
1348 pathrelse(&old_entry_path); 1438 pathrelse(&old_entry_path);
@@ -1370,22 +1460,26 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1370 } 1460 }
1371 copy_item_head(&dot_dot_ih, 1461 copy_item_head(&dot_dot_ih,
1372 tp_item_head(&dot_dot_entry_path)); 1462 tp_item_head(&dot_dot_entry_path));
1373 // node containing ".." gets into transaction 1463 /* node containing ".." gets into transaction */
1374 reiserfs_prepare_for_journal(old_inode->i_sb, 1464 reiserfs_prepare_for_journal(old_inode->i_sb,
1375 dot_dot_de.de_bh, 1); 1465 dot_dot_de.de_bh, 1);
1376 } 1466 }
1377 /* we should check seals here, not do 1467 /*
1378 this stuff, yes? Then, having 1468 * we should check seals here, not do
1379 gathered everything into RAM we 1469 * this stuff, yes? Then, having
1380 should lock the buffers, yes? -Hans */ 1470 * gathered everything into RAM we
1381 /* probably. our rename needs to hold more 1471 * should lock the buffers, yes? -Hans
1382 ** than one path at once. The seals would 1472 */
1383 ** have to be written to deal with multi-path 1473 /*
1384 ** issues -chris 1474 * probably. our rename needs to hold more
1475 * than one path at once. The seals would
1476 * have to be written to deal with multi-path
1477 * issues -chris
1385 */ 1478 */
1386 /* sanity checking before doing the rename - avoid races many 1479 /*
1387 ** of the above checks could have scheduled. We have to be 1480 * sanity checking before doing the rename - avoid races many
1388 ** sure our items haven't been shifted by another process. 1481 * of the above checks could have scheduled. We have to be
1482 * sure our items haven't been shifted by another process.
1389 */ 1483 */
1390 if (item_moved(&new_entry_ih, &new_entry_path) || 1484 if (item_moved(&new_entry_ih, &new_entry_path) ||
1391 !entry_points_to_object(new_dentry->d_name.name, 1485 !entry_points_to_object(new_dentry->d_name.name,
@@ -1430,8 +1524,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1430 break; 1524 break;
1431 } 1525 }
1432 1526
1433 /* ok, all the changes can be done in one fell swoop when we 1527 /*
1434 have claimed all the buffers needed. */ 1528 * ok, all the changes can be done in one fell swoop when we
1529 * have claimed all the buffers needed.
1530 */
1435 1531
1436 mark_de_visible(new_de.de_deh + new_de.de_entry_num); 1532 mark_de_visible(new_de.de_deh + new_de.de_entry_num);
1437 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); 1533 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
@@ -1442,12 +1538,14 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1442 ctime = CURRENT_TIME_SEC; 1538 ctime = CURRENT_TIME_SEC;
1443 old_dir->i_ctime = old_dir->i_mtime = ctime; 1539 old_dir->i_ctime = old_dir->i_mtime = ctime;
1444 new_dir->i_ctime = new_dir->i_mtime = ctime; 1540 new_dir->i_ctime = new_dir->i_mtime = ctime;
1445 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of 1541 /*
1446 renamed object */ 1542 * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
1543 * which adds ctime update of renamed object
1544 */
1447 old_inode->i_ctime = ctime; 1545 old_inode->i_ctime = ctime;
1448 1546
1449 if (new_dentry_inode) { 1547 if (new_dentry_inode) {
1450 // adjust link number of the victim 1548 /* adjust link number of the victim */
1451 if (S_ISDIR(new_dentry_inode->i_mode)) { 1549 if (S_ISDIR(new_dentry_inode->i_mode)) {
1452 clear_nlink(new_dentry_inode); 1550 clear_nlink(new_dentry_inode);
1453 } else { 1551 } else {
@@ -1462,21 +1560,28 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1462 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); 1560 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1463 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); 1561 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
1464 1562
1563 /*
1564 * there (in new_dir) was no directory, so it got new link
1565 * (".." of renamed directory)
1566 */
1465 if (!new_dentry_inode) 1567 if (!new_dentry_inode)
1466 /* there (in new_dir) was no directory, so it got new link
1467 (".." of renamed directory) */
1468 INC_DIR_INODE_NLINK(new_dir); 1568 INC_DIR_INODE_NLINK(new_dir);
1469 1569
1470 /* old directory lost one link - ".." of renamed directory */ 1570 /* old directory lost one link - ".." of renamed directory */
1471 DEC_DIR_INODE_NLINK(old_dir); 1571 DEC_DIR_INODE_NLINK(old_dir);
1472 } 1572 }
1473 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse 1573 /*
1574 * looks like in 2.3.99pre3 brelse is atomic,
1575 * so we can use pathrelse
1576 */
1474 pathrelse(&new_entry_path); 1577 pathrelse(&new_entry_path);
1475 pathrelse(&dot_dot_entry_path); 1578 pathrelse(&dot_dot_entry_path);
1476 1579
1477 // FIXME: this reiserfs_cut_from_item's return value may screw up 1580 /*
1478 // anybody, but it will panic if will not be able to find the 1581 * FIXME: this reiserfs_cut_from_item's return value may screw up
1479 // entry. This needs one more clean up 1582 * anybody, but it will panic if it is not able to find the
1583 * entry. This needs one more cleanup
1584 */
1480 if (reiserfs_cut_from_item 1585 if (reiserfs_cut_from_item
1481 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 1586 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
1482 0) < 0) 1587 0) < 0)
@@ -1501,11 +1606,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1501 return retval; 1606 return retval;
1502} 1607}
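The while (1) loop in reiserfs_rename above is easier to follow as an outline (a paraphrase of the code just shown, not new logic):

/*
 * 1. search_by_entry_key()            re-find the old entry
 * 2. reiserfs_prepare_for_journal()   claim its buffer
 * 3. reiserfs_find_entry()            re-find the new entry, claim it
 * 4. directories only                 re-find "..", claim it as well
 * 5. item_moved()/entry checks fail?  something shifted while we slept:
 *                                     release the buffers, retry from 1
 * 6. otherwise break                  every buffer is claimed, so all
 *                                     changes land in one fell swoop
 */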
1503 1608
1504/* 1609/* directories can handle most operations... */
1505 * directories can handle most operations...
1506 */
1507const struct inode_operations reiserfs_dir_inode_operations = { 1610const struct inode_operations reiserfs_dir_inode_operations = {
1508 //&reiserfs_dir_operations, /* default_file_ops */
1509 .create = reiserfs_create, 1611 .create = reiserfs_create,
1510 .lookup = reiserfs_lookup, 1612 .lookup = reiserfs_lookup,
1511 .link = reiserfs_link, 1613 .link = reiserfs_link,
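Closing out namei.c: a worked example of the jbegin_count formulas used throughout the file, for the rename case. It assumes JOURNAL_PER_BALANCE_CNT is 3 and quota support is compiled out so that REISERFS_QUOTA_TRANS_BLOCKS() is 0 -- both assumptions about the build, not something this patch states:

/*
 * jbegin_count for reiserfs_rename under the assumptions above:
 *
 *   JOURNAL_PER_BALANCE_CNT * 3      3 blocks x 3 balancings  =  9
 *   + 5                              5 stat data updates      =  5
 *   + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb)          =  0
 *                                                       total   14
 *
 * journal_begin() reserves these 14 blocks up front, so the rename
 * can never run out of log space partway through.
 */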
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index f732d6a5251d..99f66f885785 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -7,7 +7,7 @@
7#include <linux/time.h> 7#include <linux/time.h>
8#include "reiserfs.h" 8#include "reiserfs.h"
9 9
10// find where objectid map starts 10/* find where objectid map starts */
11#define objectid_map(s,rs) (old_format_only (s) ? \ 11#define objectid_map(s,rs) (old_format_only (s) ? \
12 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ 12 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
13 (__le32 *)((rs) + 1)) 13 (__le32 *)((rs) + 1))
@@ -20,7 +20,7 @@ static void check_objectid_map(struct super_block *s, __le32 * map)
20 reiserfs_panic(s, "vs-15010", "map corrupted: %lx", 20 reiserfs_panic(s, "vs-15010", "map corrupted: %lx",
21 (long unsigned int)le32_to_cpu(map[0])); 21 (long unsigned int)le32_to_cpu(map[0]));
22 22
23 // FIXME: add something else here 23 /* FIXME: add something else here */
24} 24}
25 25
26#else 26#else
@@ -29,19 +29,21 @@ static void check_objectid_map(struct super_block *s, __le32 * map)
29} 29}
30#endif 30#endif
31 31
32/* When we allocate objectids we allocate the first unused objectid. 32/*
33 Each sequence of objectids in use (the odd sequences) is followed 33 * When we allocate objectids we allocate the first unused objectid.
34 by a sequence of objectids not in use (the even sequences). We 34 * Each sequence of objectids in use (the odd sequences) is followed
35 only need to record the last objectid in each of these sequences 35 * by a sequence of objectids not in use (the even sequences). We
36 (both the odd and even sequences) in order to fully define the 36 * only need to record the last objectid in each of these sequences
37 boundaries of the sequences. A consequence of allocating the first 37 * (both the odd and even sequences) in order to fully define the
38 objectid not in use is that under most conditions this scheme is 38 * boundaries of the sequences. A consequence of allocating the first
39 extremely compact. The exception is immediately after a sequence 39 * objectid not in use is that under most conditions this scheme is
40 of operations which deletes a large number of objects of 40 * extremely compact. The exception is immediately after a sequence
41 non-sequential objectids, and even then it will become compact 41 * of operations which deletes a large number of objects of
42 again as soon as more objects are created. Note that many 42 * non-sequential objectids, and even then it will become compact
43 interesting optimizations of layout could result from complicating 43 * again as soon as more objects are created. Note that many
44 objectid assignment, but we have deferred making them for now. */ 44 * interesting optimizations of layout could result from complicating
45 * objectid assignment, but we have deferred making them for now.
46 */
45 47
46/* get unique object identifier */ 48/* get unique object identifier */
47__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) 49__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
@@ -64,19 +66,23 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
64 return 0; 66 return 0;
65 } 67 }
66 68
67 /* This incrementation allocates the first unused objectid. That 69 /*
68 is to say, the first entry on the objectid map is the first 70 * This incrementation allocates the first unused objectid. That
69 unused objectid, and by incrementing it we use it. See below 71 * is to say, the first entry on the objectid map is the first
70 where we check to see if we eliminated a sequence of unused 72 * unused objectid, and by incrementing it we use it. See below
71 objectids.... */ 73 * where we check to see if we eliminated a sequence of unused
74 * objectids....
75 */
72 map[1] = cpu_to_le32(unused_objectid + 1); 76 map[1] = cpu_to_le32(unused_objectid + 1);
73 77
74 /* Now we check to see if we eliminated the last remaining member of 78 /*
75 the first even sequence (and can eliminate the sequence by 79 * Now we check to see if we eliminated the last remaining member of
76 eliminating its last objectid from oids), and can collapse the 80 * the first even sequence (and can eliminate the sequence by
77 first two odd sequences into one sequence. If so, then the net 81 * eliminating its last objectid from oids), and can collapse the
78 result is to eliminate a pair of objectids from oids. We do this 82 * first two odd sequences into one sequence. If so, then the net
79 by shifting the entire map to the left. */ 83 * result is to eliminate a pair of objectids from oids. We do this
84 * by shifting the entire map to the left.
85 */
80 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { 86 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
81 memmove(map + 1, map + 3, 87 memmove(map + 1, map + 3,
82 (sb_oid_cursize(rs) - 3) * sizeof(__u32)); 88 (sb_oid_cursize(rs) - 3) * sizeof(__u32));
@@ -97,30 +103,33 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
97 int i = 0; 103 int i = 0;
98 104
99 BUG_ON(!th->t_trans_id); 105 BUG_ON(!th->t_trans_id);
100 //return; 106 /*return; */
101 check_objectid_map(s, map); 107 check_objectid_map(s, map);
102 108
103 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 109 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
104 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); 110 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
105 111
106 /* start at the beginning of the objectid map (i = 0) and go to 112 /*
107 the end of it (i = disk_sb->s_oid_cursize). Linear search is 113 * start at the beginning of the objectid map (i = 0) and go to
108 what we use, though it is possible that binary search would be 114 * the end of it (i = disk_sb->s_oid_cursize). Linear search is
109 more efficient after performing lots of deletions (which is 115 * what we use, though it is possible that binary search would be
110 when oids is large.) We only check even i's. */ 116 * more efficient after performing lots of deletions (which is
117 * when oids is large.) We only check even i's.
118 */
111 while (i < sb_oid_cursize(rs)) { 119 while (i < sb_oid_cursize(rs)) {
112 if (objectid_to_release == le32_to_cpu(map[i])) { 120 if (objectid_to_release == le32_to_cpu(map[i])) {
113 /* This incrementation unallocates the objectid. */ 121 /* This incrementation unallocates the objectid. */
114 //map[i]++;
115 le32_add_cpu(&map[i], 1); 122 le32_add_cpu(&map[i], 1);
116 123
117 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 124 /*
125 * Did we unallocate the last member of an
126 * odd sequence, and can shrink oids?
127 */
118 if (map[i] == map[i + 1]) { 128 if (map[i] == map[i + 1]) {
119 /* shrink objectid map */ 129 /* shrink objectid map */
120 memmove(map + i, map + i + 2, 130 memmove(map + i, map + i + 2,
121 (sb_oid_cursize(rs) - i - 131 (sb_oid_cursize(rs) - i -
122 2) * sizeof(__u32)); 132 2) * sizeof(__u32));
123 //disk_sb->s_oid_cursize -= 2;
124 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); 133 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
125 134
126 RFALSE(sb_oid_cursize(rs) < 2 || 135 RFALSE(sb_oid_cursize(rs) < 2 ||
@@ -135,14 +144,19 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
135 objectid_to_release < le32_to_cpu(map[i + 1])) { 144 objectid_to_release < le32_to_cpu(map[i + 1])) {
136 /* size of objectid map is not changed */ 145 /* size of objectid map is not changed */
137 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { 146 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
138 //objectid_map[i+1]--;
139 le32_add_cpu(&map[i + 1], -1); 147 le32_add_cpu(&map[i + 1], -1);
140 return; 148 return;
141 } 149 }
142 150
143 /* JDM comparing two little-endian values for equality -- safe */ 151 /*
152 * JDM comparing two little-endian values for
153 * equality -- safe
154 */
155 /*
156 * objectid map must be expanded, but
157 * there is no space
158 */
144 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { 159 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
145 /* objectid map must be expanded, but there is no space */
146 PROC_INFO_INC(s, leaked_oid); 160 PROC_INFO_INC(s, leaked_oid);
147 return; 161 return;
148 } 162 }
@@ -178,8 +192,9 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s)
178 new_objectid_map = (__le32 *) (disk_sb + 1); 192 new_objectid_map = (__le32 *) (disk_sb + 1);
179 193
180 if (cur_size > new_size) { 194 if (cur_size > new_size) {
181 /* mark everyone used that was listed as free at the end of the objectid 195 /*
182 ** map 196 * mark everyone used that was listed as free at
197 * the end of the objectid map
183 */ 198 */
184 objectid_map[new_size - 1] = objectid_map[cur_size - 1]; 199 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
185 set_sb_oid_cursize(disk_sb, new_size); 200 set_sb_oid_cursize(disk_sb, new_size);
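To make the odd/even boundary scheme concrete, here is a tiny userspace model of the map (illustrative only -- the array contents and sizes are invented, and the kernel works on little-endian __le32 values with memmove rather than this loop):

#include <stdio.h>

/* boundaries: ids 1..4 in use, 5..9 free, 10..11 in use, 12.. free */
static unsigned map[8] = { 1, 5, 10, 12 };
static int cursize = 4;

static unsigned alloc_oid(void)
{
	unsigned oid = map[1]++;	/* first unused objectid */

	/* first free run exhausted: collapse the surrounding runs */
	if (cursize > 2 && map[1] == map[2]) {
		for (int i = 1; i < cursize - 2; i++)
			map[i] = map[i + 2];
		cursize -= 2;
	}
	return oid;
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		unsigned oid = alloc_oid();
		printf("allocated %u, map size now %d\n", oid, cursize);
	}
	return 0;
}

Allocating 5..9 empties the free run, the two in-use runs merge into one pair of boundaries ({1, 12}), and the next allocation returns 12 -- exactly the compaction the comment above describes.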
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 41f788148d44..c7425fdf19f9 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -172,18 +172,19 @@ static char *is_there_reiserfs_struct(char *fmt, int *what)
172 return k; 172 return k;
173} 173}
174 174
175/* debugging reiserfs we used to print out a lot of different 175/*
176 variables, like keys, item headers, buffer heads etc. Values of 176 * debugging reiserfs we used to print out a lot of different
177 most fields matter. So it took a long time just to write 177 * variables, like keys, item headers, buffer heads etc. Values of
178 appropriative printk. With this reiserfs_warning you can use format 178 * most fields matter. So it took a long time just to write
179 specification for complex structures like you used to do with 179 * an appropriate printk. With this reiserfs_warning you can use format
180 printfs for integers, doubles and pointers. For instance, to print 180 * specification for complex structures like you used to do with
181 out key structure you have to write just: 181 * printfs for integers, doubles and pointers. For instance, to print
182 reiserfs_warning ("bad key %k", key); 182 * out key structure you have to write just:
183 instead of 183 * reiserfs_warning ("bad key %k", key);
184 printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, 184 * instead of
185 key->k_offset, key->k_uniqueness); 185 * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid,
186*/ 186 * key->k_offset, key->k_uniqueness);
187 */
187static DEFINE_SPINLOCK(error_lock); 188static DEFINE_SPINLOCK(error_lock);
188static void prepare_error_buf(const char *fmt, va_list args) 189static void prepare_error_buf(const char *fmt, va_list args)
189{ 190{
@@ -243,15 +244,16 @@ static void prepare_error_buf(const char *fmt, va_list args)
243 244
244} 245}
245 246
246/* in addition to usual conversion specifiers this accepts reiserfs 247/*
247 specific conversion specifiers: 248 * in addition to usual conversion specifiers this accepts reiserfs
248 %k to print little endian key, 249 * specific conversion specifiers:
249 %K to print cpu key, 250 * %k to print little endian key,
250 %h to print item_head, 251 * %K to print cpu key,
251 %t to print directory entry 252 * %h to print item_head,
252 %z to print block head (arg must be struct buffer_head * 253 * %t to print directory entry
253 %b to print buffer_head 254 * %z to print block head (arg must be struct buffer_head *)
254*/ 255 * %b to print buffer_head
256 */
255 257
256#define do_reiserfs_warning(fmt)\ 258#define do_reiserfs_warning(fmt)\
257{\ 259{\
@@ -304,50 +306,52 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
304#endif 306#endif
305} 307}
306 308
307/* The format: 309/*
308 310 * The format:
309 maintainer-errorid: [function-name:] message 311 *
310 312 * maintainer-errorid: [function-name:] message
311 where errorid is unique to the maintainer and function-name is 313 *
312 optional, is recommended, so that anyone can easily find the bug 314 * where errorid is unique to the maintainer and function-name is
313 with a simple grep for the short to type string 315 * optional, is recommended, so that anyone can easily find the bug
314 maintainer-errorid. Don't bother with reusing errorids, there are 316 * with a simple grep for the short to type string
315 lots of numbers out there. 317 * maintainer-errorid. Don't bother with reusing errorids, there are
316 318 * lots of numbers out there.
317 Example: 319 *
318 320 * Example:
319 reiserfs_panic( 321 *
320 p_sb, "reiser-29: reiserfs_new_blocknrs: " 322 * reiserfs_panic(
321 "one of search_start or rn(%d) is equal to MAX_B_NUM," 323 * p_sb, "reiser-29: reiserfs_new_blocknrs: "
322 "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", 324 * "one of search_start or rn(%d) is equal to MAX_B_NUM,"
323 rn, bh 325 * "which means that we are optimizing location based on the "
324 ); 326 * "bogus location of a temp buffer (%p).",
325 327 * rn, bh
326 Regular panic()s sometimes clear the screen before the message can 328 * );
327 be read, thus the need for the while loop. 329 *
328 330 * Regular panic()s sometimes clear the screen before the message can
329 Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it 331 * be read, thus the need for the while loop.
330 pointless complexity): 332 *
331 333 * Numbering scheme for panic used by Vladimir and Anatoly (Hans completely
332 panics in reiserfs.h have numbers from 1000 to 1999 334 * ignores this scheme, and considers it pointless complexity):
333 super.c 2000 to 2999 335 *
334 preserve.c (unused) 3000 to 3999 336 * panics in reiserfs_fs.h have numbers from 1000 to 1999
335 bitmap.c 4000 to 4999 337 * super.c 2000 to 2999
336 stree.c 5000 to 5999 338 * preserve.c (unused) 3000 to 3999
337 prints.c 6000 to 6999 339 * bitmap.c 4000 to 4999
338 namei.c 7000 to 7999 340 * stree.c 5000 to 5999
339 fix_nodes.c 8000 to 8999 341 * prints.c 6000 to 6999
340 dir.c 9000 to 9999 342 * namei.c 7000 to 7999
341 lbalance.c 10000 to 10999 343 * fix_nodes.c 8000 to 8999
342 ibalance.c 11000 to 11999 not ready 344 * dir.c 9000 to 9999
343 do_balan.c 12000 to 12999 345 * lbalance.c 10000 to 10999
344 inode.c 13000 to 13999 346 * ibalance.c 11000 to 11999 not ready
345 file.c 14000 to 14999 347 * do_balan.c 12000 to 12999
346 objectid.c 15000 - 15999 348 * inode.c 13000 to 13999
347 buffer.c 16000 - 16999 349 * file.c 14000 to 14999
348 symlink.c 17000 - 17999 350 * objectid.c 15000 - 15999
349 351 * buffer.c 16000 - 16999
350 . */ 352 * symlink.c 17000 - 17999
353 *
354 */
351 355
352void __reiserfs_panic(struct super_block *sb, const char *id, 356void __reiserfs_panic(struct super_block *sb, const char *id,
353 const char *function, const char *fmt, ...) 357 const char *function, const char *fmt, ...)
@@ -411,9 +415,11 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
411 reiserfs_abort_journal(sb, errno); 415 reiserfs_abort_journal(sb, errno);
412} 416}
413 417
414/* this prints internal nodes (4 keys/items in line) (dc_number, 418/*
415 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, 419 * this prints internal nodes (4 keys/items in line) (dc_number,
416 dc_size)...*/ 420 * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
421 * dc_size)...
422 */
417static int print_internal(struct buffer_head *bh, int first, int last) 423static int print_internal(struct buffer_head *bh, int first, int last)
418{ 424{
419 struct reiserfs_key *key; 425 struct reiserfs_key *key;
@@ -543,9 +549,11 @@ static int print_super_block(struct buffer_head *bh)
543 printk("Block count %u\n", sb_block_count(rs)); 549 printk("Block count %u\n", sb_block_count(rs));
544 printk("Blocksize %d\n", sb_blocksize(rs)); 550 printk("Blocksize %d\n", sb_blocksize(rs));
545 printk("Free blocks %u\n", sb_free_blocks(rs)); 551 printk("Free blocks %u\n", sb_free_blocks(rs));
546 // FIXME: this would be confusing if 552 /*
547 // someone stores reiserfs super block in some data block ;) 553 * FIXME: this would be confusing if
554 * someone stores reiserfs super block in some data block ;)
548// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); 555// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
556 */
549 skipped = bh->b_blocknr; 557 skipped = bh->b_blocknr;
550 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - 558 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
551 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 559 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
@@ -581,8 +589,8 @@ static int print_desc_block(struct buffer_head *bh)
581 589
582 return 0; 590 return 0;
583} 591}
584 592/* ..., int print_mode, int first, int last) */
585void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last) 593void print_block(struct buffer_head *bh, ...)
586{ 594{
587 va_list args; 595 va_list args;
588 int mode, first, last; 596 int mode, first, last;
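To connect the specifier table above to a concrete call: a hypothetical call site (the errorid strings are invented for the example; the (sb, id, fmt, ...) shape matches the reiserfs_warning and reiserfs_panic calls elsewhere in this patch, e.g. "vs-7032" in namei.c and "vs-15010" in objectid.c):

	reiserfs_warning(sb, "vs-6999", "bad key %k in item %h",
			 &ih->ih_key, ih);
	reiserfs_panic(sb, "vs-6998", "cpu key %K out of order", &key);

%k consumes a little-endian struct reiserfs_key *, %K a struct cpu_key *, per the table above.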
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 35bfde10ca0f..2195e7f2297f 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details 2 * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for
3 * licensing and copyright details
3 */ 4 */
4 5
5#include <linux/reiserfs_fs.h> 6#include <linux/reiserfs_fs.h>
@@ -23,52 +24,73 @@
23 24
24struct reiserfs_journal_list; 25struct reiserfs_journal_list;
25 26
26/** bitmasks for i_flags field in reiserfs-specific part of inode */ 27/* bitmasks for i_flags field in reiserfs-specific part of inode */
27typedef enum { 28typedef enum {
28 /** this says what format of key do all items (but stat data) of 29 /*
29 an object have. If this is set, that format is 3.6 otherwise 30 * this says what format of key do all items (but stat data) of
30 - 3.5 */ 31 * an object have. If this is set, that format is 3.6 otherwise - 3.5
32 */
31 i_item_key_version_mask = 0x0001, 33 i_item_key_version_mask = 0x0001,
32 /** If this is unset, object has 3.5 stat data, otherwise, it has 34
33 3.6 stat data with 64bit size, 32bit nlink etc. */ 35 /*
36 * If this is unset, object has 3.5 stat data, otherwise,
37 * it has 3.6 stat data with 64bit size, 32bit nlink etc.
38 */
34 i_stat_data_version_mask = 0x0002, 39 i_stat_data_version_mask = 0x0002,
35 /** file might need tail packing on close */ 40
41 /* file might need tail packing on close */
36 i_pack_on_close_mask = 0x0004, 42 i_pack_on_close_mask = 0x0004,
37 /** don't pack tail of file */ 43
44 /* don't pack tail of file */
38 i_nopack_mask = 0x0008, 45 i_nopack_mask = 0x0008,
39 /** If those is set, "safe link" was created for this file during 46
40 truncate or unlink. Safe link is used to avoid leakage of disk 47 /*
41 space on crash with some files open, but unlinked. */ 48 * If either of these is set, "safe link" was created for this
49 * file during truncate or unlink. Safe link is used to avoid
50 * leakage of disk space on crash with some files open, but unlinked.
51 */
42 i_link_saved_unlink_mask = 0x0010, 52 i_link_saved_unlink_mask = 0x0010,
43 i_link_saved_truncate_mask = 0x0020, 53 i_link_saved_truncate_mask = 0x0020,
54
44 i_has_xattr_dir = 0x0040, 55 i_has_xattr_dir = 0x0040,
45 i_data_log = 0x0080, 56 i_data_log = 0x0080,
46} reiserfs_inode_flags; 57} reiserfs_inode_flags;
47 58
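A short sketch of how these masks get used elsewhere in the tree -- illustrative, not taken from this patch (REISERFS_I() maps a VFS inode to its reiserfs_inode_info, as in the namei.c hunks above):

	/* does this file's tail need packing when it is closed? */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		/* ... pack the tail ... */
	}

	/* opt this file out of tail packing for good */
	REISERFS_I(inode)->i_flags |= i_nopack_mask;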
48struct reiserfs_inode_info { 59struct reiserfs_inode_info {
49 __u32 i_key[4]; /* key is still 4 32 bit integers */ 60 __u32 i_key[4]; /* key is still 4 32 bit integers */
50 /** transient inode flags that are never stored on disk. Bitmasks 61
51 for this field are defined above. */ 62 /*
63 * transient inode flags that are never stored on disk. Bitmasks
64 * for this field are defined above.
65 */
52 __u32 i_flags; 66 __u32 i_flags;
53 67
54 __u32 i_first_direct_byte; // offset of first byte stored in direct item. 68 /* offset of first byte stored in direct item. */
69 __u32 i_first_direct_byte;
55 70
56 /* copy of persistent inode flags read from sd_attrs. */ 71 /* copy of persistent inode flags read from sd_attrs. */
57 __u32 i_attrs; 72 __u32 i_attrs;
58 73
59 int i_prealloc_block; /* first unused block of a sequence of unused blocks */ 74 /* first unused block of a sequence of unused blocks */
75 int i_prealloc_block;
60 int i_prealloc_count; /* length of that sequence */ 76 int i_prealloc_count; /* length of that sequence */
61 struct list_head i_prealloc_list; /* per-transaction list of inodes which
62 * have preallocated blocks */
63 77
64 unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks 78 /* per-transaction list of inodes which have preallocated blocks */
65 * for the contents of this directory should be 79 struct list_head i_prealloc_list;
66 * displaced */ 80
81 /*
82 * new_packing_locality is created; new blocks for the contents
83 * of this directory should be displaced
84 */
85 unsigned new_packing_locality:1;
67 86
68 /* we use these for fsync or O_SYNC to decide which transaction 87 /*
69 ** needs to be committed in order for this inode to be properly 88 * we use these for fsync or O_SYNC to decide which transaction
70 ** flushed */ 89 * needs to be committed in order for this inode to be properly
90 * flushed
91 */
71 unsigned int i_trans_id; 92 unsigned int i_trans_id;
93
72 struct reiserfs_journal_list *i_jl; 94 struct reiserfs_journal_list *i_jl;
73 atomic_t openers; 95 atomic_t openers;
74 struct mutex tailpack; 96 struct mutex tailpack;
@@ -82,9 +104,10 @@ typedef enum {
82 reiserfs_attrs_cleared = 0x00000001, 104 reiserfs_attrs_cleared = 0x00000001,
83} reiserfs_super_block_flags; 105} reiserfs_super_block_flags;
84 106
85/* struct reiserfs_super_block accessors/mutators 107/*
86 * since this is a disk structure, it will always be in 108 * struct reiserfs_super_block accessors/mutators since this is a disk
87 * little endian format. */ 109 * structure, it will always be in little endian format.
110 */
88#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) 111#define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count))
89#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) 112#define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v))
90#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) 113#define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks))
@@ -152,48 +175,61 @@ typedef enum {
152 175
153/* LOGGING -- */ 176/* LOGGING -- */
154 177
155/* These all interelate for performance. 178/*
156** 179 * These all interrelate for performance.
157** If the journal block count is smaller than n transactions, you lose speed. 180 *
158** I don't know what n is yet, I'm guessing 8-16. 181 * If the journal block count is smaller than n transactions, you lose speed.
159** 182 * I don't know what n is yet, I'm guessing 8-16.
160** typical transaction size depends on the application, how often fsync is 183 *
161** called, and how many metadata blocks you dirty in a 30 second period. 184 * typical transaction size depends on the application, how often fsync is
162** The more small files (<16k) you use, the larger your transactions will 185 * called, and how many metadata blocks you dirty in a 30 second period.
163** be. 186 * The more small files (<16k) you use, the larger your transactions will
164** 187 * be.
165** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal 188 *
166** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough 189 * If your journal fills faster than dirty buffers get flushed to disk, it
167** to prevent wrapping before dirty meta blocks get to disk. 190 * must flush them before allowing the journal to wrap, which slows things
168** 191 * down. If you need high speed meta data updates, the journal should be
169** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal 192 * big enough to prevent wrapping before dirty meta blocks get to disk.
170** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. 193 *
171** 194 * If the batch max is smaller than the transaction max, you'll waste space
172** The larger the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. 195 * at the end of the journal because journal_end sets the next transaction
173** 196 * to start at 0 if the next transaction has any chance of wrapping.
174*/ 197 *
 198 * The larger the batch max age, the better the speed, and the more meta
199 * data changes you'll lose after a crash.
200 */
175 201
176/* don't mess with these for a while */ 202/* don't mess with these for a while */
177 /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ 203/* we have a node size define somewhere in reiserfs_fs.h. -Hans */
178#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ 204#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */
179#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ 205#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */
180#define JOURNAL_HASH_SIZE 8192 206#define JOURNAL_HASH_SIZE 8192
181#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ 207
182 208/* number of copies of the bitmaps to have floating. Must be >= 2 */
183/* One of these for every block in every transaction 209#define JOURNAL_NUM_BITMAPS 5
184** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a 210
185** hash of all the in memory transactions. 211/*
186** next and prev are used by the current transaction (journal_hash). 212 * One of these for every block in every transaction.
187** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash 213 * Each one is in two hash tables. First, a hash of the current transaction,
188** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging 214 * and after journal_end, a hash of all the in memory transactions.
189** to a given transaction. 215 * next and prev are used by the current transaction (journal_hash).
190*/ 216 * hnext and hprev are used by journal_list_hash. If a block is in more
217 * than one transaction, the journal_list_hash links it in multiple times.
218 * This allows flush_journal_list to remove just the cnode belonging to a
219 * given transaction.
220 */
191struct reiserfs_journal_cnode { 221struct reiserfs_journal_cnode {
192 struct buffer_head *bh; /* real buffer head */ 222 struct buffer_head *bh; /* real buffer head */
193 struct super_block *sb; /* dev of real buffer head */ 223 struct super_block *sb; /* dev of real buffer head */
194 __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ 224
225 /* block number of real buffer head, == 0 when buffer on disk */
226 __u32 blocknr;
227
195 unsigned long state; 228 unsigned long state;
196 struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ 229
230 /* journal list this cnode lives in */
231 struct reiserfs_journal_list *jlist;
232
197 struct reiserfs_journal_cnode *next; /* next in transaction list */ 233 struct reiserfs_journal_cnode *next; /* next in transaction list */
198 struct reiserfs_journal_cnode *prev; /* prev in transaction list */ 234 struct reiserfs_journal_cnode *prev; /* prev in transaction list */
199 struct reiserfs_journal_cnode *hprev; /* prev in hash list */ 235 struct reiserfs_journal_cnode *hprev; /* prev in hash list */
@@ -212,18 +248,22 @@ struct reiserfs_list_bitmap {
212}; 248};
213 249
214/* 250/*
215** one of these for each transaction. The most important part here is the j_realblock. 251 * one of these for each transaction. The most important part here is the
216** This list of cnodes is used to hash all the blocks in all the commits, to mark all the 252 * j_realblock. This list of cnodes is used to hash all the blocks in all
217** real buffer heads dirty once all the commits hit the disk, 253 * the commits, to mark all the real buffer heads dirty once all the commits
218** and to make sure every real block in a transaction is on disk before allowing the log area 254 * hit the disk, and to make sure every real block in a transaction is on
219** to be overwritten */ 255 * disk before allowing the log area to be overwritten
256 */
220struct reiserfs_journal_list { 257struct reiserfs_journal_list {
221 unsigned long j_start; 258 unsigned long j_start;
222 unsigned long j_state; 259 unsigned long j_state;
223 unsigned long j_len; 260 unsigned long j_len;
224 atomic_t j_nonzerolen; 261 atomic_t j_nonzerolen;
225 atomic_t j_commit_left; 262 atomic_t j_commit_left;
226 atomic_t j_older_commits_done; /* all commits older than this on disk */ 263
264 /* all commits older than this on disk */
265 atomic_t j_older_commits_done;
266
227 struct mutex j_commit_mutex; 267 struct mutex j_commit_mutex;
228 unsigned int j_trans_id; 268 unsigned int j_trans_id;
229 time_t j_timestamp; 269 time_t j_timestamp;
@@ -234,11 +274,15 @@ struct reiserfs_journal_list {
234 /* time ordered list of all active transactions */ 274 /* time ordered list of all active transactions */
235 struct list_head j_list; 275 struct list_head j_list;
236 276
237 /* time ordered list of all transactions we haven't tried to flush yet */ 277 /*
278 * time ordered list of all transactions we haven't tried
279 * to flush yet
280 */
238 struct list_head j_working_list; 281 struct list_head j_working_list;
239 282
240 /* list of tail conversion targets in need of flush before commit */ 283 /* list of tail conversion targets in need of flush before commit */
241 struct list_head j_tail_bh_list; 284 struct list_head j_tail_bh_list;
285
242 /* list of data=ordered buffers in need of flush before commit */ 286 /* list of data=ordered buffers in need of flush before commit */
243 struct list_head j_bh_list; 287 struct list_head j_bh_list;
244 int j_refcount; 288 int j_refcount;
@@ -246,46 +290,83 @@ struct reiserfs_journal_list {
246 290
247struct reiserfs_journal { 291struct reiserfs_journal {
248 struct buffer_head **j_ap_blocks; /* journal blocks on disk */ 292 struct buffer_head **j_ap_blocks; /* journal blocks on disk */
249 struct reiserfs_journal_cnode *j_last; /* newest journal block */ 293 /* newest journal block */
250 struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ 294 struct reiserfs_journal_cnode *j_last;
295
296 /* oldest journal block. start here for traverse */
297 struct reiserfs_journal_cnode *j_first;
251 298
252 struct block_device *j_dev_bd; 299 struct block_device *j_dev_bd;
253 fmode_t j_dev_mode; 300 fmode_t j_dev_mode;
254 int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ 301
 302 /* first block on s_dev of the reserved journal area */
303 int j_1st_reserved_block;
255 304
256 unsigned long j_state; 305 unsigned long j_state;
257 unsigned int j_trans_id; 306 unsigned int j_trans_id;
258 unsigned long j_mount_id; 307 unsigned long j_mount_id;
259 unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ 308
309 /* start of current waiting commit (index into j_ap_blocks) */
310 unsigned long j_start;
260 unsigned long j_len; /* length of current waiting commit */ 311 unsigned long j_len; /* length of current waiting commit */
261 unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ 312
313 /* number of buffers requested by journal_begin() */
314 unsigned long j_len_alloc;
315
262 atomic_t j_wcount; /* count of writers for current commit */ 316 atomic_t j_wcount; /* count of writers for current commit */
263 unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ 317
264 unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ 318 /* batch count. allows turning X transactions into 1 */
265 unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ 319 unsigned long j_bcount;
320
321 /* first unflushed transactions offset */
322 unsigned long j_first_unflushed_offset;
323
324 /* last fully flushed journal timestamp */
325 unsigned j_last_flush_trans_id;
326
266 struct buffer_head *j_header_bh; 327 struct buffer_head *j_header_bh;
267 328
268 time_t j_trans_start_time; /* time this transaction started */ 329 time_t j_trans_start_time; /* time this transaction started */
269 struct mutex j_mutex; 330 struct mutex j_mutex;
270 struct mutex j_flush_mutex; 331 struct mutex j_flush_mutex;
271 wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ 332
272 atomic_t j_jlock; /* lock for j_join_wait */ 333 /* wait for current transaction to finish before starting new one */
334 wait_queue_head_t j_join_wait;
335
336 atomic_t j_jlock; /* lock for j_join_wait */
273 int j_list_bitmap_index; /* number of next list bitmap to use */ 337 int j_list_bitmap_index; /* number of next list bitmap to use */
274 int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */ 338
275 int j_next_full_flush; /* next journal_end will flush all journal list */ 339 /* no more journal begins allowed. MUST sleep on j_join_wait */
276 int j_next_async_flush; /* next journal_end will flush all async commits */ 340 int j_must_wait;
341
342 /* next journal_end will flush all journal list */
343 int j_next_full_flush;
344
345 /* next journal_end will flush all async commits */
346 int j_next_async_flush;
277 347
278 int j_cnode_used; /* number of cnodes on the used list */ 348 int j_cnode_used; /* number of cnodes on the used list */
279 int j_cnode_free; /* number of cnodes on the free list */ 349 int j_cnode_free; /* number of cnodes on the free list */
280 350
281 unsigned int j_trans_max; /* max number of blocks in a transaction. */ 351 /* max number of blocks in a transaction. */
282 unsigned int j_max_batch; /* max number of blocks to batch into a trans */ 352 unsigned int j_trans_max;
283 unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ 353
284 unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ 354 /* max number of blocks to batch into a trans */
285 unsigned int j_default_max_commit_age; /* the default for the max commit age */ 355 unsigned int j_max_batch;
356
357 /* in seconds, how old can an async commit be */
358 unsigned int j_max_commit_age;
359
360 /* in seconds, how old can a transaction be */
361 unsigned int j_max_trans_age;
362
363 /* the default for the max commit age */
364 unsigned int j_default_max_commit_age;
286 365
287 struct reiserfs_journal_cnode *j_cnode_free_list; 366 struct reiserfs_journal_cnode *j_cnode_free_list;
288 struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ 367
368 /* orig pointer returned from vmalloc */
369 struct reiserfs_journal_cnode *j_cnode_free_orig;
289 370
290 struct reiserfs_journal_list *j_current_jl; 371 struct reiserfs_journal_list *j_current_jl;
291 int j_free_bitmap_nodes; 372 int j_free_bitmap_nodes;
@@ -306,14 +387,21 @@ struct reiserfs_journal {
306 387
307 /* list of all active transactions */ 388 /* list of all active transactions */
308 struct list_head j_journal_list; 389 struct list_head j_journal_list;
390
309 /* lists that haven't been touched by writeback attempts */ 391 /* lists that haven't been touched by writeback attempts */
310 struct list_head j_working_list; 392 struct list_head j_working_list;
311 393
312 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ 394 /* hash table for real buffer heads in current trans */
313 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ 395 struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE];
314 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all 396
315 the transactions */ 397 /* hash table for all the real buffer heads in all the transactions */
316 struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ 398 struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE];
399
400 /* array of bitmaps to record the deleted blocks */
401 struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS];
402
403 /* list of inodes which have preallocated blocks */
404 struct list_head j_prealloc_list;
317 int j_persistent_trans; 405 int j_persistent_trans;
318 unsigned long j_max_trans_size; 406 unsigned long j_max_trans_size;
319 unsigned long j_max_batch_size; 407 unsigned long j_max_batch_size;
@@ -328,11 +416,12 @@ struct reiserfs_journal {
328 416
329enum journal_state_bits { 417enum journal_state_bits {
330 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ 418 J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */
331 J_WRITERS_QUEUED, /* set when log is full due to too many writers */ 419 J_WRITERS_QUEUED, /* set when log is full due to too many writers */
332 J_ABORTED, /* set when log is aborted */ 420 J_ABORTED, /* set when log is aborted */
333}; 421};
334 422
335#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ 423/* ick. magic string to find desc blocks in the journal */
424#define JOURNAL_DESC_MAGIC "ReIsErLB"
336 425
337typedef __u32(*hashf_t) (const signed char *, int); 426typedef __u32(*hashf_t) (const signed char *, int);
338 427
@@ -364,7 +453,10 @@ typedef struct reiserfs_proc_info_data {
364 stat_cnt_t leaked_oid; 453 stat_cnt_t leaked_oid;
365 stat_cnt_t leaves_removable; 454 stat_cnt_t leaves_removable;
366 455
367 /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */ 456 /*
457 * balances per level.
458 * Use explicit 5 as MAX_HEIGHT is not visible yet.
459 */
368 stat_cnt_t balance_at[5]; /* XXX */ 460 stat_cnt_t balance_at[5]; /* XXX */
369 /* sbk == search_by_key */ 461 /* sbk == search_by_key */
370 stat_cnt_t sbk_read_at[5]; /* XXX */ 462 stat_cnt_t sbk_read_at[5]; /* XXX */
@@ -416,18 +508,24 @@ typedef struct reiserfs_proc_info_data {
416 508
417/* reiserfs union of in-core super block data */ 509/* reiserfs union of in-core super block data */
418struct reiserfs_sb_info { 510struct reiserfs_sb_info {
419 struct buffer_head *s_sbh; /* Buffer containing the super block */ 511 /* Buffer containing the super block */
420 /* both the comment and the choice of 512 struct buffer_head *s_sbh;
421 name are unclear for s_rs -Hans */ 513
422 struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ 514 /* Pointer to the on-disk super block in the buffer */
515 struct reiserfs_super_block *s_rs;
423 struct reiserfs_bitmap_info *s_ap_bitmap; 516 struct reiserfs_bitmap_info *s_ap_bitmap;
424 struct reiserfs_journal *s_journal; /* pointer to journal information */ 517
518 /* pointer to journal information */
519 struct reiserfs_journal *s_journal;
520
425 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ 521 unsigned short s_mount_state; /* reiserfs state (valid, invalid) */
426 522
427 /* Serialize writers access, replace the old bkl */ 523 /* Serialize writers access, replace the old bkl */
428 struct mutex lock; 524 struct mutex lock;
525
429 /* Owner of the lock (can be recursive) */ 526 /* Owner of the lock (can be recursive) */
430 struct task_struct *lock_owner; 527 struct task_struct *lock_owner;
528
431 /* Depth of the lock, start from -1 like the bkl */ 529 /* Depth of the lock, start from -1 like the bkl */
432 int lock_depth; 530 int lock_depth;
433 531
@@ -435,30 +533,50 @@ struct reiserfs_sb_info {
435 533
436 /* Comment? -Hans */ 534 /* Comment? -Hans */
437 void (*end_io_handler) (struct buffer_head *, int); 535 void (*end_io_handler) (struct buffer_head *, int);
438 hashf_t s_hash_function; /* pointer to function which is used 536
439 to sort names in directory. Set on 537 /*
440 mount */ 538 * pointer to function which is used to sort names in directory.
441 unsigned long s_mount_opt; /* reiserfs's mount options are set 539 * Set on mount
442 here (currently - NOTAIL, NOLOG, 540 */
443 REPLAYONLY) */ 541 hashf_t s_hash_function;
444 542
445 struct { /* This is a structure that describes block allocator options */ 543 /* reiserfs's mount options are set here */
446 unsigned long bits; /* Bitfield for enable/disable kind of options */ 544 unsigned long s_mount_opt;
447 unsigned long large_file_size; /* size (in blocks) starting from which we consider a file to be a large one */ 545
546 /* This is a structure that describes block allocator options */
547 struct {
548 /* Bitfield for enable/disable kind of options */
549 unsigned long bits;
550
551 /*
 552 * size (in blocks) starting from which we
 553 * consider a file to be a large one
554 */
555 unsigned long large_file_size;
556
448 int border; /* percentage of disk, border takes */ 557 int border; /* percentage of disk, border takes */
449 int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ 558
450 int preallocsize; /* Number of blocks we try to prealloc when file 559 /*
451 reaches preallocmin size (in blocks) or 560 * Minimal file size (in blocks) starting
452 prealloc_list is empty. */ 561 * from which we do preallocations
562 */
563 int preallocmin;
564
565 /*
566 * Number of blocks we try to prealloc when file
567 * reaches preallocmin size (in blocks) or prealloc_list
 568 * is empty.
569 */
570 int preallocsize;
453 } s_alloc_options; 571 } s_alloc_options;
454 572
455 /* Comment? -Hans */ 573 /* Comment? -Hans */
456 wait_queue_head_t s_wait; 574 wait_queue_head_t s_wait;
457 /* To be obsoleted soon by per buffer seals.. -Hans */ 575 /* increased by one every time the tree gets re-balanced */
458 atomic_t s_generation_counter; // increased by one every time the 576 atomic_t s_generation_counter;
459 // tree gets re-balanced 577
460 unsigned long s_properties; /* File system properties. Currently holds 578 /* File system properties. Currently holds on-disk FS format */
461 on-disk FS format */ 579 unsigned long s_properties;
462 580
463 /* session statistics */ 581 /* session statistics */
464 int s_disk_reads; 582 int s_disk_reads;
@@ -471,14 +589,23 @@ struct reiserfs_sb_info {
471 int s_bmaps_without_search; 589 int s_bmaps_without_search;
472 int s_direct2indirect; 590 int s_direct2indirect;
473 int s_indirect2direct; 591 int s_indirect2direct;
474 /* set up when it's ok for reiserfs_read_inode2() to read from 592
475 disk inode with nlink==0. Currently this is only used during 593 /*
476 finish_unfinished() processing at mount time */ 594 * set up when it's ok for reiserfs_read_inode2() to read from
595 * disk inode with nlink==0. Currently this is only used during
596 * finish_unfinished() processing at mount time
597 */
477 int s_is_unlinked_ok; 598 int s_is_unlinked_ok;
599
478 reiserfs_proc_info_data_t s_proc_info_data; 600 reiserfs_proc_info_data_t s_proc_info_data;
479 struct proc_dir_entry *procdir; 601 struct proc_dir_entry *procdir;
480 int reserved_blocks; /* amount of blocks reserved for further allocations */ 602
481 spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ 603 /* amount of blocks reserved for further allocations */
604 int reserved_blocks;
605
606
 607 /* this lock is now only used to protect the reserved_blocks variable */
608 spinlock_t bitmap_lock;
482 struct dentry *priv_root; /* root of /.reiserfs_priv */ 609 struct dentry *priv_root; /* root of /.reiserfs_priv */
483 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ 610 struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */
484 int j_errno; 611 int j_errno;
@@ -494,14 +621,13 @@ struct reiserfs_sb_info {
494 char *s_jdev; /* Stored jdev for mount option showing */ 621 char *s_jdev; /* Stored jdev for mount option showing */
495#ifdef CONFIG_REISERFS_CHECK 622#ifdef CONFIG_REISERFS_CHECK
496 623
497 struct tree_balance *cur_tb; /* 624 /*
498 * Detects whether more than one 625 * Detects whether more than one copy of tb exists per superblock
499 * copy of tb exists per superblock 626 * as a means of checking whether do_balance is executing
500 * as a means of checking whether 627 * concurrently against another tree reader/writer on a same
501 * do_balance is executing concurrently 628 * mount point.
502 * against another tree reader/writer 629 */
503 * on a same mount point. 630 struct tree_balance *cur_tb;
504 */
505#endif 631#endif
506}; 632};
507 633
@@ -510,25 +636,36 @@ struct reiserfs_sb_info {
510#define REISERFS_3_6 1 636#define REISERFS_3_6 1
511#define REISERFS_OLD_FORMAT 2 637#define REISERFS_OLD_FORMAT 2
512 638
513enum reiserfs_mount_options {
514/* Mount options */ 639/* Mount options */
515 REISERFS_LARGETAIL, /* large tails will be created in a session */ 640enum reiserfs_mount_options {
516 REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ 641 /* large tails will be created in a session */
517 REPLAYONLY, /* replay journal and return 0. Used by fsck */ 642 REISERFS_LARGETAIL,
518 REISERFS_CONVERT, /* -o conv: causes conversion of old 643 /*
519 format super block to the new 644 * small (for files less than block size) tails will
520 format. If not specified - old 645 * be created in a session
521 partition will be dealt with in a 646 */
522 manner of 3.5.x */ 647 REISERFS_SMALLTAIL,
523 648
524/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting 649 /* replay journal and return 0. Used by fsck */
525** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option 650 REPLAYONLY,
526** is not required. If the normal autodetection code can't determine which 651
527** hash to use (because both hashes had the same value for a file) 652 /*
528** use this option to force a specific hash. It won't allow you to override 653 * -o conv: causes conversion of old format super block to the
529** the existing hash on the FS, so if you have a tea hash disk, and mount 654 * new format. If not specified - old partition will be dealt
530** with -o hash=rupasov, the mount will fail. 655 * with in a manner of 3.5.x
531*/ 656 */
657 REISERFS_CONVERT,
658
659 /*
660 * -o hash={tea, rupasov, r5, detect} is meant for properly mounting
661 * reiserfs disks from 3.5.19 or earlier. 99% of the time, this
 662 * option is not required. If the normal autodetection code can't
663 * determine which hash to use (because both hashes had the same
664 * value for a file) use this option to force a specific hash.
665 * It won't allow you to override the existing hash on the FS, so
666 * if you have a tea hash disk, and mount with -o hash=rupasov,
667 * the mount will fail.
668 */
532 FORCE_TEA_HASH, /* try to force tea hash on mount */ 669 FORCE_TEA_HASH, /* try to force tea hash on mount */
533 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ 670 FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */
534 FORCE_R5_HASH, /* try to force r5 hash on mount */ 671 FORCE_R5_HASH, /* try to force r5 hash on mount */
@@ -538,9 +675,11 @@ enum reiserfs_mount_options {
538 REISERFS_DATA_ORDERED, 675 REISERFS_DATA_ORDERED,
539 REISERFS_DATA_WRITEBACK, 676 REISERFS_DATA_WRITEBACK,
540 677
541/* used for testing experimental features, makes benchmarking new 678 /*
542 features with and without more convenient, should never be used by 679 * used for testing experimental features, makes benchmarking new
543 users in any code shipped to users (ideally) */ 680 * features with and without them more convenient, should never be used by
681 * users in any code shipped to users (ideally)
682 */
544 683
545 REISERFS_NO_BORDER, 684 REISERFS_NO_BORDER,
546 REISERFS_NO_UNHASHED_RELOCATION, 685 REISERFS_NO_UNHASHED_RELOCATION,
@@ -707,28 +846,28 @@ static inline void reiserfs_cond_resched(struct super_block *s)
707 846
708struct fid; 847struct fid;
709 848
710/* in reading the #defines, it may help to understand that they employ 849/*
711 the following abbreviations: 850 * in reading the #defines, it may help to understand that they employ
712 851 * the following abbreviations:
713 B = Buffer 852 *
714 I = Item header 853 * B = Buffer
715 H = Height within the tree (should be changed to LEV) 854 * I = Item header
716 N = Number of the item in the node 855 * H = Height within the tree (should be changed to LEV)
717 STAT = stat data 856 * N = Number of the item in the node
718 DEH = Directory Entry Header 857 * STAT = stat data
719 EC = Entry Count 858 * DEH = Directory Entry Header
720 E = Entry number 859 * EC = Entry Count
721 UL = Unsigned Long 860 * E = Entry number
722 BLKH = BLocK Header 861 * UL = Unsigned Long
723 UNFM = UNForMatted node 862 * BLKH = BLocK Header
724 DC = Disk Child 863 * UNFM = UNForMatted node
725 P = Path 864 * DC = Disk Child
726 865 * P = Path
727 These #defines are named by concatenating these abbreviations, 866 *
728 where first comes the arguments, and last comes the return value, 867 * These #defines are named by concatenating these abbreviations,
729 of the macro. 868 * where first come the arguments, and last comes the return value,
730 869 * of the macro.
731*/ 870 */
732 871
733#define USE_INODE_GENERATION_COUNTER 872#define USE_INODE_GENERATION_COUNTER
734 873
@@ -739,14 +878,17 @@ struct fid;
739/* n must be power of 2 */ 878/* n must be power of 2 */
740#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) 879#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u))
741 880
742// to be ok for alpha and others we have to align structures to 8 byte 881/*
743// boundary. 882 * to be ok for alpha and others we have to align structures to 8 byte
744// FIXME: do not change 4 by anything else: there is code which relies on that 883 * boundary.
884 * FIXME: do not change 4 by anything else: there is code which relies on that
885 */
745#define ROUND_UP(x) _ROUND_UP(x,8LL) 886#define ROUND_UP(x) _ROUND_UP(x,8LL)
746 887
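The _ROUND_UP() trick above works because, for a power-of-two n, adding n-1 and masking with ~(n-1) snaps a value up to the next multiple of n. A stand-alone user-space sketch of the same arithmetic (hypothetical macro name, not part of the patch):

#include <assert.h>

/* same mask trick as _ROUND_UP() above; n must be a power of 2 */
#define round_up_pow2(x, n) (((x) + (n) - 1u) & ~((n) - 1u))

int main(void)
{
	assert(round_up_pow2(5, 8) == 8);   /* snaps up to the next multiple of 8 */
	assert(round_up_pow2(16, 8) == 16); /* already-aligned values are unchanged */
	return 0;
}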
747/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug 888/*
748** messages. 889 * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug
749*/ 890 * messages.
891 */
750#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ 892#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */
751 893
752void __reiserfs_warning(struct super_block *s, const char *id, 894void __reiserfs_warning(struct super_block *s, const char *id,
@@ -755,7 +897,7 @@ void __reiserfs_warning(struct super_block *s, const char *id,
755 __reiserfs_warning(s, id, __func__, fmt, ##args) 897 __reiserfs_warning(s, id, __func__, fmt, ##args)
756/* assertions handling */ 898/* assertions handling */
757 899
758/** always check a condition and panic if it's false. */ 900/* always check a condition and panic if it's false. */
759#define __RASSERT(cond, scond, format, args...) \ 901#define __RASSERT(cond, scond, format, args...) \
760do { \ 902do { \
761 if (!(cond)) \ 903 if (!(cond)) \
@@ -778,35 +920,48 @@ do { \
778 * Disk Data Structures 920 * Disk Data Structures
779 */ 921 */
780 922
781/***************************************************************************/ 923/***************************************************************************
782/* SUPER BLOCK */ 924 * SUPER BLOCK *
783/***************************************************************************/ 925 ***************************************************************************/
784 926
785/* 927/*
786 * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs 928 * Structure of super block on disk, a version of which in RAM is often
787 * the version in RAM is part of a larger structure containing fields never written to disk. 929 * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger
930 * structure containing fields never written to disk.
788 */ 931 */
789#define UNSET_HASH 0 // read_super will guess about, what hash names 932#define UNSET_HASH 0 /* Detect hash on disk */
790 // in directories were sorted with
791#define TEA_HASH 1 933#define TEA_HASH 1
792#define YURA_HASH 2 934#define YURA_HASH 2
793#define R5_HASH 3 935#define R5_HASH 3
794#define DEFAULT_HASH R5_HASH 936#define DEFAULT_HASH R5_HASH
795 937
796struct journal_params { 938struct journal_params {
797 __le32 jp_journal_1st_block; /* where does journal start from on its 939 /* where does journal start from on its device */
798 * device */ 940 __le32 jp_journal_1st_block;
799 __le32 jp_journal_dev; /* journal device st_rdev */ 941
800 __le32 jp_journal_size; /* size of the journal */ 942 /* journal device st_rdev */
801 __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ 943 __le32 jp_journal_dev;
802 __le32 jp_journal_magic; /* random value made on fs creation (this 944
803 * was sb_journal_block_count) */ 945 /* size of the journal */
804 __le32 jp_journal_max_batch; /* max number of blocks to batch into a 946 __le32 jp_journal_size;
805 * trans */ 947
806 __le32 jp_journal_max_commit_age; /* in seconds, how old can an async 948 /* max number of blocks in a transaction. */
807 * commit be */ 949 __le32 jp_journal_trans_max;
808 __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction 950
809 * be */ 951 /*
952 * random value made on fs creation
953 * (this was sb_journal_block_count)
954 */
955 __le32 jp_journal_magic;
956
957 /* max number of blocks to batch into a trans */
958 __le32 jp_journal_max_batch;
959
960 /* in seconds, how old can an async commit be */
961 __le32 jp_journal_max_commit_age;
962
963 /* in seconds, how old can a transaction be */
964 __le32 jp_journal_max_trans_age;
810}; 965};
811 966
812/* this is the super from 3.5.X, where X >= 10 */ 967/* this is the super from 3.5.X, where X >= 10 */
@@ -816,26 +971,48 @@ struct reiserfs_super_block_v1 {
816 __le32 s_root_block; /* root block number */ 971 __le32 s_root_block; /* root block number */
817 struct journal_params s_journal; 972 struct journal_params s_journal;
818 __le16 s_blocksize; /* block size */ 973 __le16 s_blocksize; /* block size */
819 __le16 s_oid_maxsize; /* max size of object id array, see 974
820 * get_objectid() commentary */ 975 /* max size of object id array, see get_objectid() commentary */
976 __le16 s_oid_maxsize;
821 __le16 s_oid_cursize; /* current size of object id array */ 977 __le16 s_oid_cursize; /* current size of object id array */
822 __le16 s_umount_state; /* this is set to 1 when filesystem was 978
823 * umounted, to 2 - when not */ 979 /* this is set to 1 when filesystem was umounted, to 2 when not */
824 char s_magic[10]; /* reiserfs magic string indicates that 980 __le16 s_umount_state;
825 * file system is reiserfs: 981
826 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ 982 /*
827 __le16 s_fs_state; /* it is used by fsck to mark which 983 * reiserfs magic string indicates that file system is reiserfs:
828 * phase of rebuilding is done */ 984 * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs"
829 __le32 s_hash_function_code; /* indicates which hash function is being used 985 */
830 * to sort names in a directory*/ 986 char s_magic[10];
987
988 /*
 989 * it is used by fsck to mark which
990 * phase of rebuilding is done
991 */
992 __le16 s_fs_state;
993 /*
 994 * indicates which hash function is being used
995 * to sort names in a directory
996 */
997 __le32 s_hash_function_code;
831 __le16 s_tree_height; /* height of disk tree */ 998 __le16 s_tree_height; /* height of disk tree */
832 __le16 s_bmap_nr; /* amount of bitmap blocks needed to address 999
833 * each block of file system */ 1000 /*
834 __le16 s_version; /* this field is only reliable on filesystem 1001 * amount of bitmap blocks needed to address
835 * with non-standard journal */ 1002 * each block of file system
836 __le16 s_reserved_for_journal; /* size in blocks of journal area on main 1003 */
837 * device, we need to keep after 1004 __le16 s_bmap_nr;
838 * making fs with non-standard journal */ 1005
1006 /*
1007 * this field is only reliable on filesystem with non-standard journal
1008 */
1009 __le16 s_version;
1010
1011 /*
1012 * size in blocks of journal area on main device, we need to
1013 * keep after making fs with non-standard journal
1014 */
1015 __le16 s_reserved_for_journal;
839} __attribute__ ((__packed__)); 1016} __attribute__ ((__packed__));
840 1017
841#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) 1018#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1))
@@ -844,17 +1021,21 @@ struct reiserfs_super_block_v1 {
844struct reiserfs_super_block { 1021struct reiserfs_super_block {
845 struct reiserfs_super_block_v1 s_v1; 1022 struct reiserfs_super_block_v1 s_v1;
846 __le32 s_inode_generation; 1023 __le32 s_inode_generation;
847 __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ 1024
1025 /* Right now used only by inode-attributes, if enabled */
1026 __le32 s_flags;
1027
848 unsigned char s_uuid[16]; /* filesystem unique identifier */ 1028 unsigned char s_uuid[16]; /* filesystem unique identifier */
849 unsigned char s_label[16]; /* filesystem volume label */ 1029 unsigned char s_label[16]; /* filesystem volume label */
850 __le16 s_mnt_count; /* Count of mounts since last fsck */ 1030 __le16 s_mnt_count; /* Count of mounts since last fsck */
851 __le16 s_max_mnt_count; /* Maximum mounts before check */ 1031 __le16 s_max_mnt_count; /* Maximum mounts before check */
852 __le32 s_lastcheck; /* Timestamp of last fsck */ 1032 __le32 s_lastcheck; /* Timestamp of last fsck */
853 __le32 s_check_interval; /* Interval between checks */ 1033 __le32 s_check_interval; /* Interval between checks */
854 char s_unused[76]; /* zero filled by mkreiserfs and 1034
855 * reiserfs_convert_objectid_map_v1() 1035 /*
856 * so any additions must be updated 1036 * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1()
857 * there as well. */ 1037 * so any additions must be updated there as well. */
1038 char s_unused[76];
858} __attribute__ ((__packed__)); 1039} __attribute__ ((__packed__));
859 1040
860#define SB_SIZE (sizeof(struct reiserfs_super_block)) 1041#define SB_SIZE (sizeof(struct reiserfs_super_block))
@@ -862,7 +1043,7 @@ struct reiserfs_super_block {
862#define REISERFS_VERSION_1 0 1043#define REISERFS_VERSION_1 0
863#define REISERFS_VERSION_2 2 1044#define REISERFS_VERSION_2 2
864 1045
865// on-disk super block fields converted to cpu form 1046/* on-disk super block fields converted to cpu form */
866#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) 1047#define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs)
867#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) 1048#define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1))
868#define SB_BLOCKSIZE(s) \ 1049#define SB_BLOCKSIZE(s) \
@@ -917,11 +1098,13 @@ int is_reiserfs_3_5(struct reiserfs_super_block *rs);
917int is_reiserfs_3_6(struct reiserfs_super_block *rs); 1098int is_reiserfs_3_6(struct reiserfs_super_block *rs);
918int is_reiserfs_jr(struct reiserfs_super_block *rs); 1099int is_reiserfs_jr(struct reiserfs_super_block *rs);
919 1100
920/* ReiserFS leaves the first 64k unused, so that partition labels have 1101/*
921 enough space. If someone wants to write a fancy bootloader that 1102 * ReiserFS leaves the first 64k unused, so that partition labels have
922 needs more than 64k, let us know, and this will be increased in size. 1103 * enough space. If someone wants to write a fancy bootloader that
923 This number must be larger than the largest block size on any 1104 * needs more than 64k, let us know, and this will be increased in size.
924 platform, or code will break. -Hans */ 1105 * This number must be larger than the largest block size on any
1106 * platform, or code will break. -Hans
1107 */
925#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) 1108#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024)
926#define REISERFS_FIRST_BLOCK unused_define 1109#define REISERFS_FIRST_BLOCK unused_define
927#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES 1110#define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES
@@ -946,8 +1129,7 @@ struct unfm_nodeinfo {
946 unsigned short unfm_freespace; 1129 unsigned short unfm_freespace;
947}; 1130};
948 1131
949/* there are two formats of keys: 3.5 and 3.6 1132/* there are two formats of keys: 3.5 and 3.6 */
950 */
951#define KEY_FORMAT_3_5 0 1133#define KEY_FORMAT_3_5 0
952#define KEY_FORMAT_3_6 1 1134#define KEY_FORMAT_3_6 1
953 1135
@@ -965,8 +1147,10 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
965 return sb->s_fs_info; 1147 return sb->s_fs_info;
966} 1148}
967 1149
968/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 1150/*
969 * which overflows on large file systems. */ 1151 * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
1152 * which overflows on large file systems.
1153 */
970static inline __u32 reiserfs_bmap_count(struct super_block *sb) 1154static inline __u32 reiserfs_bmap_count(struct super_block *sb)
971{ 1155{
972 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; 1156 return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
@@ -977,8 +1161,10 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
977 return bmap_nr > ((1LL << 16) - 1); 1161 return bmap_nr > ((1LL << 16) - 1);
978} 1162}
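To make the overflow concrete, a user-space sketch of the same arithmetic (hypothetical values, not kernel code): one bitmap block covers blocksize * 8 filesystem blocks, and a large enough device needs more bitmap blocks than a u16 can count.

#include <stdio.h>

/* mirrors reiserfs_bmap_count(): one bitmap block per blocksize*8 fs blocks */
static unsigned int bmap_count(unsigned long long blocks, unsigned int bsize)
{
	return (blocks - 1) / (bsize * 8) + 1;
}

int main(void)
{
	/* 2^31 blocks of 4KiB (8TiB) need 65536 bitmaps, one too many for a u16 */
	unsigned int n = bmap_count(1ULL << 31, 4096);
	printf("%u bitmaps, wraps=%d\n", n, n > ((1LL << 16) - 1));
	return 0;
}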
979 1163
980/** this says about version of key of all items (but stat data) the 1164/*
981 object consists of */ 1165 * this gives the key version of all items (except stat data) that
 1166 * the object consists of
1167 */
982#define get_inode_item_key_version( inode ) \ 1168#define get_inode_item_key_version( inode ) \
983 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) 1169 ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5)
984 1170
@@ -997,16 +1183,18 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
997 else \ 1183 else \
998 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) 1184 REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; })
999 1185
1000/* This is an aggressive tail suppression policy, I am hoping it 1186/*
1001 improves our benchmarks. The principle behind it is that percentage 1187 * This is an aggressive tail suppression policy, I am hoping it
1002 space saving is what matters, not absolute space saving. This is 1188 * improves our benchmarks. The principle behind it is that percentage
1003 non-intuitive, but it helps to understand it if you consider that the 1189 * space saving is what matters, not absolute space saving. This is
1004 cost to access 4 blocks is not much more than the cost to access 1 1190 * non-intuitive, but it helps to understand it if you consider that the
1005 block, if you have to do a seek and rotate. A tail risks a 1191 * cost to access 4 blocks is not much more than the cost to access 1
1006 non-linear disk access that is significant as a percentage of total 1192 * block, if you have to do a seek and rotate. A tail risks a
1007 time cost for a 4 block file and saves an amount of space that is 1193 * non-linear disk access that is significant as a percentage of total
1008 less significant as a percentage of space, or so goes the hypothesis. 1194 * time cost for a 4 block file and saves an amount of space that is
1009 -Hans */ 1195 * less significant as a percentage of space, or so goes the hypothesis.
1196 * -Hans
1197 */
1010#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ 1198#define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \
1011(\ 1199(\
1012 (!(n_tail_size)) || \ 1200 (!(n_tail_size)) || \
@@ -1020,10 +1208,11 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
1020 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ 1208 ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \
1021) 1209)
1022 1210
1023/* Another strategy for tails, this one means only create a tail if all the 1211/*
1024 file would fit into one DIRECT item. 1212 * Another strategy for tails, this one means only create a tail if all the
1025 Primary intention for this one is to increase performance by decreasing 1213 * file would fit into one DIRECT item.
1026 seeking. 1214 * Primary intention for this one is to increase performance by decreasing
1215 * seeking.
1027*/ 1216 */
1028#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ 1217#define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \
1029(\ 1218(\
@@ -1037,23 +1226,21 @@ static inline int bmap_would_wrap(unsigned bmap_nr)
1037#define REISERFS_VALID_FS 1 1226#define REISERFS_VALID_FS 1
1038#define REISERFS_ERROR_FS 2 1227#define REISERFS_ERROR_FS 2
1039 1228
1040// 1229/*
1041// there are 5 item types currently 1230 * there are 5 item types currently
1042// 1231 */
1043#define TYPE_STAT_DATA 0 1232#define TYPE_STAT_DATA 0
1044#define TYPE_INDIRECT 1 1233#define TYPE_INDIRECT 1
1045#define TYPE_DIRECT 2 1234#define TYPE_DIRECT 2
1046#define TYPE_DIRENTRY 3 1235#define TYPE_DIRENTRY 3
1047#define TYPE_MAXTYPE 3 1236#define TYPE_MAXTYPE 3
1048#define TYPE_ANY 15 // FIXME: comment is required 1237#define TYPE_ANY 15 /* FIXME: comment is required */
1049 1238
1050/***************************************************************************/ 1239/***************************************************************************
1051/* KEY & ITEM HEAD */ 1240 * KEY & ITEM HEAD *
1052/***************************************************************************/ 1241 ***************************************************************************/
1053 1242
1054// 1243/* directories use this key as well as old files */
1055// directories use this key as well as old files
1056//
1057struct offset_v1 { 1244struct offset_v1 {
1058 __le32 k_offset; 1245 __le32 k_offset;
1059 __le32 k_uniqueness; 1246 __le32 k_uniqueness;
@@ -1086,11 +1273,14 @@ static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset)
1086 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); 1273 v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset);
1087} 1274}
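The mask in set_offset_v2_k_offset() keeps the 4-bit type in bits 60-63 while replacing the 60-bit offset below it. A host-endian sketch of that packing (the cpu_to_le64 conversions are deliberately left out; names are illustrative):

#include <assert.h>
#include <stdint.h>

#define V2_TYPE_SHIFT 60
#define V2_OFFSET_MASK ((1ULL << V2_TYPE_SHIFT) - 1)

int main(void)
{
	/* type 2 in the top 4 bits, offset 4096 in the low 60 bits */
	uint64_t v = ((uint64_t)2 << V2_TYPE_SHIFT) | 4096;
	assert((v >> V2_TYPE_SHIFT) == 2 && (v & V2_OFFSET_MASK) == 4096);

	/* replacing the offset preserves the type, as in the function above */
	v = (v & (15ULL << V2_TYPE_SHIFT)) | 8192;
	assert((v >> V2_TYPE_SHIFT) == 2 && (v & V2_OFFSET_MASK) == 8192);
	return 0;
}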
1088 1275
1089/* Key of an item determines its location in the S+tree, and 1276/*
1090 is composed of 4 components */ 1277 * Key of an item determines its location in the S+tree, and
1278 * is composed of 4 components
1279 */
1091struct reiserfs_key { 1280struct reiserfs_key {
1092 __le32 k_dir_id; /* packing locality: by default parent 1281 /* packing locality: by default parent directory object id */
1093 directory object id */ 1282 __le32 k_dir_id;
1283
1094 __le32 k_objectid; /* object identifier */ 1284 __le32 k_objectid; /* object identifier */
1095 union { 1285 union {
1096 struct offset_v1 k_offset_v1; 1286 struct offset_v1 k_offset_v1;
@@ -1099,8 +1289,8 @@ struct reiserfs_key {
1099} __attribute__ ((__packed__)); 1289} __attribute__ ((__packed__));
1100 1290
1101struct in_core_key { 1291struct in_core_key {
1102 __u32 k_dir_id; /* packing locality: by default parent 1292 /* packing locality: by default parent directory object id */
1103 directory object id */ 1293 __u32 k_dir_id;
1104 __u32 k_objectid; /* object identifier */ 1294 __u32 k_objectid; /* object identifier */
1105 __u64 k_offset; 1295 __u64 k_offset;
1106 __u8 k_type; 1296 __u8 k_type;
@@ -1109,14 +1299,16 @@ struct in_core_key {
1109struct cpu_key { 1299struct cpu_key {
1110 struct in_core_key on_disk_key; 1300 struct in_core_key on_disk_key;
1111 int version; 1301 int version;
1112 int key_length; /* 3 in all cases but direct2indirect and 1302 /* 3 in all cases but direct2indirect and indirect2direct conversion */
1113 indirect2direct conversion */ 1303 int key_length;
1114}; 1304};
1115 1305
1116/* Our function for comparing keys can compare keys of different 1306/*
1117 lengths. It takes as a parameter the length of the keys it is to 1307 * Our function for comparing keys can compare keys of different
1118 compare. These defines are used in determining what is to be passed 1308 * lengths. It takes as a parameter the length of the keys it is to
1119 to it as that parameter. */ 1309 * compare. These defines are used in determining what is to be passed
1310 * to it as that parameter.
1311 */
1120#define REISERFS_FULL_KEY_LEN 4 1312#define REISERFS_FULL_KEY_LEN 4
1121#define REISERFS_SHORT_KEY_LEN 2 1313#define REISERFS_SHORT_KEY_LEN 2
1122 1314
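In that spirit, a toy comparator over a configurable number of 32-bit key components (an illustrative sketch only, not the kernel's comparison routine):

#include <assert.h>
#include <stdint.h>

static int comp_keys(const uint32_t *a, const uint32_t *b, int key_length)
{
	for (int i = 0; i < key_length; i++)
		if (a[i] != b[i])
			return a[i] < b[i] ? -1 : 1;
	return 0;
}

int main(void)
{
	uint32_t k1[4] = {1, 42, 0, 0}, k2[4] = {1, 42, 7, 0};
	assert(comp_keys(k1, k2, 2) == 0); /* short compare: dir_id/objectid match */
	assert(comp_keys(k1, k2, 4) < 0);  /* full compare sees the offsets differ */
	return 0;
}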
@@ -1145,40 +1337,52 @@ struct cpu_key {
1145#define POSITION_FOUND 1 1337#define POSITION_FOUND 1
1146#define POSITION_NOT_FOUND 0 1338#define POSITION_NOT_FOUND 0
1147 1339
1148// return values for reiserfs_find_entry and search_by_entry_key 1340/* return values for reiserfs_find_entry and search_by_entry_key */
1149#define NAME_FOUND 1 1341#define NAME_FOUND 1
1150#define NAME_NOT_FOUND 0 1342#define NAME_NOT_FOUND 0
1151#define GOTO_PREVIOUS_ITEM 2 1343#define GOTO_PREVIOUS_ITEM 2
1152#define NAME_FOUND_INVISIBLE 3 1344#define NAME_FOUND_INVISIBLE 3
1153 1345
1154/* Everything in the filesystem is stored as a set of items. The 1346/*
1155 item head contains the key of the item, its free space (for 1347 * Everything in the filesystem is stored as a set of items. The
1156 indirect items) and specifies the location of the item itself 1348 * item head contains the key of the item, its free space (for
1157 within the block. */ 1349 * indirect items) and specifies the location of the item itself
1350 * within the block.
1351 */
1158 1352
1159struct item_head { 1353struct item_head {
1160 /* Everything in the tree is found by searching for it based on 1354 /*
1161 * its key.*/ 1355 * Everything in the tree is found by searching for it based on
1356 * its key.
1357 */
1162 struct reiserfs_key ih_key; 1358 struct reiserfs_key ih_key;
1163 union { 1359 union {
1164 /* The free space in the last unformatted node of an 1360 /*
1165 indirect item if this is an indirect item. This 1361 * The free space in the last unformatted node of an
1166 equals 0xFFFF iff this is a direct item or stat data 1362 * indirect item if this is an indirect item. This
1167 item. Note that the key, not this field, is used to 1363 * equals 0xFFFF iff this is a direct item or stat data
1168 determine the item type, and thus which field this 1364 * item. Note that the key, not this field, is used to
1169 union contains. */ 1365 * determine the item type, and thus which field this
1366 * union contains.
1367 */
1170 __le16 ih_free_space_reserved; 1368 __le16 ih_free_space_reserved;
1171 /* Iff this is a directory item, this field equals the 1369
1172 number of directory entries in the directory item. */ 1370 /*
1371 * Iff this is a directory item, this field equals the
1372 * number of directory entries in the directory item.
1373 */
1173 __le16 ih_entry_count; 1374 __le16 ih_entry_count;
1174 } __attribute__ ((__packed__)) u; 1375 } __attribute__ ((__packed__)) u;
1175 __le16 ih_item_len; /* total size of the item body */ 1376 __le16 ih_item_len; /* total size of the item body */
1176 __le16 ih_item_location; /* an offset to the item body 1377
1177 * within the block */ 1378 /* an offset to the item body within the block */
1178 __le16 ih_version; /* 0 for all old items, 2 for new 1379 __le16 ih_item_location;
1179 ones. Highest bit is set by fsck 1380
1180 temporary, cleaned after all 1381 /*
1181 done */ 1382 * 0 for all old items, 2 for new ones. Highest bit is set by fsck
 1383 * temporarily, cleaned after all is done
1384 */
1385 __le16 ih_version;
1182} __attribute__ ((__packed__)); 1386} __attribute__ ((__packed__));
1183/* size of item header */ 1387/* size of item header */
1184#define IH_SIZE (sizeof(struct item_head)) 1388#define IH_SIZE (sizeof(struct item_head))
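The arithmetic behind IH_SIZE: a 16-byte key (two __le32 ids plus an 8-byte offset union) followed by four 16-bit fields gives 24 bytes. A hedged user-space mirror with fixed-width stand-ins for the __le types:

#include <stdint.h>
#include <stdio.h>

struct ih_mirror {
	uint32_t k_dir_id, k_objectid; /* reiserfs_key ids */
	uint64_t k_offset;             /* offset_v1 (2x32 bit) or offset_v2 (64 bit) */
	uint16_t u;                    /* free space / entry count union */
	uint16_t item_len, item_location, version;
} __attribute__((__packed__));

int main(void)
{
	printf("%zu\n", sizeof(struct ih_mirror)); /* expect 24 == IH_SIZE */
	return 0;
}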
@@ -1200,27 +1404,24 @@ struct item_head {
1200#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) 1404#define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih))
1201#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) 1405#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val)))
1202 1406
1203/* these operate on indirect items, where you've got an array of ints 1407/*
1204** at a possibly unaligned location. These are a noop on ia32 1408 * these operate on indirect items, where you've got an array of ints
1205** 1409 * at a possibly unaligned location. These are a noop on ia32
1206** p is the array of __u32, i is the index into the array, v is the value 1410 *
1207** to store there. 1411 * p is the array of __u32, i is the index into the array, v is the value
1208*/ 1412 * to store there.
1413 */
1209#define get_block_num(p, i) get_unaligned_le32((p) + (i)) 1414#define get_block_num(p, i) get_unaligned_le32((p) + (i))
1210#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) 1415#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
1211 1416
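A portable user-space stand-in for what these macros do (memcpy instead of get_unaligned_le32, little-endian host assumed; the helper name is hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* read the i-th 32-bit block number from a possibly unaligned array */
static uint32_t get_blk(const unsigned char *p, int i)
{
	uint32_t v;
	memcpy(&v, p + 4 * i, sizeof(v)); /* memcpy is safe at any alignment */
	return v;
}

int main(void)
{
	unsigned char buf[16];
	uint32_t blk = 12345;
	memcpy(buf + 1, &blk, 4);          /* deliberately misaligned slot */
	printf("%u\n", get_blk(buf + 1, 0));
	return 0;
}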
1212// 1417/* in old version uniqueness field shows key type */
1213// in old version uniqueness field shows key type
1214//
1215#define V1_SD_UNIQUENESS 0 1418#define V1_SD_UNIQUENESS 0
1216#define V1_INDIRECT_UNIQUENESS 0xfffffffe 1419#define V1_INDIRECT_UNIQUENESS 0xfffffffe
1217#define V1_DIRECT_UNIQUENESS 0xffffffff 1420#define V1_DIRECT_UNIQUENESS 0xffffffff
1218#define V1_DIRENTRY_UNIQUENESS 500 1421#define V1_DIRENTRY_UNIQUENESS 500
1219#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required 1422#define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */
1220 1423
1221// 1424/* here are conversion routines */
1222// here are conversion routines
1223//
1224static inline int uniqueness2type(__u32 uniqueness) CONSTF; 1425static inline int uniqueness2type(__u32 uniqueness) CONSTF;
1225static inline int uniqueness2type(__u32 uniqueness) 1426static inline int uniqueness2type(__u32 uniqueness)
1226{ 1427{
@@ -1257,11 +1458,11 @@ static inline __u32 type2uniqueness(int type)
1257 } 1458 }
1258} 1459}
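A compact restatement of the v1 mapping these two routines implement, using the V1_*_UNIQUENESS and TYPE_* values defined above (sketch only):

#include <assert.h>
#include <stdint.h>

static int uniq2type(uint32_t u)
{
	switch (u) {
	case 0:          return 0;  /* V1_SD_UNIQUENESS       -> TYPE_STAT_DATA */
	case 0xfffffffe: return 1;  /* V1_INDIRECT_UNIQUENESS -> TYPE_INDIRECT  */
	case 0xffffffff: return 2;  /* V1_DIRECT_UNIQUENESS   -> TYPE_DIRECT    */
	case 500:        return 3;  /* V1_DIRENTRY_UNIQUENESS -> TYPE_DIRENTRY  */
	default:         return 15; /* V1_ANY_UNIQUENESS      -> TYPE_ANY       */
	}
}

int main(void)
{
	assert(uniq2type(0xffffffff) == 2 && uniq2type(500) == 3);
	return 0;
}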
1259 1460
1260// 1461/*
1261// key is pointer to on disk key which is stored in le, result is cpu, 1462 * key is pointer to on disk key which is stored in le, result is cpu,
1262// there is no way to get version of object from key, so, provide 1463 * there is no way to get version of object from key, so, provide
1263// version to these defines 1464 * version to these defines
1264// 1465 */
1265static inline loff_t le_key_k_offset(int version, 1466static inline loff_t le_key_k_offset(int version,
1266 const struct reiserfs_key *key) 1467 const struct reiserfs_key *key)
1267{ 1468{
@@ -1350,9 +1551,7 @@ static inline int is_statdata_le_key(int version, struct reiserfs_key *key)
1350 return le_key_k_type(version, key) == TYPE_STAT_DATA; 1551 return le_key_k_type(version, key) == TYPE_STAT_DATA;
1351} 1552}
1352 1553
1353// 1554/* item header has version. */
1354// item header has version.
1355//
1356static inline int is_direntry_le_ih(struct item_head *ih) 1555static inline int is_direntry_le_ih(struct item_head *ih)
1357{ 1556{
1358 return is_direntry_le_key(ih_version(ih), &ih->ih_key); 1557 return is_direntry_le_key(ih_version(ih), &ih->ih_key);
@@ -1373,9 +1572,7 @@ static inline int is_statdata_le_ih(struct item_head *ih)
1373 return is_statdata_le_key(ih_version(ih), &ih->ih_key); 1572 return is_statdata_le_key(ih_version(ih), &ih->ih_key);
1374} 1573}
1375 1574
1376// 1575/* key is pointer to cpu key, result is cpu */
1377// key is pointer to cpu key, result is cpu
1378//
1379static inline loff_t cpu_key_k_offset(const struct cpu_key *key) 1576static inline loff_t cpu_key_k_offset(const struct cpu_key *key)
1380{ 1577{
1381 return key->on_disk_key.k_offset; 1578 return key->on_disk_key.k_offset;
@@ -1426,7 +1623,7 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key)
1426 1623
1427extern struct reiserfs_key root_key; 1624extern struct reiserfs_key root_key;
1428 1625
1429/* 1626/*
1430 * Picture represents a leaf of the S+tree 1627 * Picture represents a leaf of the S+tree
1431 * ______________________________________________________ 1628 * ______________________________________________________
1432 * | | Array of | | | 1629 * | | Array of | | |
@@ -1435,15 +1632,19 @@ extern struct reiserfs_key root_key;
1435 * |______|_______________|___________________|___________| 1632 * |______|_______________|___________________|___________|
1436 */ 1633 */
1437 1634
1438/* Header of a disk block. More precisely, header of a formatted leaf 1635/*
1439 or internal node, and not the header of an unformatted node. */ 1636 * Header of a disk block. More precisely, header of a formatted leaf
1637 * or internal node, and not the header of an unformatted node.
1638 */
1440struct block_head { 1639struct block_head {
1441 __le16 blk_level; /* Level of a block in the tree. */ 1640 __le16 blk_level; /* Level of a block in the tree. */
1442 __le16 blk_nr_item; /* Number of keys/items in a block. */ 1641 __le16 blk_nr_item; /* Number of keys/items in a block. */
1443 __le16 blk_free_space; /* Block free space in bytes. */ 1642 __le16 blk_free_space; /* Block free space in bytes. */
1444 __le16 blk_reserved; 1643 __le16 blk_reserved;
1445 /* dump this in v4/planA */ 1644 /* dump this in v4/planA */
1446 struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ 1645
1646 /* kept only for compatibility */
1647 struct reiserfs_key blk_right_delim_key;
1447}; 1648};
1448 1649
1449#define BLKH_SIZE (sizeof(struct block_head)) 1650#define BLKH_SIZE (sizeof(struct block_head))
@@ -1458,18 +1659,20 @@ struct block_head {
1458#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) 1659#define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key)
1459#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) 1660#define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val)
1460 1661
1662/* values for blk_level field of the struct block_head */
1663
1461/* 1664/*
1462 * values for blk_level field of the struct block_head 1665 * When a node gets removed from the tree, its blk_level is set to FREE_LEVEL.
1666 * It is then used to see whether the node is still in the tree
1463 */ 1667 */
1464 1668#define FREE_LEVEL 0
1465#define FREE_LEVEL 0 /* when node gets removed from the tree its
1466 blk_level is set to FREE_LEVEL. It is then
1467 used to see whether the node is still in the
1468 tree */
1469 1669
1470#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ 1670#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */
1471 1671
1472/* Given the buffer head of a formatted node, resolve to the block head of that node. */ 1672/*
1673 * Given the buffer head of a formatted node, resolve to the
1674 * block head of that node.
1675 */
1473#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) 1676#define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data))
1474/* Number of items that are in buffer. */ 1677/* Number of items that are in buffer. */
1475#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) 1678#define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh)))
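The resolution step is just a cast of the node's first bytes; a user-space sketch under the simplifying assumption of native-endian fields (the struct here is a trimmed stand-in, not the real block_head):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct blkh {
	uint16_t level, nr_item, free_space;
};

int main(void)
{
	unsigned char node[4096] = {0};
	struct blkh h = { .level = 1 /* DISK_LEAF_NODE_LEVEL */, .nr_item = 3 };
	memcpy(node, &h, sizeof(h));            /* header lives at offset 0 */

	struct blkh *hdr = (struct blkh *)node; /* the B_BLK_HEAD() step */
	printf("level=%u items=%u\n", hdr->level, hdr->nr_item);
	return 0;
}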
@@ -1490,14 +1693,14 @@ struct block_head {
1490#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ 1693#define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \
1491 && B_LEVEL(bh) <= MAX_HEIGHT) 1694 && B_LEVEL(bh) <= MAX_HEIGHT)
1492 1695
1493/***************************************************************************/ 1696/***************************************************************************
1494/* STAT DATA */ 1697 * STAT DATA *
1495/***************************************************************************/ 1698 ***************************************************************************/
1496 1699
1497// 1700/*
1498// old stat data is 32 bytes long. We are going to distinguish new one by 1701 * old stat data is 32 bytes long. We are going to distinguish the new
1499// different size 1702 * one by its different size.
1500// 1703 */
1501struct stat_data_v1 { 1704struct stat_data_v1 {
1502 __le16 sd_mode; /* file type, permissions */ 1705 __le16 sd_mode; /* file type, permissions */
1503 __le16 sd_nlink; /* number of hard links */ 1706 __le16 sd_nlink; /* number of hard links */
@@ -1506,20 +1709,25 @@ struct stat_data_v1 {
1506 __le32 sd_size; /* file size */ 1709 __le32 sd_size; /* file size */
1507 __le32 sd_atime; /* time of last access */ 1710 __le32 sd_atime; /* time of last access */
1508 __le32 sd_mtime; /* time file was last modified */ 1711 __le32 sd_mtime; /* time file was last modified */
1509 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ 1712
1713 /*
1714 * time inode (stat data) was last changed
1715 * (except changes to sd_atime and sd_mtime)
1716 */
1717 __le32 sd_ctime;
1510 union { 1718 union {
1511 __le32 sd_rdev; 1719 __le32 sd_rdev;
1512 __le32 sd_blocks; /* number of blocks file uses */ 1720 __le32 sd_blocks; /* number of blocks file uses */
1513 } __attribute__ ((__packed__)) u; 1721 } __attribute__ ((__packed__)) u;
1514 __le32 sd_first_direct_byte; /* first byte of file which is stored 1722
1515 in a direct item: except that if it 1723 /*
1516 equals 1 it is a symlink and if it 1724 * first byte of file which is stored in a direct item: except that if
1517 equals ~(__u32)0 there is no 1725 * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no
1518 direct item. The existence of this 1726 * direct item. The existence of this field really grates on me.
1519 field really grates on me. Let's 1727 * Let's replace it with a macro based on sd_size and our tail
1520 replace it with a macro based on 1728 * suppression policy. Someday. -Hans
1521 sd_size and our tail suppression 1729 */
1522 policy. Someday. -Hans */ 1730 __le32 sd_first_direct_byte;
1523} __attribute__ ((__packed__)); 1731} __attribute__ ((__packed__));
1524 1732
1525#define SD_V1_SIZE (sizeof(struct stat_data_v1)) 1733#define SD_V1_SIZE (sizeof(struct stat_data_v1))
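The overloaded encoding of sd_first_direct_byte (1 means symlink, ~(__u32)0 means no direct item, anything else is a byte offset into the file) is easy to capture in a tiny classifier. This is an illustrative sketch, not a helper from the reiserfs sources:

#include <stdint.h>
#include <stdio.h>

/* interpret stat_data_v1.sd_first_direct_byte per the comment above */
static const char *classify_fdb(uint32_t fdb)
{
        if (fdb == 1)
                return "symlink";
        if (fdb == ~(uint32_t)0)
                return "no direct item";
        return "tail present: first byte stored in a direct item";
}

int main(void)
{
        printf("1         -> %s\n", classify_fdb(1));
        printf("~(__u32)0 -> %s\n", classify_fdb(~(uint32_t)0));
        printf("4097      -> %s\n", classify_fdb(4097));
        return 0;
}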
@@ -1551,8 +1759,10 @@ struct stat_data_v1 {
1551 1759
1552/* inode flags stored in sd_attrs (nee sd_reserved) */ 1760/* inode flags stored in sd_attrs (nee sd_reserved) */
1553 1761
1554/* we want common flags to have the same values as in ext2, 1762/*
1555 so chattr(1) will work without problems */ 1763 * we want common flags to have the same values as in ext2,
1764 * so chattr(1) will work without problems
1765 */
1556#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL 1766#define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL
1557#define REISERFS_APPEND_FL FS_APPEND_FL 1767#define REISERFS_APPEND_FL FS_APPEND_FL
1558#define REISERFS_SYNC_FL FS_SYNC_FL 1768#define REISERFS_SYNC_FL FS_SYNC_FL
@@ -1572,8 +1782,10 @@ struct stat_data_v1 {
1572 REISERFS_COMPR_FL | \ 1782 REISERFS_COMPR_FL | \
1573 REISERFS_NOTAIL_FL ) 1783 REISERFS_NOTAIL_FL )
1574 1784
1575/* Stat Data on disk (reiserfs version of UFS disk inode minus the 1785/*
1576 address blocks) */ 1786 * Stat Data on disk (reiserfs version of UFS disk inode minus the
1787 * address blocks)
1788 */
1577struct stat_data { 1789struct stat_data {
1578 __le16 sd_mode; /* file type, permissions */ 1790 __le16 sd_mode; /* file type, permissions */
1579 __le16 sd_attrs; /* persistent inode flags */ 1791 __le16 sd_attrs; /* persistent inode flags */
@@ -1583,25 +1795,20 @@ struct stat_data {
1583 __le32 sd_gid; /* group */ 1795 __le32 sd_gid; /* group */
1584 __le32 sd_atime; /* time of last access */ 1796 __le32 sd_atime; /* time of last access */
1585 __le32 sd_mtime; /* time file was last modified */ 1797 __le32 sd_mtime; /* time file was last modified */
1586 __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ 1798
1799 /*
1800 * time inode (stat data) was last changed
1801 * (except changes to sd_atime and sd_mtime)
1802 */
1803 __le32 sd_ctime;
1587 __le32 sd_blocks; 1804 __le32 sd_blocks;
1588 union { 1805 union {
1589 __le32 sd_rdev; 1806 __le32 sd_rdev;
1590 __le32 sd_generation; 1807 __le32 sd_generation;
1591 //__le32 sd_first_direct_byte;
1592 /* first byte of file which is stored in a
1593 direct item: except that if it equals 1
1594 it is a symlink and if it equals
1595 ~(__u32)0 there is no direct item. The
1596 existence of this field really grates
1597 on me. Let's replace it with a macro
1598 based on sd_size and our tail
1599 suppression policy? */
1600 } __attribute__ ((__packed__)) u; 1808 } __attribute__ ((__packed__)) u;
1601} __attribute__ ((__packed__)); 1809} __attribute__ ((__packed__));
1602// 1810
1603// this is 44 bytes long 1811/* this is 44 bytes long */
1604//
1605#define SD_SIZE (sizeof(struct stat_data)) 1812#define SD_SIZE (sizeof(struct stat_data))
1606#define SD_V2_SIZE SD_SIZE 1813#define SD_V2_SIZE SD_SIZE
1607#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) 1814#define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6)
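The "44 bytes" figure can be checked at compile time against a packed mirror of the structure. The field list here is reconstructed from the surrounding definitions (sd_nlink and sd_size are not visible in this hunk, so treat their types as an assumption):

#include <stdint.h>

/* packed mirror of struct stat_data: 2 + 2 + 4 + 8 + 7 * 4 = 44 bytes */
struct sd_v2_mirror {
        uint16_t sd_mode;
        uint16_t sd_attrs;
        uint32_t sd_nlink;               /* assumed, not shown in this hunk */
        uint64_t sd_size;                /* assumed, not shown in this hunk */
        uint32_t sd_uid;
        uint32_t sd_gid;
        uint32_t sd_atime;
        uint32_t sd_mtime;
        uint32_t sd_ctime;
        uint32_t sd_blocks;
        uint32_t sd_rdev_or_generation;  /* the union collapses to one word */
} __attribute__((__packed__));

_Static_assert(sizeof(struct sd_v2_mirror) == 44,
               "v2 stat data should be 44 bytes on disk");

int main(void) { return 0; }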
@@ -1632,48 +1839,61 @@ struct stat_data {
1632#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) 1839#define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs))
1633#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) 1840#define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v))
1634 1841
1635/***************************************************************************/ 1842/***************************************************************************
1636/* DIRECTORY STRUCTURE */ 1843 * DIRECTORY STRUCTURE *
1637/***************************************************************************/ 1844 ***************************************************************************/
1638/* 1845/*
1639 Picture represents the structure of directory items 1846 * Picture represents the structure of directory items
1640 ________________________________________________ 1847 * ________________________________________________
1641 | Array of | | | | | | 1848 * | Array of | | | | | |
1642 | directory |N-1| N-2 | .... | 1st |0th| 1849 * | directory |N-1| N-2 | .... | 1st |0th|
1643 | entry headers | | | | | | 1850 * | entry headers | | | | | |
1644 |_______________|___|_____|________|_______|___| 1851 * |_______________|___|_____|________|_______|___|
1645 <---- directory entries ------> 1852 * <---- directory entries ------>
1646 1853 *
1647 First directory item has k_offset component 1. We store "." and ".." 1854 * First directory item has k_offset component 1. We store "." and ".."
1648 in one item, always, we never split "." and ".." into differing 1855 * in one item, always, we never split "." and ".." into differing
1649 items. This makes, among other things, the code for removing 1856 * items. This makes, among other things, the code for removing
1650 directories simpler. */ 1857 * directories simpler.
1858 */
1651#define SD_OFFSET 0 1859#define SD_OFFSET 0
1652#define SD_UNIQUENESS 0 1860#define SD_UNIQUENESS 0
1653#define DOT_OFFSET 1 1861#define DOT_OFFSET 1
1654#define DOT_DOT_OFFSET 2 1862#define DOT_DOT_OFFSET 2
1655#define DIRENTRY_UNIQUENESS 500 1863#define DIRENTRY_UNIQUENESS 500
1656 1864
1657/* */
1658#define FIRST_ITEM_OFFSET 1 1865#define FIRST_ITEM_OFFSET 1
1659 1866
1660/* 1867/*
1661 Q: How to get key of object pointed to by entry from entry? 1868 * Q: How to get key of object pointed to by entry from entry?
1662 1869 *
1663 A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key 1870 * A: Each directory entry has its header. This header has deh_dir_id
1664 of object, entry points to 1871 * and deh_objectid fields; these form the key of the object the entry points to.
1872 */
1665 1873
1666/* NOT IMPLEMENTED: 1874/*
1667 Directory will someday contain stat data of object */ 1875 * NOT IMPLEMENTED:
1876 * Directory will someday contain stat data of object
1877 */
1668 1878
1669struct reiserfs_de_head { 1879struct reiserfs_de_head {
1670 __le32 deh_offset; /* third component of the directory entry key */ 1880 __le32 deh_offset; /* third component of the directory entry key */
1671 __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced 1881
1672 by directory entry */ 1882 /*
1673 __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ 1883 * objectid of the parent directory of the object that is referenced
1884 * by the directory entry
1885 */
1886 __le32 deh_dir_id;
1887
1888 /* objectid of the object that is referenced by the directory entry */
1889 __le32 deh_objectid;
1674 __le16 deh_location; /* offset of name in the whole item */ 1890 __le16 deh_location; /* offset of name in the whole item */
1675 __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether 1891
1676 entry is hidden (unlinked) */ 1892 /*
1893 * whether 1) the entry contains stat data (for future use), and
1894 * 2) the entry is hidden (unlinked)
1895 */
1896 __le16 deh_state;
1677} __attribute__ ((__packed__)); 1897} __attribute__ ((__packed__));
1678#define DEH_SIZE sizeof(struct reiserfs_de_head) 1898#define DEH_SIZE sizeof(struct reiserfs_de_head)
1679#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) 1899#define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset))
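Per the Q/A above, the key of the referenced object is just (deh_dir_id, deh_objectid) read out of the entry head. A stand-alone sketch with user-space types; the obj_key type and key_of_entry helper are invented for illustration and a little-endian host is assumed:

#include <stdint.h>
#include <stdio.h>

/* user-space mirror of struct reiserfs_de_head */
struct de_head_mirror {
        uint32_t deh_offset;
        uint32_t deh_dir_id;    /* parent directory's objectid */
        uint32_t deh_objectid;  /* referenced object's objectid */
        uint16_t deh_location;
        uint16_t deh_state;
} __attribute__((__packed__));

/* invented short-key type: (dir_id, objectid) identifies the object */
struct obj_key { uint32_t dir_id, objectid; };

static struct obj_key key_of_entry(const struct de_head_mirror *deh)
{
        struct obj_key k = { deh->deh_dir_id, deh->deh_objectid };
        return k;
}

int main(void)
{
        struct de_head_mirror deh = { 1 /* DOT_OFFSET */, 2, 3, 0, 0 };
        struct obj_key k = key_of_entry(&deh);

        printf("entry points to key (%u, %u)\n", k.dir_id, k.objectid);
        return 0;
}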
@@ -1703,9 +1923,11 @@ struct reiserfs_de_head {
1703# define ADDR_UNALIGNED_BITS (3) 1923# define ADDR_UNALIGNED_BITS (3)
1704#endif 1924#endif
1705 1925
1706/* These are only used to manipulate deh_state. 1926/*
1927 * These are only used to manipulate deh_state.
1707 * Because of this, we'll use the ext2_ bit routines, 1928 * Because of this, we'll use the ext2_ bit routines,
1708 * since they are little endian */ 1929 * since they are little endian
1930 */
1709#ifdef ADDR_UNALIGNED_BITS 1931#ifdef ADDR_UNALIGNED_BITS
1710 1932
1711# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) 1933# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1)))
@@ -1740,13 +1962,16 @@ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
1740extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, 1962extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
1741 __le32 par_dirid, __le32 par_objid); 1963 __le32 par_dirid, __le32 par_objid);
1742 1964
1743// two entries per block (at least) 1965/* two entries per block (at least) */
1744#define REISERFS_MAX_NAME(block_size) 255 1966#define REISERFS_MAX_NAME(block_size) 255
1745 1967
1746/* this structure is used for operations on directory entries. It is 1968/*
1747 not a disk structure. */ 1969 * this structure is used for operations on directory entries. It is
1748/* When reiserfs_find_entry or search_by_entry_key find directory 1970 * not a disk structure.
1749 entry, they return filled reiserfs_dir_entry structure */ 1971 *
1972 * When reiserfs_find_entry or search_by_entry_key find directory
1973 * entry, they return filled reiserfs_dir_entry structure
1974 */
1750struct reiserfs_dir_entry { 1975struct reiserfs_dir_entry {
1751 struct buffer_head *de_bh; 1976 struct buffer_head *de_bh;
1752 int de_item_num; 1977 int de_item_num;
@@ -1764,7 +1989,10 @@ struct reiserfs_dir_entry {
1764 struct cpu_key de_entry_key; 1989 struct cpu_key de_entry_key;
1765}; 1990};
1766 1991
1767/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ 1992/*
1993 * these defines are useful when a particular member of
1994 * a reiserfs_dir_entry is needed
1995 */
1768 1996
1769/* pointer to file name, stored in entry */ 1997/* pointer to file name, stored in entry */
1770#define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ 1998#define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \
@@ -1791,11 +2019,13 @@ struct reiserfs_dir_entry {
1791 * |______|_______________|___________________|___________| 2019 * |______|_______________|___________________|___________|
1792 */ 2020 */
1793 2021
1794/***************************************************************************/ 2022/***************************************************************************
1795/* DISK CHILD */ 2023 * DISK CHILD *
1796/***************************************************************************/ 2024 ***************************************************************************/
1797/* Disk child pointer: The pointer from an internal node of the tree 2025/*
1798 to a node that is on disk. */ 2026 * Disk child pointer:
2027 * The pointer from an internal node of the tree to a node that is on disk.
2028 */
1799struct disk_child { 2029struct disk_child {
1800 __le32 dc_block_number; /* Disk child's block number. */ 2030 __le32 dc_block_number; /* Disk child's block number. */
1801 __le16 dc_size; /* Disk child's used space. */ 2031 __le16 dc_size; /* Disk child's used space. */
@@ -1828,47 +2058,66 @@ struct disk_child {
1828#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) 2058#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) )
1829#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) 2059#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2)
1830 2060
1831/***************************************************************************/ 2061/***************************************************************************
1832/* PATH STRUCTURES AND DEFINES */ 2062 * PATH STRUCTURES AND DEFINES *
1833/***************************************************************************/ 2063 ***************************************************************************/
1834 2064
1835/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the 2065/*
1836 key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it 2066 * search_by_key fills up the path from the root to the leaf as it descends
1837 does not find them in the cache it reads them from disk. For each node search_by_key finds using 2067 * the tree looking for the key. It uses reiserfs_bread to try to find
1838 reiserfs_bread it then uses bin_search to look through that node. bin_search will find the 2068 * buffers in the cache given their block number. If it does not find
1839 position of the block_number of the next node if it is looking through an internal node. If it 2069 * them in the cache it reads them from disk. For each node search_by_key
1840 is looking through a leaf node bin_search will find the position of the item which has key either 2070 * finds using reiserfs_bread it then uses bin_search to look through that
1841 equal to given key, or which is the maximal key less than the given key. */ 2071 * node. bin_search will find the position of the block_number of the next
2072 * node if it is looking through an internal node. If it is looking through
2073 * a leaf node bin_search will find the position of the item which has key
2074 * either equal to given key, or which is the maximal key less than the
2075 * given key.
2076 */
1842 2077
1843struct path_element { 2078struct path_element {
1844 struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ 2079 /* Pointer to the buffer at the path in the tree. */
1845 int pe_position; /* Position in the tree node which is placed in the */ 2080 struct buffer_head *pe_buffer;
1846 /* buffer above. */ 2081 /* Position in the tree node which is placed in the buffer above. */
2082 int pe_position;
1847}; 2083};
1848 2084
1849#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ 2085/*
1850#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ 2086 * maximal height of a tree. don't change this without
1851#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ 2087 * changing JOURNAL_PER_BALANCE_CNT
1852 2088 */
1853#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ 2089#define MAX_HEIGHT 5
1854#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ 2090
1855 2091/* Must equal MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */
1856/* We need to keep track of who the ancestors of nodes are. When we 2092#define EXTENDED_MAX_HEIGHT 7
1857 perform a search we record which nodes were visited while 2093
1858 descending the tree looking for the node we searched for. This list 2094/* Must be equal to at least 2. */
1859 of nodes is called the path. This information is used while 2095#define FIRST_PATH_ELEMENT_OFFSET 2
1860 performing balancing. Note that this path information may become 2096
1861 invalid, and this means we must check it when using it to see if it 2097/* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */
1862 is still valid. You'll need to read search_by_key and the comments 2098#define ILLEGAL_PATH_ELEMENT_OFFSET 1
1863 in it, especially about decrement_counters_in_path(), to understand 2099
1864 this structure. 2100/* this MUST be MAX_HEIGHT + 1. See about FEB below */
1865 2101#define MAX_FEB_SIZE 6
1866Paths make the code so much harder to work with and debug.... An 2102
1867enormous number of bugs are due to them, and trying to write or modify 2103/*
1868code that uses them just makes my head hurt. They are based on an 2104 * We need to keep track of who the ancestors of nodes are. When we
1869excessive effort to avoid disturbing the precious VFS code.:-( The 2105 * perform a search we record which nodes were visited while
1870gods only know how we are going to SMP the code that uses them. 2106 * descending the tree looking for the node we searched for. This list
1871znodes are the way! */ 2107 * of nodes is called the path. This information is used while
2108 * performing balancing. Note that this path information may become
2109 * invalid, and this means we must check it when using it to see if it
2110 * is still valid. You'll need to read search_by_key and the comments
2111 * in it, especially about decrement_counters_in_path(), to understand
2112 * this structure.
2113 *
2114 * Paths make the code so much harder to work with and debug.... An
2115 * enormous number of bugs are due to them, and trying to write or modify
2116 * code that uses them just makes my head hurt. They are based on an
2117 * excessive effort to avoid disturbing the precious VFS code.:-( The
2118 * gods only know how we are going to SMP the code that uses them.
2119 * znodes are the way!
2120 */
1872 2121
1873#define PATH_READA 0x1 /* do read ahead */ 2122#define PATH_READA 0x1 /* do read ahead */
1874#define PATH_READA_BACK 0x2 /* read backwards */ 2123#define PATH_READA_BACK 0x2 /* read backwards */
@@ -1876,7 +2125,8 @@ znodes are the way! */
1876struct treepath { 2125struct treepath {
1877 int path_length; /* Length of the array above. */ 2126 int path_length; /* Length of the array above. */
1878 int reada; 2127 int reada;
1879 struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ 2128 /* Array of the path elements. */
2129 struct path_element path_elements[EXTENDED_MAX_HEIGHT];
1880 int pos_in_item; 2130 int pos_in_item;
1881}; 2131};
1882 2132
@@ -1895,20 +2145,31 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
1895#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) 2145#define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position)
1896 2146
1897#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) 2147#define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length))
1898 /* you know, to the person who didn't 2148
1899 write this the macro name does not 2149/*
1900 at first suggest what it does. 2150 * you know, to the person who didn't write this the macro name does not
1901 Maybe POSITION_FROM_PATH_END? Or 2151 * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or
1902 maybe we should just focus on 2152 * maybe we should just focus on dumping paths... -Hans
1903 dumping paths... -Hans */ 2153 */
1904#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) 2154#define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length))
1905 2155
1906/* in do_balance leaf has h == 0 in contrast with path structure, 2156/*
1907 where root has level == 0. That is why we need these defines */ 2157 * in do_balance leaf has h == 0 in contrast with path structure,
1908#define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ 2158 * where root has level == 0. That is why we need these defines
1909#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ 2159 */
1910#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) 2160
1911#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ 2161/* tb->S[h] */
2162#define PATH_H_PBUFFER(path, h) \
2163 PATH_OFFSET_PBUFFER(path, path->path_length - (h))
2164
2165/* tb->F[h] or tb->S[0]->b_parent */
2166#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1)
2167
2168#define PATH_H_POSITION(path, h) \
2169 PATH_OFFSET_POSITION(path, path->path_length - (h))
2170
2171/* tb->S[h]->b_item_order */
2172#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1)
1912 2173
1913#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) 2174#define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h))
1914 2175
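The height-to-offset inversion those PATH_H_* macros encode (leaf at h == 0, but stored at the end of the path array) reduces to one subtraction. A small illustrative program, not kernel code:

#include <stdio.h>

#define FIRST_PATH_ELEMENT_OFFSET 2

/*
 * A path of length L keeps the root at FIRST_PATH_ELEMENT_OFFSET and
 * the leaf at offset L, so height h lives at offset L - h; this is
 * the arithmetic behind PATH_H_PBUFFER and PATH_H_POSITION.
 */
static int path_offset_for_height(int path_length, int h)
{
        return path_length - h;
}

int main(void)
{
        int path_length = 5; /* a four-level descent, for example */
        int h;

        for (h = 0; path_offset_for_height(path_length, h) >=
                    FIRST_PATH_ELEMENT_OFFSET; h++)
                printf("h=%d -> offset %d%s\n", h,
                       path_offset_for_height(path_length, h),
                       h == 0 ? " (the leaf, tb->S[0])" : "");
        return 0;
}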
@@ -1973,16 +2234,14 @@ static inline void *tp_item_body(const struct treepath *path)
1973 /* get item body */ 2234 /* get item body */
1974#define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) 2235#define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih)))
1975 2236
1976/* length of the directory entry in directory item. This define
1977 calculates length of i-th directory entry using directory entry
1978 locations from dir entry head. When it calculates length of 0-th
1979 directory entry, it uses length of whole item in place of entry
1980 location of the non-existent following entry in the calculation.
1981 See picture above.*/
1982/* 2237/*
1983#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ 2238 * length of the directory entry in directory item. This define
1984((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) 2239 * calculates length of i-th directory entry using directory entry
1985*/ 2240 * locations from dir entry head. When it calculates length of 0-th
2241 * directory entry, it uses length of whole item in place of entry
2242 * location of the non-existent following entry in the calculation.
2243 * See picture above.
2244 */
1986static inline int entry_length(const struct buffer_head *bh, 2245static inline int entry_length(const struct buffer_head *bh,
1987 const struct item_head *ih, int pos_in_item) 2246 const struct item_head *ih, int pos_in_item)
1988{ 2247{
@@ -1995,15 +2254,15 @@ static inline int entry_length(const struct buffer_head *bh,
1995 return ih_item_len(ih) - deh_location(deh); 2254 return ih_item_len(ih) - deh_location(deh);
1996} 2255}
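Concretely: entry bodies are laid out back to front, so entry i ends where entry i-1 begins, and entry 0 ends at the end of the item. The same computation over a toy location table (illustrative, not the kernel function):

#include <stdio.h>

/*
 * Entry bodies grow from the end of the item toward the header array,
 * so deh_location decreases as the entry index grows.
 */
static int toy_entry_length(const unsigned short *loc, int item_len, int i)
{
        if (i == 0)             /* no following entry: item end bounds it */
                return item_len - loc[0];
        return loc[i - 1] - loc[i];
}

int main(void)
{
        /* toy item: three entries located at 100, 60, 30 in a 120-byte item */
        unsigned short loc[] = { 100, 60, 30 };
        int i;

        for (i = 0; i < 3; i++)
                printf("entry %d: %d bytes\n", i,
                       toy_entry_length(loc, 120, i));
        return 0;
}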
1997 2256
1998/***************************************************************************/ 2257/***************************************************************************
1999/* MISC */ 2258 * MISC *
2000/***************************************************************************/ 2259 ***************************************************************************/
2001 2260
2002/* Size of pointer to the unformatted node. */ 2261/* Size of pointer to the unformatted node. */
2003#define UNFM_P_SIZE (sizeof(unp_t)) 2262#define UNFM_P_SIZE (sizeof(unp_t))
2004#define UNFM_P_SHIFT 2 2263#define UNFM_P_SHIFT 2
2005 2264
2006// in in-core inode key is stored on le form 2265/* in in-core inode key is stored on le form */
2007#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) 2266#define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key))
2008 2267
2009#define MAX_UL_INT 0xffffffff 2268#define MAX_UL_INT 0xffffffff
@@ -2019,7 +2278,6 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
2019 return (loff_t) ((~(__u64) 0) >> 4); 2278 return (loff_t) ((~(__u64) 0) >> 4);
2020} 2279}
2021 2280
2022/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/
2023#define MAX_KEY_OBJECTID MAX_UL_INT 2281#define MAX_KEY_OBJECTID MAX_UL_INT
2024 2282
2025#define MAX_B_NUM MAX_UL_INT 2283#define MAX_B_NUM MAX_UL_INT
@@ -2028,9 +2286,12 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
2028/* the purpose is to detect overflow of an unsigned short */ 2286/* the purpose is to detect overflow of an unsigned short */
2029#define REISERFS_LINK_MAX (MAX_US_INT - 1000) 2287#define REISERFS_LINK_MAX (MAX_US_INT - 1000)
2030 2288
2031/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ 2289/*
2032#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ 2290 * The following defines are used in reiserfs_insert_item
2033#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ 2291 * and reiserfs_append_item
2292 */
2293#define REISERFS_KERNEL_MEM 0 /* kernel memory mode */
2294#define REISERFS_USER_MEM 1 /* user memory mode */
2034 2295
2035#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) 2296#define fs_generation(s) (REISERFS_SB(s)->s_generation_counter)
2036#define get_generation(s) atomic_read (&fs_generation(s)) 2297#define get_generation(s) atomic_read (&fs_generation(s))
@@ -2042,46 +2303,65 @@ static inline loff_t max_reiserfs_offset(struct inode *inode)
2042 __fs_changed(gen, s); \ 2303 __fs_changed(gen, s); \
2043}) 2304})
2044 2305
2045/***************************************************************************/ 2306/***************************************************************************
2046/* FIXATE NODES */ 2307 * FIXATE NODES *
2047/***************************************************************************/ 2308 ***************************************************************************/
2048 2309
2049#define VI_TYPE_LEFT_MERGEABLE 1 2310#define VI_TYPE_LEFT_MERGEABLE 1
2050#define VI_TYPE_RIGHT_MERGEABLE 2 2311#define VI_TYPE_RIGHT_MERGEABLE 2
2051 2312
2052/* To make any changes in the tree we always first find node, that 2313/*
2053 contains item to be changed/deleted or place to insert a new 2314 * To make any changes in the tree we always first find the node that
2054 item. We call this node S. To do balancing we need to decide what 2315 * contains the item to be changed/deleted or the place to insert a new
2055 we will shift to left/right neighbor, or to a new node, where new 2316 * item. We call this node S. To do balancing we need to decide what
2056 item will be etc. To make this analysis simpler we build virtual 2317 * we will shift to left/right neighbor, or to a new node, where new
2057 node. Virtual node is an array of items, that will replace items of 2318 * item will be etc. To make this analysis simpler we build virtual
2058 node S. (For instance if we are going to delete an item, virtual 2319 * node. A virtual node is an array of items that will replace the items of
2059 node does not contain it). Virtual node keeps information about 2320 * node S. (For instance if we are going to delete an item, virtual
2060 item sizes and types, mergeability of first and last items, sizes 2321 * node does not contain it). Virtual node keeps information about
2061 of all entries in directory item. We use this array of items when 2322 * item sizes and types, mergeability of first and last items, sizes
2062 calculating what we can shift to neighbors and how many nodes we 2323 * of all entries in directory item. We use this array of items when
2063 have to have if we do not any shiftings, if we shift to left/right 2324 * calculating what we can shift to neighbors and how many nodes we
2064 neighbor or to both. */ 2325 * have to have if we do no shifting, if we shift to the left/right
2326 * neighbor or to both.
2327 */
2065struct virtual_item { 2328struct virtual_item {
2066 int vi_index; // index in the array of item operations 2329 int vi_index; /* index in the array of item operations */
2067 unsigned short vi_type; // left/right mergeability 2330 unsigned short vi_type; /* left/right mergeability */
2068 unsigned short vi_item_len; /* length of item that it will have after balancing */ 2331
2332 /* length of item that it will have after balancing */
2333 unsigned short vi_item_len;
2334
2069 struct item_head *vi_ih; 2335 struct item_head *vi_ih;
2070 const char *vi_item; // body of item (old or new) 2336 const char *vi_item; /* body of item (old or new) */
2071 const void *vi_new_data; // 0 always but paste mode 2337 const void *vi_new_data; /* always 0 except in paste mode */
2072 void *vi_uarea; // item specific area 2338 void *vi_uarea; /* item specific area */
2073}; 2339};
2074 2340
2075struct virtual_node { 2341struct virtual_node {
2076 char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ 2342 /* this is a pointer to the free space in the buffer */
2343 char *vn_free_ptr;
2344
2077 unsigned short vn_nr_item; /* number of items in virtual node */ 2345 unsigned short vn_nr_item; /* number of items in virtual node */
2078 short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ 2346
2079 short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ 2347 /*
2348 * size of node , that node would have if it has
2349 * unlimited size and no balancing is performed
2350 */
2351 short vn_size;
2352
2353 /* mode of balancing (paste, insert, delete, cut) */
2354 short vn_mode;
2355
2080 short vn_affected_item_num; 2356 short vn_affected_item_num;
2081 short vn_pos_in_item; 2357 short vn_pos_in_item;
2082 struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ 2358
2359 /* item header of inserted item, 0 for other modes */
2360 struct item_head *vn_ins_ih;
2083 const void *vn_data; 2361 const void *vn_data;
2084 struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ 2362
2363 /* array of items (including a new one, excluding item to be deleted) */
2364 struct virtual_item *vn_vi;
2085}; 2365};
2086 2366
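One way to see what the vn_vi array buys the balancing code is the kind of sizing pass it enables: summing the virtual items tells you how much space the replacement items would need. A hedged sketch; the real accounting in fix_node.c is more involved, and IH_SIZE = 24 is assumed here rather than taken from this hunk:

#include <stdio.h>

#define IH_SIZE 24 /* on-disk item header size, assumed for this sketch */

struct virtual_item_mirror { unsigned short vi_item_len; };

/* bytes the virtual items would occupy in a node, headers included */
static int vn_space_needed(const struct virtual_item_mirror *vi, int nr_item)
{
        int size = 0;
        int i;

        for (i = 0; i < nr_item; i++)
                size += IH_SIZE + vi[i].vi_item_len;
        return size;
}

int main(void)
{
        struct virtual_item_mirror vi[] = { { 48 }, { 120 }, { 16 } };

        printf("replacement items need %d bytes\n", vn_space_needed(vi, 3));
        return 0;
}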
2087/* used by directory items when creating virtual nodes */ 2367/* used by directory items when creating virtual nodes */
@@ -2091,22 +2371,25 @@ struct direntry_uarea {
2091 __u16 entry_sizes[1]; 2371 __u16 entry_sizes[1];
2092} __attribute__ ((__packed__)); 2372} __attribute__ ((__packed__));
2093 2373
2094/***************************************************************************/ 2374/***************************************************************************
2095/* TREE BALANCE */ 2375 * TREE BALANCE *
2096/***************************************************************************/ 2376 ***************************************************************************/
2097 2377
2098/* This temporary structure is used in tree balance algorithms, and 2378/*
2099 constructed as we go to the extent that its various parts are 2379 * This temporary structure is used in tree balance algorithms, and
2100 needed. It contains arrays of nodes that can potentially be 2380 * constructed as we go to the extent that its various parts are
2101 involved in the balancing of node S, and parameters that define how 2381 * needed. It contains arrays of nodes that can potentially be
2102 each of the nodes must be balanced. Note that in these algorithms 2382 * involved in the balancing of node S, and parameters that define how
2103 for balancing the worst case is to need to balance the current node 2383 * each of the nodes must be balanced. Note that in these algorithms
2104 S and the left and right neighbors and all of their parents plus 2384 * for balancing the worst case is to need to balance the current node
2105 create a new node. We implement S1 balancing for the leaf nodes 2385 * S and the left and right neighbors and all of their parents plus
2106 and S0 balancing for the internal nodes (S1 and S0 are defined in 2386 * create a new node. We implement S1 balancing for the leaf nodes
2107 our papers.)*/ 2387 * and S0 balancing for the internal nodes (S1 and S0 are defined in
2388 * our papers.)
2389 */
2108 2390
2109#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ 2391/* size of the array of buffers to free at end of do_balance */
2392#define MAX_FREE_BLOCK 7
2110 2393
2111/* maximum number of FEB blocknrs on a single level */ 2394/* maximum number of FEB blocknrs on a single level */
2112#define MAX_AMOUNT_NEEDED 2 2395#define MAX_AMOUNT_NEEDED 2
@@ -2118,64 +2401,132 @@ struct tree_balance {
2118 struct super_block *tb_sb; 2401 struct super_block *tb_sb;
2119 struct reiserfs_transaction_handle *transaction_handle; 2402 struct reiserfs_transaction_handle *transaction_handle;
2120 struct treepath *tb_path; 2403 struct treepath *tb_path;
2121 struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ 2404
2122 struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ 2405 /* array of left neighbors of nodes in the path */
2123 struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ 2406 struct buffer_head *L[MAX_HEIGHT];
2124 struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ 2407
2125 struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ 2408 /* array of right neighbors of nodes in the path */
2126 struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ 2409 struct buffer_head *R[MAX_HEIGHT];
2127 2410
2128 struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals 2411 /* array of fathers of the left neighbors */
2129 cur_blknum. */ 2412 struct buffer_head *FL[MAX_HEIGHT];
2413
2414 /* array of fathers of the right neighbors */
2415 struct buffer_head *FR[MAX_HEIGHT];
2416 /* array of common parents of center node and its left neighbor */
2417 struct buffer_head *CFL[MAX_HEIGHT];
2418
2419 /* array of common parents of center node and its right neighbor */
2420 struct buffer_head *CFR[MAX_HEIGHT];
2421
2422 /*
2423 * array of empty buffers. Number of buffers in array equals
2424 * cur_blknum.
2425 */
2426 struct buffer_head *FEB[MAX_FEB_SIZE];
2130 struct buffer_head *used[MAX_FEB_SIZE]; 2427 struct buffer_head *used[MAX_FEB_SIZE];
2131 struct buffer_head *thrown[MAX_FEB_SIZE]; 2428 struct buffer_head *thrown[MAX_FEB_SIZE];
2132 int lnum[MAX_HEIGHT]; /* array of number of items which must be 2429
2133 shifted to the left in order to balance the 2430 /*
2134 current node; for leaves includes item that 2431 * array of number of items which must be shifted to the left in
2135 will be partially shifted; for internal 2432 * order to balance the current node; for leaves includes item that
2136 nodes, it is the number of child pointers 2433 * will be partially shifted; for internal nodes, it is the number
2137 rather than items. It includes the new item 2434 * of child pointers rather than items. It includes the new item
2138 being created. The code sometimes subtracts 2435 * being created. The code sometimes subtracts one to get the
2139 one to get the number of wholly shifted 2436 * number of wholly shifted items for other purposes.
2140 items for other purposes. */ 2437 */
2141 int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ 2438 int lnum[MAX_HEIGHT];
2142 int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and 2439
2143 S[h] to its item number within the node CFL[h] */ 2440 /* substitute right for left in comment above */
2144 int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ 2441 int rnum[MAX_HEIGHT];
2145 int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from 2442
2146 S[h]. A negative value means removing. */ 2443 /*
2147 int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after 2444 * array indexed by height h mapping the key delimiting L[h] and
2148 balancing on the level h of the tree. If 0 then S is 2445 * S[h] to its item number within the node CFL[h]
2149 being deleted, if 1 then S is remaining and no new nodes 2446 */
2150 are being created, if 2 or 3 then 1 or 2 new nodes is 2447 int lkey[MAX_HEIGHT];
2151 being created */ 2448
2449 /* substitute r for l in comment above */
2450 int rkey[MAX_HEIGHT];
2451
2452 /*
2453 * the number of bytes by we are trying to add or remove from
2454 * S[h]. A negative value means removing.
2455 */
2456 int insert_size[MAX_HEIGHT];
2457
2458 /*
2459 * number of nodes that will replace node S[h] after balancing
2460 * on the level h of the tree. If 0 then S is being deleted,
2461 * if 1 then S is remaining and no new nodes are being created,
2462 * if 2 or 3 then 1 or 2 new nodes are being created
2463 */
2464 int blknum[MAX_HEIGHT];
2152 2465
2153 /* fields that are used only for balancing leaves of the tree */ 2466 /* fields that are used only for balancing leaves of the tree */
2154 int cur_blknum; /* number of empty blocks having been already allocated */ 2467
2155 int s0num; /* number of items that fall into left most node when S[0] splits */ 2468 /* number of empty blocks having been already allocated */
2156 int s1num; /* number of items that fall into first new node when S[0] splits */ 2469 int cur_blknum;
2157 int s2num; /* number of items that fall into second new node when S[0] splits */ 2470
2158 int lbytes; /* number of bytes which can flow to the left neighbor from the left */ 2471 /* number of items that fall into left most node when S[0] splits */
2159 /* most liquid item that cannot be shifted from S[0] entirely */ 2472 int s0num;
2160 /* if -1 then nothing will be partially shifted */ 2473
2161 int rbytes; /* number of bytes which will flow to the right neighbor from the right */ 2474 /* number of items that fall into first new node when S[0] splits */
2162 /* most liquid item that cannot be shifted from S[0] entirely */ 2475 int s1num;
2163 /* if -1 then nothing will be partially shifted */ 2476
2164 int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ 2477 /* number of items that fall into second new node when S[0] splits */
2165 /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ 2478 int s2num;
2479
2480 /*
2481 * number of bytes which can flow to the left neighbor from the left
2482 * most liquid item that cannot be shifted from S[0] entirely;
2483 * if -1 then nothing will be partially shifted
2484 */
2485 int lbytes;
2486
2487 /*
2488 * number of bytes which will flow to the right neighbor from the right
2489 * most liquid item that cannot be shifted from S[0] entirely;
2490 * if -1 then nothing will be partially shifted
2491 */
2492 int rbytes;
2493
2494 /*
2495 * number of bytes which flow to the first new node when S[0] splits
2496 * note: if S[0] splits into 3 nodes, then items do not need to be cut
2497 */
2498 int s1bytes;
2166 int s2bytes; 2499 int s2bytes;
2167 struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ 2500
2168 char *vn_buf; /* kmalloced memory. Used to create 2501 /*
2169 virtual node and keep map of 2502 * buffers which are to be freed after do_balance finishes
2170 dirtied bitmap blocks */ 2503 * by unfix_nodes
2504 */
2505 struct buffer_head *buf_to_free[MAX_FREE_BLOCK];
2506
2507 /*
2508 * kmalloced memory. Used to create virtual node and keep
2509 * map of dirtied bitmap blocks
2510 */
2511 char *vn_buf;
2512
2171 int vn_buf_size; /* size of the vn_buf */ 2513 int vn_buf_size; /* size of the vn_buf */
2172 struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */
2173 2514
2174 int fs_gen; /* saved value of `reiserfs_generation' counter 2515 /* VN starts after bitmap of bitmap blocks */
2175 see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ 2516 struct virtual_node *tb_vn;
2517
2518 /*
2519 * saved value of the `reiserfs_generation' counter; see
2520 * FILESYSTEM_CHANGED() macro in reiserfs_fs.h
2521 */
2522 int fs_gen;
2523
2176#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2524#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2177 struct in_core_key key; /* key pointer, to pass to block allocator or 2525 /*
2178 another low-level subsystem */ 2526 * key pointer, to pass to block allocator or
2527 * another low-level subsystem
2528 */
2529 struct in_core_key key;
2179#endif 2530#endif
2180}; 2531};
2181 2532
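The blknum semantics spelled out above (0: S[h] is deleted; 1: it survives with no new nodes; 2 or 3: one or two new nodes) translate directly into a decision helper. Illustrative only, not a function from the sources:

#include <stdio.h>

/* interpret tree_balance.blknum[h] per the comment above */
static const char *blknum_meaning(int blknum)
{
        switch (blknum) {
        case 0:  return "S[h] is being deleted";
        case 1:  return "S[h] remains, no new nodes";
        case 2:  return "one new node is being created";
        case 3:  return "two new nodes are being created";
        default: return "out of range for this scheme";
        }
}

int main(void)
{
        int n;

        for (n = 0; n <= 3; n++)
                printf("blknum = %d: %s\n", n, blknum_meaning(n));
        return 0;
}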
@@ -2183,20 +2534,24 @@ struct tree_balance {
2183 2534
2184/* When inserting an item. */ 2535/* When inserting an item. */
2185#define M_INSERT 'i' 2536#define M_INSERT 'i'
2186/* When inserting into (directories only) or appending onto an already 2537/*
2187 existent item. */ 2538 * When inserting into (directories only) or appending onto an already
2539 * existent item.
2540 */
2188#define M_PASTE 'p' 2541#define M_PASTE 'p'
2189/* When deleting an item. */ 2542/* When deleting an item. */
2190#define M_DELETE 'd' 2543#define M_DELETE 'd'
2191/* When truncating an item or removing an entry from a (directory) item. */ 2544/* When truncating an item or removing an entry from a (directory) item. */
2192#define M_CUT 'c' 2545#define M_CUT 'c'
2193 2546
2194/* used when balancing on leaf level skipped (in reiserfsck) */ 2547/* used when balancing on leaf level skipped (in reiserfsck) */
2195#define M_INTERNAL 'n' 2548#define M_INTERNAL 'n'
2196 2549
2197/* When further balancing is not needed, then do_balance does not need 2550/*
2198 to be called. */ 2551 * When further balancing is not needed, do_balance does not need
2199#define M_SKIP_BALANCING 's' 2552 * to be called.
2553 */
2554#define M_SKIP_BALANCING 's'
2200#define M_CONVERT 'v' 2555#define M_CONVERT 'v'
2201 2556
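Because the modes are single characters, code that receives one typically just switches on it. A hedged sketch of such a dispatcher over the constants above (the descriptions paraphrase the comments; this function does not exist in the sources):

#include <stdio.h>

#define M_INSERT         'i'
#define M_PASTE          'p'
#define M_DELETE         'd'
#define M_CUT            'c'
#define M_INTERNAL       'n'
#define M_SKIP_BALANCING 's'
#define M_CONVERT        'v'

static const char *mode_name(char mode)
{
        switch (mode) {
        case M_INSERT:         return "insert a new item";
        case M_PASTE:          return "paste into / append onto an item";
        case M_DELETE:         return "delete an item";
        case M_CUT:            return "truncate item / remove dir entry";
        case M_INTERNAL:       return "leaf balancing skipped (reiserfsck)";
        case M_SKIP_BALANCING: return "no further balancing needed";
        case M_CONVERT:        return "convert";
        default:               return "unknown mode";
        }
}

int main(void)
{
        const char modes[] = "ipdcnsv";
        const char *p;

        for (p = modes; *p; p++)
                printf("'%c': %s\n", *p, mode_name(*p));
        return 0;
}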
2202/* modes of leaf_move_items */ 2557/* modes of leaf_move_items */
@@ -2209,8 +2564,10 @@ struct tree_balance {
2209#define FIRST_TO_LAST 0 2564#define FIRST_TO_LAST 0
2210#define LAST_TO_FIRST 1 2565#define LAST_TO_FIRST 1
2211 2566
2212/* used in do_balance for passing parent of node information that has 2567/*
2213 been gotten from tb struct */ 2568 * used in do_balance for passing parent of node information that has
2569 * been gotten from tb struct
2570 */
2214struct buffer_info { 2571struct buffer_info {
2215 struct tree_balance *tb; 2572 struct tree_balance *tb;
2216 struct buffer_head *bi_bh; 2573 struct buffer_head *bi_bh;
@@ -2228,20 +2585,24 @@ static inline struct super_block *sb_from_bi(struct buffer_info *bi)
2228 return bi ? sb_from_tb(bi->tb) : NULL; 2585 return bi ? sb_from_tb(bi->tb) : NULL;
2229} 2586}
2230 2587
2231/* there are 4 types of items: stat data, directory item, indirect, direct. 2588/*
2232+-------------------+------------+--------------+------------+ 2589 * there are 4 types of items: stat data, directory item, indirect, direct.
2233| | k_offset | k_uniqueness | mergeable? | 2590 * +-------------------+------------+--------------+------------+
2234+-------------------+------------+--------------+------------+ 2591 * | | k_offset | k_uniqueness | mergeable? |
2235| stat data | 0 | 0 | no | 2592 * +-------------------+------------+--------------+------------+
2236+-------------------+------------+--------------+------------+ 2593 * | stat data | 0 | 0 | no |
2237| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | 2594 * +-------------------+------------+--------------+------------+
2238| non 1st directory | hash value | | yes | 2595 * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no |
2239| item | | | | 2596 * | non 1st directory | hash value | UNIQUENESS | yes |
2240+-------------------+------------+--------------+------------+ 2597 * | item | | | |
2241| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object 2598 * +-------------------+------------+--------------+------------+
2242+-------------------+------------+--------------+------------+ 2599 * | indirect item | offset + 1 |TYPE_INDIRECT | [1] |
2243| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object 2600 * +-------------------+------------+--------------+------------+
2244+-------------------+------------+--------------+------------+ 2601 * | direct item | offset + 1 |TYPE_DIRECT | [2] |
2602 * +-------------------+------------+--------------+------------+
2603 *
2604 * [1] if this is not the first indirect item of the object
2605 * [2] if this is not the first direct item of the object
2245*/ 2606 */
2246 2607
2247struct item_operations { 2608struct item_operations {
@@ -2280,22 +2641,30 @@ extern struct item_operations *item_ops[TYPE_ANY + 1];
2280/* number of blocks pointed to by the indirect item */ 2641/* number of blocks pointed to by the indirect item */
2281#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) 2642#define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE)
2282 2643
2283/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ 2644/*
2645 * the used space within the unformatted node corresponding
2646 * to pos within the item pointed to by ih
2647 */
2284#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) 2648#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size))
2285 2649
2286/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ 2650/*
2651 * number of bytes contained by the direct item or the
2652 * unformatted nodes the indirect item points to
2653 */
2287 2654
2288 /* following defines use reiserfs buffer header and item header */ 2655/* following defines use reiserfs buffer header and item header */
2289 2656
2290/* get stat-data */ 2657/* get stat-data */
2291#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) 2658#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) )
2292 2659
2293// this is 3976 for size==4096 2660/* this is 3976 for size==4096 */
2294#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) 2661#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE)
2295 2662
2296/* indirect items consist of entries which contain blocknrs, pos 2663/*
2297 indicates which entry, and B_I_POS_UNFM_POINTER resolves to the 2664 * indirect items consist of entries which contain blocknrs, pos
2298 blocknr contained by the entry pos points to */ 2665 * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the
2666 * blocknr contained by the entry pos points to
2667 */
2299#define B_I_POS_UNFM_POINTER(bh, ih, pos) \ 2668#define B_I_POS_UNFM_POINTER(bh, ih, pos) \
2300 le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) 2669 le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos)))
2301#define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ 2670#define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \
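An indirect item body is nothing but an array of little-endian 32-bit block numbers, which is exactly what I_UNFM_NUM and B_I_POS_UNFM_POINTER exploit. A self-contained sketch of the same decoding, with the little-endian read done by hand so it is host-independent; treating a zero pointer as a file hole follows reiserfs convention:

#include <stdint.h>
#include <stdio.h>

#define UNFM_P_SIZE 4 /* sizeof(unp_t): one on-disk block pointer */

/* decode a little-endian u32, as le32_to_cpu would */
static uint32_t le32_decode(const unsigned char *p)
{
        return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
               (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

int main(void)
{
        /* toy indirect item body: block pointers 10, 11 and a hole */
        unsigned char body[] = {
                10, 0, 0, 0,  11, 0, 0, 0,  0, 0, 0, 0,
        };
        int nr = (int)sizeof(body) / UNFM_P_SIZE; /* what I_UNFM_NUM computes */
        int pos;

        for (pos = 0; pos < nr; pos++) {
                uint32_t blk = le32_decode(body + pos * UNFM_P_SIZE);

                printf("pos %d -> block %u%s\n", pos, (unsigned)blk,
                       blk ? "" : " (hole)");
        }
        return 0;
}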
@@ -2306,9 +2675,9 @@ struct reiserfs_iget_args {
2306 __u32 dirid; 2675 __u32 dirid;
2307}; 2676};
2308 2677
2309/***************************************************************************/ 2678/***************************************************************************
2310/* FUNCTION DECLARATIONS */ 2679 * FUNCTION DECLARATIONS *
2311/***************************************************************************/ 2680 ***************************************************************************/
2312 2681
2313#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) 2682#define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12)
2314 2683
@@ -2320,7 +2689,10 @@ struct reiserfs_iget_args {
2320/* first block written in a commit. */ 2689/* first block written in a commit. */
2321struct reiserfs_journal_desc { 2690struct reiserfs_journal_desc {
2322 __le32 j_trans_id; /* id of commit */ 2691 __le32 j_trans_id; /* id of commit */
2323 __le32 j_len; /* length of commit. len +1 is the commit block */ 2692
2693 /* length of commit. len +1 is the commit block */
2694 __le32 j_len;
2695
2324 __le32 j_mount_id; /* mount id of this trans */ 2696 __le32 j_mount_id; /* mount id of this trans */
2325 __le32 j_realblock[1]; /* real locations for each block */ 2697 __le32 j_realblock[1]; /* real locations for each block */
2326}; 2698};
@@ -2347,22 +2719,35 @@ struct reiserfs_journal_commit {
2347#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) 2719#define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0)
2348#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) 2720#define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0)
2349 2721
2350/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the 2722/*
2351** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, 2723 * this header block gets written whenever a transaction is considered
2352** and this transaction does not need to be replayed. 2724 * fully flushed, and is more recent than the last fully flushed transaction.
2353*/ 2725 * Fully flushed means all the log blocks and all the real blocks are on
2726 * disk, and this transaction does not need to be replayed.
2727 */
2354struct reiserfs_journal_header { 2728struct reiserfs_journal_header {
2355 __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ 2729 /* id of last fully flushed transaction */
2356 __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ 2730 __le32 j_last_flush_trans_id;
2731
2732 /* offset in the log of where to start replay after a crash */
2733 __le32 j_first_unflushed_offset;
2734
2357 __le32 j_mount_id; 2735 __le32 j_mount_id;
2358 /* 12 */ struct journal_params jh_journal; 2736 /* 12 */ struct journal_params jh_journal;
2359}; 2737};
2360 2738
2361/* biggest tunable defines are right here */ 2739/* biggest tunable defines are right here */
2362#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ 2740#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */
2363#define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ 2741
2742/* biggest possible single transaction, don't change for now (8/3/99) */
2743#define JOURNAL_TRANS_MAX_DEFAULT 1024
2364#define JOURNAL_TRANS_MIN_DEFAULT 256 2744#define JOURNAL_TRANS_MIN_DEFAULT 256
2365#define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ 2745
2746/*
2747 * max blocks to batch into one transaction,
2748 * don't make this any bigger than 900
2749 */
2750#define JOURNAL_MAX_BATCH_DEFAULT 900
2366#define JOURNAL_MIN_RATIO 2 2751#define JOURNAL_MIN_RATIO 2
2367#define JOURNAL_MAX_COMMIT_AGE 30 2752#define JOURNAL_MAX_COMMIT_AGE 30
2368#define JOURNAL_MAX_TRANS_AGE 30 2753#define JOURNAL_MAX_TRANS_AGE 30
@@ -2387,16 +2772,18 @@ struct reiserfs_journal_header {
2387#define REISERFS_QUOTA_DEL_BLOCKS(s) 0 2772#define REISERFS_QUOTA_DEL_BLOCKS(s) 0
2388#endif 2773#endif
2389 2774
2390/* both of these can be as low as 1, or as high as you want. The min is the 2775/*
2391** number of 4k bitmap nodes preallocated on mount. New nodes are allocated 2776 * both of these can be as low as 1, or as high as you want. The min is the
2392** as needed, and released when transactions are committed. On release, if 2777 * number of 4k bitmap nodes preallocated on mount. New nodes are allocated
2393** the current number of nodes is > max, the node is freed, otherwise, 2778 * as needed, and released when transactions are committed. On release, if
2394** it is put on a free list for faster use later. 2779 * the current number of nodes is > max, the node is freed, otherwise,
2780 * it is put on a free list for faster use later.
2395*/ 2781 */
2396#define REISERFS_MIN_BITMAP_NODES 10 2782#define REISERFS_MIN_BITMAP_NODES 10
2397#define REISERFS_MAX_BITMAP_NODES 100 2783#define REISERFS_MAX_BITMAP_NODES 100
2398 2784
2399#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ 2785/* these are based on journal hash size of 8192 */
2786#define JBH_HASH_SHIFT 13
2400#define JBH_HASH_MASK 8191 2787#define JBH_HASH_MASK 8191
2401 2788
2402#define _jhashfn(sb,block) \ 2789#define _jhashfn(sb,block) \
@@ -2404,7 +2791,7 @@ struct reiserfs_journal_header {
2404 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) 2791 (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))
2405#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) 2792#define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK])
2406 2793
2407// We need these to make journal.c code more readable 2794/* We need these to make journal.c code more readable */
2408#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2795#define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2409#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2796#define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
2410#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) 2797#define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize)
@@ -2412,12 +2799,14 @@ struct reiserfs_journal_header {
2412enum reiserfs_bh_state_bits { 2799enum reiserfs_bh_state_bits {
2413 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ 2800 BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */
2414 BH_JDirty_wait, 2801 BH_JDirty_wait,
2415 BH_JNew, /* disk block was taken off free list before 2802 /*
2416 * being in a finished transaction, or 2803 * disk block was taken off free list before being in a
2417 * written to disk. Can be reused immed. */ 2804 * finished transaction, or written to disk. Can be reused immed.
2805 */
2806 BH_JNew,
2418 BH_JPrepared, 2807 BH_JPrepared,
2419 BH_JRestore_dirty, 2808 BH_JRestore_dirty,
2420 BH_JTest, // debugging only will go away 2809 BH_JTest, /* debugging only, will go away */
2421}; 2810};
2422 2811
2423BUFFER_FNS(JDirty, journaled); 2812BUFFER_FNS(JDirty, journaled);
@@ -2433,27 +2822,36 @@ TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty);
2433BUFFER_FNS(JTest, journal_test); 2822BUFFER_FNS(JTest, journal_test);
2434TAS_BUFFER_FNS(JTest, journal_test); 2823TAS_BUFFER_FNS(JTest, journal_test);
2435 2824
2436/* 2825/* transaction handle which is passed around for all journal calls */
2437** transaction handle which is passed around for all journal calls
2438*/
2439struct reiserfs_transaction_handle { 2826struct reiserfs_transaction_handle {
2440 struct super_block *t_super; /* super for this FS when journal_begin was 2827 /*
2441 called. saves calls to reiserfs_get_super 2828 * super for this FS when journal_begin was called; saves calls to
2442 also used by nested transactions to make 2829 * reiserfs_get_super. Also used by nested transactions to make
2443 sure they are nesting on the right FS 2830 * sure they are nesting on the right FS. _must_ be first
2444 _must_ be first in the handle 2831 * in the handle
2445 */ 2832 */
2833 struct super_block *t_super;
2834
2446 int t_refcount; 2835 int t_refcount;
2447 int t_blocks_logged; /* number of blocks this writer has logged */ 2836 int t_blocks_logged; /* number of blocks this writer has logged */
2448 int t_blocks_allocated; /* number of blocks this writer allocated */ 2837 int t_blocks_allocated; /* number of blocks this writer allocated */
2449 unsigned int t_trans_id; /* sanity check, equals the current trans id */ 2838
2839 /* sanity check, equals the current trans id */
2840 unsigned int t_trans_id;
2841
2450 void *t_handle_save; /* save existing current->journal_info */ 2842 void *t_handle_save; /* save existing current->journal_info */
2451 unsigned displace_new_blocks:1; /* if new block allocation occurres, that block 2843
2452 should be displaced from others */ 2844 /*
2845 * if new block allocation occurs, that block
2846 * should be displaced from others
2847 */
2848 unsigned displace_new_blocks:1;
2849
2453 struct list_head t_list; 2850 struct list_head t_list;
2454}; 2851};
2455 2852
2456/* used to keep track of ordered and tail writes, attached to the buffer 2853/*
2854 * used to keep track of ordered and tail writes, attached to the buffer
2457 * head through b_journal_head. 2855 * head through b_journal_head.
2458 */ 2856 */
2459struct reiserfs_jh { 2857struct reiserfs_jh {
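
The "_must_ be first" rule above matters because the journal code finds an already-running transaction through current->journal_info and, when the superblocks match, nests by bumping t_refcount rather than opening a second transaction. A minimal userspace sketch of that check (txn_handle and txn_begin are illustrative stand-ins, not the kernel API):

#include <assert.h>

/* Simplified stand-ins for the kernel types (illustration only). */
struct super_block { int dummy; };

struct txn_handle {
	struct super_block *t_super;	/* must stay first, per the comment */
	int t_refcount;
};

/* Models the per-task current->journal_info slot. */
static struct txn_handle *journal_info;

/* Begin a transaction on sb, or nest into one already running. */
static struct txn_handle *txn_begin(struct txn_handle *fresh,
				    struct super_block *sb)
{
	if (journal_info) {
		struct txn_handle *th = journal_info;

		/* nesting is only legal on the same filesystem */
		assert(th->t_super == sb);
		th->t_refcount++;
		return th;
	}
	fresh->t_super = sb;
	fresh->t_refcount = 1;
	journal_info = fresh;
	return fresh;
}

int main(void)
{
	struct super_block sb = { 0 };
	struct txn_handle outer = { 0, 0 }, inner = { 0, 0 };

	struct txn_handle *a = txn_begin(&outer, &sb);
	struct txn_handle *b = txn_begin(&inner, &sb);	/* nests instead */

	assert(a == b && a->t_refcount == 2);
	return 0;
}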
@@ -2550,20 +2948,18 @@ int B_IS_IN_TREE(const struct buffer_head *);
2550extern void copy_item_head(struct item_head *to, 2948extern void copy_item_head(struct item_head *to,
2551 const struct item_head *from); 2949 const struct item_head *from);
2552 2950
2553// first key is in cpu form, second - le 2951/* first key is in cpu form, second - le */
2554extern int comp_short_keys(const struct reiserfs_key *le_key, 2952extern int comp_short_keys(const struct reiserfs_key *le_key,
2555 const struct cpu_key *cpu_key); 2953 const struct cpu_key *cpu_key);
2556extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); 2954extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from);
2557 2955
2558// both are in le form 2956/* both are in le form */
2559extern int comp_le_keys(const struct reiserfs_key *, 2957extern int comp_le_keys(const struct reiserfs_key *,
2560 const struct reiserfs_key *); 2958 const struct reiserfs_key *);
2561extern int comp_short_le_keys(const struct reiserfs_key *, 2959extern int comp_short_le_keys(const struct reiserfs_key *,
2562 const struct reiserfs_key *); 2960 const struct reiserfs_key *);
2563 2961
2564// 2962/* get key version from on disk key - kludge */
2565// get key version from on disk key - kludge
2566//
2567static inline int le_key_version(const struct reiserfs_key *key) 2963static inline int le_key_version(const struct reiserfs_key *key)
2568{ 2964{
2569 int type; 2965 int type;
@@ -2640,12 +3036,12 @@ void padd_item(char *item, int total_length, int length);
2640 3036
2641/* inode.c */ 3037/* inode.c */
2642/* args for the create parameter of reiserfs_get_block */ 3038/* args for the create parameter of reiserfs_get_block */
2643#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ 3039#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
2644#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ 3040#define GET_BLOCK_CREATE 1 /* add anything you need to find block */
2645#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ 3041#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */
2646#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ 3042#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */
2647#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ 3043#define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */
2648#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ 3044#define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */
2649 3045
2650void reiserfs_read_locked_inode(struct inode *inode, 3046void reiserfs_read_locked_inode(struct inode *inode,
2651 struct reiserfs_iget_args *args); 3047 struct reiserfs_iget_args *args);
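
Apart from GET_BLOCK_NO_CREATE, these create-argument values are distinct power-of-two bits, so a caller can OR several behaviors into one argument. A small standalone illustration (the particular combination shown is hypothetical):

#include <stdio.h>

/* Values as defined in the hunk above; all bits are distinct. */
#define GET_BLOCK_NO_CREATE	0
#define GET_BLOCK_CREATE	1
#define GET_BLOCK_NO_HOLE	2
#define GET_BLOCK_READ_DIRECT	4
#define GET_BLOCK_NO_IMUX	8
#define GET_BLOCK_NO_DANGLE	16

int main(void)
{
	/* hypothetical caller: allocate, but leave no transaction running */
	int create = GET_BLOCK_CREATE | GET_BLOCK_NO_DANGLE;

	if (create & GET_BLOCK_CREATE)
		printf("may allocate blocks / convert tails\n");
	if (create & GET_BLOCK_NO_DANGLE)
		printf("must not leave a transaction running\n");
	if (!(create & GET_BLOCK_NO_HOLE))
		printf("holes are acceptable\n");
	return 0;
}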
@@ -2844,25 +3240,49 @@ struct buffer_head *get_FEB(struct tree_balance *);
2844 3240
2845/* bitmap.c */ 3241/* bitmap.c */
2846 3242
2847/* structure contains hints for block allocator, and it is a container for 3243/*
2848 * arguments, such as node, search path, transaction_handle, etc. */ 3244 * structure contains hints for block allocator, and it is a container for
3245 * arguments, such as node, search path, transaction_handle, etc.
3246 */
2849struct __reiserfs_blocknr_hint { 3247struct __reiserfs_blocknr_hint {
2850 struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ 3248 /* inode passed to allocator, if we allocate unf. nodes */
3249 struct inode *inode;
3250
2851 sector_t block; /* file offset, in blocks */ 3251 sector_t block; /* file offset, in blocks */
2852 struct in_core_key key; 3252 struct in_core_key key;
2853 struct treepath *path; /* search path, used by allocator to deternine search_start by 3253
2854 * various ways */ 3254 /*
2855 struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and 3255 * search path, used by allocator to determine search_start by
2856 * bitmap blocks changes */ 3256 * various ways
3257 */
3258 struct treepath *path;
3259
3260 /*
3261 * transaction handle is needed to log super blocks
3262 * and bitmap blocks changes
3263 */
3264 struct reiserfs_transaction_handle *th;
3265
2857 b_blocknr_t beg, end; 3266 b_blocknr_t beg, end;
2858 b_blocknr_t search_start; /* a field used to transfer search start value (block number) 3267
2859 * between different block allocator procedures 3268 /*
2860 * (determine_search_start() and others) */ 3269 * a field used to transfer search start value (block number)
2861 int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed 3270 * between different block allocator procedures
2862 * function that do actual allocation */ 3271 * (determine_search_start() and others)
2863 3272 */
2864 unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for 3273 b_blocknr_t search_start;
2865 * formatted/unformatted blocks with/without preallocation */ 3274
3275 /*
 3276 * is set in the determine_prealloc_size() function, used by the
 3277 * underlying function that does the actual allocation
3278 */
3279 int prealloc_size;
3280
3281 /*
 3282 * the allocator uses different policies for getting disk
3283 * space for formatted/unformatted blocks with/without preallocation
3284 */
3285 unsigned formatted_node:1;
2866 unsigned preallocate:1; 3286 unsigned preallocate:1;
2867}; 3287};
2868 3288
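
A toy model may make the hint's role clearer: one allocator stage fills the scratchpad in, the next one reads it, and search_start carries the scan cursor between requests. The sketch below mirrors a few of the fields; alloc_from_hint is a made-up helper, not the reiserfs allocator:

#include <stdio.h>

typedef unsigned long b_blocknr_t;

/* Trimmed-down model of the hint container above (illustration only). */
struct blocknr_hint {
	b_blocknr_t beg, end;		/* window the allocator may use */
	b_blocknr_t search_start;	/* cursor carried between stages */
	unsigned formatted_node:1;	/* policy selectors, as described */
	unsigned preallocate:1;
};

/* Made-up helper: take the first free block at or after search_start. */
static b_blocknr_t alloc_from_hint(struct blocknr_hint *hint,
				   const unsigned char *free_map)
{
	b_blocknr_t b;

	for (b = hint->search_start; b < hint->end; b++) {
		if (free_map[b]) {
			/* remember where the next request should resume */
			hint->search_start = b + 1;
			return b;
		}
	}
	return 0;	/* nothing free in the hinted window */
}

int main(void)
{
	unsigned char map[16] = { 0, 0, 0, 1, 1, 0, 1, 0 };
	struct blocknr_hint hint = { .beg = 0, .end = 16, .search_start = 2 };

	printf("got %lu\n", alloc_from_hint(&hint, map));	/* 3 */
	printf("got %lu\n", alloc_from_hint(&hint, map));	/* 4 */
	return 0;
}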
@@ -2956,13 +3376,15 @@ __u32 r5_hash(const signed char *msg, int len);
2956#define reiserfs_test_le_bit test_bit_le 3376#define reiserfs_test_le_bit test_bit_le
2957#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le 3377#define reiserfs_find_next_zero_le_bit find_next_zero_bit_le
2958 3378
2959/* sometimes reiserfs_truncate may require to allocate few new blocks 3379/*
2960 to perform indirect2direct conversion. People probably used to 3380 * sometimes reiserfs_truncate may require to allocate few new blocks
2961 think, that truncate should work without problems on a filesystem 3381 * to perform indirect2direct conversion. People probably used to
2962 without free disk space. They may complain that they can not 3382 * think, that truncate should work without problems on a filesystem
2963 truncate due to lack of free disk space. This spare space allows us 3383 * without free disk space. They may complain that they can not
2964 to not worry about it. 500 is probably too much, but it should be 3384 * truncate due to lack of free disk space. This spare space allows us
2965 absolutely safe */ 3385 * to not worry about it. 500 is probably too much, but it should be
3386 * absolutely safe
3387 */
2966#define SPARE_SPACE 500 3388#define SPARE_SPACE 500
2967 3389
2968/* prototypes from ioctl.c */ 3390/* prototypes from ioctl.c */
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index a4ef5cd606eb..037b00c40f1f 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -53,8 +53,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
53 } 53 }
54 bforget(bh); 54 bforget(bh);
55 55
56 /* old disk layout detection; those partitions can be mounted, but 56 /*
57 * cannot be resized */ 57 * old disk layout detection; those partitions can be mounted, but
58 * cannot be resized
59 */
58 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size 60 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
59 != REISERFS_DISK_OFFSET_IN_BYTES) { 61 != REISERFS_DISK_OFFSET_IN_BYTES) {
60 printk 62 printk
@@ -86,12 +88,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
86 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 88 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
87 return -ENOMEM; 89 return -ENOMEM;
88 } 90 }
89 /* the new journal bitmaps are zero filled, now we copy in the bitmap 91 /*
 90 ** node pointers from the old journal bitmap structs, and then 92 * the new journal bitmaps are zero filled, now we copy in
91 ** transfer the new data structures into the journal struct. 93 * the bitmap node pointers from the old journal bitmap
92 ** 94 * structs, and then transfer the new data structures
93 ** using the copy_size var below allows this code to work for 95 * into the journal struct.
94 ** both shrinking and expanding the FS. 96 *
97 * using the copy_size var below allows this code to work for
98 * both shrinking and expanding the FS.
95 */ 99 */
96 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; 100 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
97 copy_size = 101 copy_size =
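
The copy_size trick above is just "copy min(old, new) elements", which lets a single code path serve both directions of the resize. A standalone sketch (copy_bitmap_ptrs is an illustrative name):

#include <stdio.h>
#include <string.h>

/* Copying min(old, new) elements serves both expanding and shrinking. */
static void copy_bitmap_ptrs(void **dst, size_t n_new,
			     void *const *src, size_t n_old)
{
	size_t copy_size = n_new < n_old ? n_new : n_old;

	memcpy(dst, src, copy_size * sizeof(*dst));
}

int main(void)
{
	int a, b, c;
	void *old_ptrs[3] = { &a, &b, &c };
	void *new_ptrs[5] = { 0 };

	copy_bitmap_ptrs(new_ptrs, 5, old_ptrs, 3);	/* expand: copies 3 */
	printf("copied %s\n", new_ptrs[2] == &c ? "ok" : "bad");
	return 0;
}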
@@ -101,36 +105,45 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
101 jb = SB_JOURNAL(s)->j_list_bitmap + i; 105 jb = SB_JOURNAL(s)->j_list_bitmap + i;
102 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); 106 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
103 107
104 /* just in case vfree schedules on us, copy the new 108 /*
105 ** pointer into the journal struct before freeing the 109 * just in case vfree schedules on us, copy the new
106 ** old one 110 * pointer into the journal struct before freeing the
111 * old one
107 */ 112 */
108 node_tmp = jb->bitmaps; 113 node_tmp = jb->bitmaps;
109 jb->bitmaps = jbitmap[i].bitmaps; 114 jb->bitmaps = jbitmap[i].bitmaps;
110 vfree(node_tmp); 115 vfree(node_tmp);
111 } 116 }
112 117
113 /* allocate additional bitmap blocks, reallocate array of bitmap 118 /*
114 * block pointers */ 119 * allocate additional bitmap blocks, reallocate
120 * array of bitmap block pointers
121 */
115 bitmap = 122 bitmap =
116 vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); 123 vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
117 if (!bitmap) { 124 if (!bitmap) {
118 /* Journal bitmaps are still supersized, but the memory isn't 125 /*
119 * leaked, so I guess it's ok */ 126 * Journal bitmaps are still supersized, but the
127 * memory isn't leaked, so I guess it's ok
128 */
120 printk("reiserfs_resize: unable to allocate memory.\n"); 129 printk("reiserfs_resize: unable to allocate memory.\n");
121 return -ENOMEM; 130 return -ENOMEM;
122 } 131 }
123 for (i = 0; i < bmap_nr; i++) 132 for (i = 0; i < bmap_nr; i++)
124 bitmap[i] = old_bitmap[i]; 133 bitmap[i] = old_bitmap[i];
125 134
126 /* This doesn't go through the journal, but it doesn't have to. 135 /*
127 * The changes are still atomic: We're synced up when the journal 136 * This doesn't go through the journal, but it doesn't have to.
128 * transaction begins, and the new bitmaps don't matter if the 137 * The changes are still atomic: We're synced up when the
129 * transaction fails. */ 138 * journal transaction begins, and the new bitmaps don't
139 * matter if the transaction fails.
140 */
130 for (i = bmap_nr; i < bmap_nr_new; i++) { 141 for (i = bmap_nr; i < bmap_nr_new; i++) {
131 int depth; 142 int depth;
132 /* don't use read_bitmap_block since it will cache 143 /*
133 * the uninitialized bitmap */ 144 * don't use read_bitmap_block since it will cache
145 * the uninitialized bitmap
146 */
134 depth = reiserfs_write_unlock_nested(s); 147 depth = reiserfs_write_unlock_nested(s);
135 bh = sb_bread(s, i * s->s_blocksize * 8); 148 bh = sb_bread(s, i * s->s_blocksize * 8);
136 reiserfs_write_lock_nested(s, depth); 149 reiserfs_write_lock_nested(s, depth);
@@ -147,7 +160,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
147 depth = reiserfs_write_unlock_nested(s); 160 depth = reiserfs_write_unlock_nested(s);
148 sync_dirty_buffer(bh); 161 sync_dirty_buffer(bh);
149 reiserfs_write_lock_nested(s, depth); 162 reiserfs_write_lock_nested(s, depth);
150 // update bitmap_info stuff 163 /* update bitmap_info stuff */
151 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 164 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
152 brelse(bh); 165 brelse(bh);
153 } 166 }
@@ -156,9 +169,11 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
156 vfree(old_bitmap); 169 vfree(old_bitmap);
157 } 170 }
158 171
159 /* begin transaction, if there was an error, it's fine. Yes, we have 172 /*
173 * begin transaction, if there was an error, it's fine. Yes, we have
160 * incorrect bitmaps now, but none of it is ever going to touch the 174 * incorrect bitmaps now, but none of it is ever going to touch the
161 * disk anyway. */ 175 * disk anyway.
176 */
162 err = journal_begin(&th, s, 10); 177 err = journal_begin(&th, s, 10);
163 if (err) 178 if (err)
164 return err; 179 return err;
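
The swap-before-vfree pattern in the hunk above generalizes: publish the replacement pointer in the shared structure first, then free the old buffer, so anything that runs while the free sleeps sees the new buffer rather than freed memory. A plain-C model of the ordering (illustration only; the kernel code also relies on the journal locking):

#include <stdlib.h>

/* A shared structure whose bitmaps pointer other contexts may read. */
struct journal_model {
	int *bitmaps;
};

/*
 * Publish the replacement pointer before freeing the old buffer: if
 * the free (vfree() in the kernel) sleeps and another context reads
 * the struct meanwhile, it sees the new buffer, never freed memory.
 */
static void replace_bitmaps(struct journal_model *j, int *fresh)
{
	int *old = j->bitmaps;

	j->bitmaps = fresh;	/* publish first */
	free(old);		/* then release the old buffer */
}

int main(void)
{
	struct journal_model j = { malloc(4 * sizeof(int)) };

	replace_bitmaps(&j, malloc(8 * sizeof(int)));
	free(j.bitmaps);
	return 0;
}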
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 40b3e77c8ff3..aa86757e48f8 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -8,46 +8,6 @@
8 * Pereslavl-Zalessky Russia 8 * Pereslavl-Zalessky Russia
9 */ 9 */
10 10
11/*
12 * This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * reiserfs_check_path
27 * pathrelse_and_restore
28 * pathrelse
29 * search_by_key_reada
30 * search_by_key
31 * search_for_position_by_key
32 * comp_items
33 * prepare_for_direct_item
34 * prepare_for_direntry_item
35 * prepare_for_delete_or_cut
36 * calc_deleted_bytes_number
37 * init_tb_struct
38 * padd_item
39 * reiserfs_delete_item
40 * reiserfs_delete_solid_item
41 * reiserfs_delete_object
42 * maybe_indirect_to_direct
43 * indirect_to_direct_roll_back
44 * reiserfs_cut_from_item
45 * truncate_directory
46 * reiserfs_do_truncate
47 * reiserfs_paste_into_item
48 * reiserfs_insert_item
49 */
50
51#include <linux/time.h> 11#include <linux/time.h>
52#include <linux/string.h> 12#include <linux/string.h>
53#include <linux/pagemap.h> 13#include <linux/pagemap.h>
@@ -65,21 +25,21 @@ inline int B_IS_IN_TREE(const struct buffer_head *bh)
65 return (B_LEVEL(bh) != FREE_LEVEL); 25 return (B_LEVEL(bh) != FREE_LEVEL);
66} 26}
67 27
68// 28/* to get item head in le form */
69// to gets item head in le form
70//
71inline void copy_item_head(struct item_head *to, 29inline void copy_item_head(struct item_head *to,
72 const struct item_head *from) 30 const struct item_head *from)
73{ 31{
74 memcpy(to, from, IH_SIZE); 32 memcpy(to, from, IH_SIZE);
75} 33}
76 34
77/* k1 is pointer to on-disk structure which is stored in little-endian 35/*
78 form. k2 is pointer to cpu variable. For key of items of the same 36 * k1 is pointer to on-disk structure which is stored in little-endian
79 object this returns 0. 37 * form. k2 is pointer to cpu variable. For key of items of the same
80 Returns: -1 if key1 < key2 38 * object this returns 0.
81 0 if key1 == key2 39 * Returns: -1 if key1 < key2
82 1 if key1 > key2 */ 40 * 0 if key1 == key2
41 * 1 if key1 > key2
42 */
83inline int comp_short_keys(const struct reiserfs_key *le_key, 43inline int comp_short_keys(const struct reiserfs_key *le_key,
84 const struct cpu_key *cpu_key) 44 const struct cpu_key *cpu_key)
85{ 45{
@@ -97,11 +57,13 @@ inline int comp_short_keys(const struct reiserfs_key *le_key,
97 return 0; 57 return 0;
98} 58}
99 59
100/* k1 is pointer to on-disk structure which is stored in little-endian 60/*
101 form. k2 is pointer to cpu variable. 61 * k1 is pointer to on-disk structure which is stored in little-endian
102 Compare keys using all 4 key fields. 62 * form. k2 is pointer to cpu variable.
103 Returns: -1 if key1 < key2 0 63 * Compare keys using all 4 key fields.
104 if key1 = key2 1 if key1 > key2 */ 64 * Returns: -1 if key1 < key2, 0 if key1 == key2,
 65 * 1 if key1 > key2
66 */
105static inline int comp_keys(const struct reiserfs_key *le_key, 67static inline int comp_keys(const struct reiserfs_key *le_key,
106 const struct cpu_key *cpu_key) 68 const struct cpu_key *cpu_key)
107{ 69{
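
The -1/0/+1 convention spelled out in these comments is the usual three-way comparator contract; comp_short_keys() applies it field by field after converting each little-endian field. Demonstrated on plain integers (cmp_u32 is an illustrative helper, not from reiserfs):

#include <stdio.h>

/* The -1 / 0 / 1 contract on a single field. */
static int cmp_u32(unsigned int a, unsigned int b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

int main(void)
{
	/* prints: -1 0 1 */
	printf("%d %d %d\n", cmp_u32(1, 2), cmp_u32(2, 2), cmp_u32(3, 2));
	return 0;
}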
@@ -155,15 +117,17 @@ inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
155 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); 117 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); 118 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157 119
158 // find out version of the key 120 /* find out version of the key */
159 version = le_key_version(from); 121 version = le_key_version(from);
160 to->version = version; 122 to->version = version;
161 to->on_disk_key.k_offset = le_key_k_offset(version, from); 123 to->on_disk_key.k_offset = le_key_k_offset(version, from);
162 to->on_disk_key.k_type = le_key_k_type(version, from); 124 to->on_disk_key.k_type = le_key_k_type(version, from);
163} 125}
164 126
165// this does not say which one is bigger, it only returns 1 if keys 127/*
166// are not equal, 0 otherwise 128 * this does not say which one is bigger, it only returns 1 if keys
129 * are not equal, 0 otherwise
130 */
167inline int comp_le_keys(const struct reiserfs_key *k1, 131inline int comp_le_keys(const struct reiserfs_key *k1,
168 const struct reiserfs_key *k2) 132 const struct reiserfs_key *k2)
169{ 133{
@@ -177,24 +141,27 @@ inline int comp_le_keys(const struct reiserfs_key *k1,
177 * *pos = number of the searched element if found, else the * 141 * *pos = number of the searched element if found, else the *
178 * number of the first element that is larger than key. * 142 * number of the first element that is larger than key. *
179 **************************************************************************/ 143 **************************************************************************/
180/* For those not familiar with binary search: lbound is the leftmost item that it 144/*
181 could be, rbound the rightmost item that it could be. We examine the item 145 * For those not familiar with binary search: lbound is the leftmost item
182 halfway between lbound and rbound, and that tells us either that we can increase 146 * that it could be, rbound the rightmost item that it could be. We examine
183 lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that 147 * the item halfway between lbound and rbound, and that tells us either
184 there are no possible items, and we have not found it. With each examination we 148 * that we can increase lbound, or decrease rbound, or that we have found it,
185 cut the number of possible items it could be by one more than half rounded down, 149 * or if lbound <= rbound that there are no possible items, and we have not
186 or we find it. */ 150 * found it. With each examination we cut the number of possible items it
151 * could be by one more than half rounded down, or we find it.
152 */
187static inline int bin_search(const void *key, /* Key to search for. */ 153static inline int bin_search(const void *key, /* Key to search for. */
188 const void *base, /* First item in the array. */ 154 const void *base, /* First item in the array. */
189 int num, /* Number of items in the array. */ 155 int num, /* Number of items in the array. */
190 int width, /* Item size in the array. 156 /*
191 searched. Lest the reader be 157 * Item size in the array. Lest the reader be
192 confused, note that this is crafted 158 * confused, note that this is crafted as a
193 as a general function, and when it 159 * general function, and when it is applied
194 is applied specifically to the array 160 * specifically to the array of item headers in a
195 of item headers in a node, width 161 * node, width is actually the item header size,
196 is actually the item header size not 162 * not the item size.
197 the item size. */ 163 */
164 int width,
198 int *pos /* Number of the searched for element. */ 165 int *pos /* Number of the searched for element. */
199 ) 166 )
200{ 167{
@@ -216,8 +183,10 @@ static inline int bin_search(const void *key, /* Key to search for. */
216 return ITEM_FOUND; /* Key found in the array. */ 183 return ITEM_FOUND; /* Key found in the array. */
217 } 184 }
218 185
219 /* bin_search did not find given key, it returns position of key, 186 /*
220 that is minimal and greater than the given one. */ 187 * bin_search did not find given key, it returns position of key,
188 * that is minimal and greater than the given one.
189 */
221 *pos = lbound; 190 *pos = lbound;
222 return ITEM_NOT_FOUND; 191 return ITEM_NOT_FOUND;
223} 192}
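
For reference, a self-contained sketch of the contract the comment describes: on a hit *pos is the match index, on a miss it is the index of the first element greater than the key. The width parameter walks any array of fixed-size records, which is why the same routine serves item headers. (bin_search_sketch is illustrative, not the kernel routine.)

#include <stdio.h>

#define ITEM_FOUND	1
#define ITEM_NOT_FOUND	0

static int cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a, y = *(const int *)b;

	return (x > y) - (x < y);
}

/* Walks any array of fixed-size records, 'width' bytes apart. */
static int bin_search_sketch(const void *key, const void *base, int num,
			     int width, int *pos,
			     int (*cmp)(const void *, const void *))
{
	int lbound = 0, rbound = num - 1, j, c;

	while (lbound <= rbound) {
		j = (lbound + rbound) / 2;
		c = cmp((const char *)base + j * width, key);
		if (c < 0)
			lbound = j + 1;
		else if (c > 0)
			rbound = j - 1;
		else {
			*pos = j;
			return ITEM_FOUND;
		}
	}
	*pos = lbound;	/* first element greater than the key */
	return ITEM_NOT_FOUND;
}

int main(void)
{
	int arr[] = { 10, 20, 30, 40 };
	int key = 25, pos;
	int r = bin_search_sketch(&key, arr, 4, sizeof(arr[0]), &pos, cmp_int);

	printf("found=%d pos=%d\n", r, pos);	/* found=0 pos=2 */
	return 0;
}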
@@ -234,10 +203,14 @@ static const struct reiserfs_key MAX_KEY = {
234 __constant_cpu_to_le32(0xffffffff)},} 203 __constant_cpu_to_le32(0xffffffff)},}
235}; 204};
236 205
237/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom 206/*
238 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 207 * Get delimiting key of the buffer by looking for it in the buffers in the
239 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 208 * path, starting from the bottom of the path, and going upwards. We must
240 case we return a special key, either MIN_KEY or MAX_KEY. */ 209 * check the path's validity at each step. If the key is not in the path,
210 * there is no delimiting key in the tree (buffer is first or last buffer
211 * in tree), and in this case we return a special key, either MIN_KEY or
212 * MAX_KEY.
213 */
241static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, 214static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242 const struct super_block *sb) 215 const struct super_block *sb)
243{ 216{
@@ -270,7 +243,10 @@ static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_pat
270 PATH_OFFSET_PBUFFER(chk_path, 243 PATH_OFFSET_PBUFFER(chk_path,
271 path_offset + 1)->b_blocknr) 244 path_offset + 1)->b_blocknr)
272 return &MAX_KEY; 245 return &MAX_KEY;
273 /* Return delimiting key if position in the parent is not equal to zero. */ 246 /*
247 * Return delimiting key if position in the parent
248 * is not equal to zero.
249 */
274 if (position) 250 if (position)
275 return internal_key(parent, position - 1); 251 return internal_key(parent, position - 1);
276 } 252 }
@@ -308,15 +284,23 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
308 path_offset)) > 284 path_offset)) >
309 B_NR_ITEMS(parent)) 285 B_NR_ITEMS(parent))
310 return &MIN_KEY; 286 return &MIN_KEY;
311 /* Check whether parent at the path really points to the child. */ 287 /*
288 * Check whether parent at the path really points
289 * to the child.
290 */
312 if (B_N_CHILD_NUM(parent, position) != 291 if (B_N_CHILD_NUM(parent, position) !=
313 PATH_OFFSET_PBUFFER(chk_path, 292 PATH_OFFSET_PBUFFER(chk_path,
314 path_offset + 1)->b_blocknr) 293 path_offset + 1)->b_blocknr)
315 return &MIN_KEY; 294 return &MIN_KEY;
316 /* Return delimiting key if position in the parent is not the last one. */ 295
296 /*
297 * Return delimiting key if position in the parent
298 * is not the last one.
299 */
317 if (position != B_NR_ITEMS(parent)) 300 if (position != B_NR_ITEMS(parent))
318 return internal_key(parent, position); 301 return internal_key(parent, position);
319 } 302 }
303
320 /* Return MAX_KEY if we are in the root of the buffer tree. */ 304 /* Return MAX_KEY if we are in the root of the buffer tree. */
321 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> 305 if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
322 b_blocknr == SB_ROOT_BLOCK(sb)) 306 b_blocknr == SB_ROOT_BLOCK(sb))
@@ -324,13 +308,20 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
324 return &MIN_KEY; 308 return &MIN_KEY;
325} 309}
326 310
327/* Check whether a key is contained in the tree rooted from a buffer at a path. */ 311/*
328/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in 312 * Check whether a key is contained in the tree rooted from a buffer at a path.
329 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 313 * This works by looking at the left and right delimiting keys for the buffer
330 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 314 * in the last path_element in the path. These delimiting keys are stored
331 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 315 * at least one level above that buffer in the tree. If the buffer is the
332static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ 316 * first or last node in the tree order then one of the delimiting keys may
333 const struct cpu_key *key, /* Key which should be checked. */ 317 * be absent, and in this case get_lkey and get_rkey return a special key
318 * which is MIN_KEY or MAX_KEY.
319 */
320static inline int key_in_buffer(
321 /* Path which should be checked. */
322 struct treepath *chk_path,
323 /* Key which should be checked. */
324 const struct cpu_key *key,
334 struct super_block *sb 325 struct super_block *sb
335 ) 326 )
336{ 327{
@@ -359,9 +350,11 @@ int reiserfs_check_path(struct treepath *p)
359 return 0; 350 return 0;
360} 351}
361 352
362/* Drop the reference to each buffer in a path and restore 353/*
354 * Drop the reference to each buffer in a path and restore
363 * dirty bits clean when preparing the buffer for the log. 355 * dirty bits clean when preparing the buffer for the log.
364 * This version should only be called from fix_nodes() */ 356 * This version should only be called from fix_nodes()
357 */
365void pathrelse_and_restore(struct super_block *sb, 358void pathrelse_and_restore(struct super_block *sb,
366 struct treepath *search_path) 359 struct treepath *search_path)
367{ 360{
@@ -418,14 +411,17 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
418 } 411 }
419 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 412 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); 413 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
414
 415 /* free space does not match the calculated amount of used space */
421 if (used_space != blocksize - blkh_free_space(blkh)) { 416 if (used_space != blocksize - blkh_free_space(blkh)) {
422 /* free space does not match to calculated amount of use space */
423 reiserfs_warning(NULL, "reiserfs-5082", 417 reiserfs_warning(NULL, "reiserfs-5082",
424 "free space seems wrong: %z", bh); 418 "free space seems wrong: %z", bh);
425 return 0; 419 return 0;
426 } 420 }
427 // FIXME: it is_leaf will hit performance too much - we may have 421 /*
428 // return 1 here 422 * FIXME: this check in is_leaf will hit performance too much - we
 423 * may have to return 1 here
424 */
429 425
430 /* check tables of item heads */ 426 /* check tables of item heads */
431 ih = (struct item_head *)(buf + BLKH_SIZE); 427 ih = (struct item_head *)(buf + BLKH_SIZE);
@@ -460,7 +456,7 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
460 prev_location = ih_location(ih); 456 prev_location = ih_location(ih);
461 } 457 }
462 458
463 // one may imagine much more checks 459 /* one may imagine many more checks */
464 return 1; 460 return 1;
465} 461}
466 462
@@ -481,8 +477,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
481 } 477 }
482 478
483 nr = blkh_nr_item(blkh); 479 nr = blkh_nr_item(blkh);
480 /* for internal which is not root we might check min number of keys */
484 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { 481 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
485 /* for internal which is not root we might check min number of keys */
486 reiserfs_warning(NULL, "reiserfs-5088", 482 reiserfs_warning(NULL, "reiserfs-5088",
487 "number of key seems wrong: %z", bh); 483 "number of key seems wrong: %z", bh);
488 return 0; 484 return 0;
@@ -494,12 +490,15 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
494 "free space seems wrong: %z", bh); 490 "free space seems wrong: %z", bh);
495 return 0; 491 return 0;
496 } 492 }
497 // one may imagine much more checks 493
494 /* one may imagine many more checks */
498 return 1; 495 return 1;
499} 496}
500 497
501// make sure that bh contains formatted node of reiserfs tree of 498/*
502// 'level'-th level 499 * make sure that bh contains formatted node of reiserfs tree of
500 * 'level'-th level
501 */
503static int is_tree_node(struct buffer_head *bh, int level) 502static int is_tree_node(struct buffer_head *bh, int level)
504{ 503{
505 if (B_LEVEL(bh) != level) { 504 if (B_LEVEL(bh) != level) {
@@ -546,7 +545,8 @@ static int search_by_key_reada(struct super_block *s,
546 for (j = 0; j < i; j++) { 545 for (j = 0; j < i; j++) {
547 /* 546 /*
548 * note, this needs attention if we are getting rid of the BKL 547 * note, this needs attention if we are getting rid of the BKL
549 * you have to make sure the prepared bit isn't set on this buffer 548 * you have to make sure the prepared bit isn't set on this
549 * buffer
550 */ 550 */
551 if (!buffer_uptodate(bh[j])) { 551 if (!buffer_uptodate(bh[j])) {
552 if (depth == -1) 552 if (depth == -1)
@@ -558,39 +558,34 @@ static int search_by_key_reada(struct super_block *s,
558 return depth; 558 return depth;
559} 559}
560 560
561/************************************************************************** 561/*
562 * Algorithm SearchByKey * 562 * This function fills up the path from the root to the leaf as it
563 * look for item in the Disk S+Tree by its key * 563 * descends the tree looking for the key. It uses reiserfs_bread to
564 * Input: sb - super block * 564 * try to find buffers in the cache given their block number. If it
565 * key - pointer to the key to search * 565 * does not find them in the cache it reads them from disk. For each
566 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * 566 * node search_by_key finds using reiserfs_bread it then uses
567 * search_path - path from the root to the needed leaf * 567 * bin_search to look through that node. bin_search will find the
568 **************************************************************************/ 568 * position of the block_number of the next node if it is looking
569 569 * through an internal node. If it is looking through a leaf node
570/* This function fills up the path from the root to the leaf as it 570 * bin_search will find the position of the item which has key either
571 descends the tree looking for the key. It uses reiserfs_bread to 571 * equal to given key, or which is the maximal key less than the given
572 try to find buffers in the cache given their block number. If it 572 * key. search_by_key returns a path that must be checked for the
573 does not find them in the cache it reads them from disk. For each 573 * correctness of the top of the path but need not be checked for the
574 node search_by_key finds using reiserfs_bread it then uses 574 * correctness of the bottom of the path
575 bin_search to look through that node. bin_search will find the 575 */
576 position of the block_number of the next node if it is looking 576/*
577 through an internal node. If it is looking through a leaf node 577 * search_by_key - search for key (and item) in stree
578 bin_search will find the position of the item which has key either 578 * @sb: superblock
579 equal to given key, or which is the maximal key less than the given 579 * @key: pointer to key to search for
580 key. search_by_key returns a path that must be checked for the 580 * @search_path: Allocated and initialized struct treepath; Returned filled
581 correctness of the top of the path but need not be checked for the 581 * on success.
582 correctness of the bottom of the path */ 582 * @stop_level: How far down the tree to search; use DISK_LEAF_NODE_LEVEL to
583/* The function is NOT SCHEDULE-SAFE! */ 583 * stop at leaf level.
584int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ 584 *
585 struct treepath *search_path,/* This structure was 585 * The function is NOT SCHEDULE-SAFE!
586 allocated and initialized 586 */
587 by the calling 587int search_by_key(struct super_block *sb, const struct cpu_key *key,
588 function. It is filled up 588 struct treepath *search_path, int stop_level)
589 by this function. */
590 int stop_level /* How far down the tree to search. To
591 stop at leaf level - set to
592 DISK_LEAF_NODE_LEVEL */
593 )
594{ 589{
595 b_blocknr_t block_number; 590 b_blocknr_t block_number;
596 int expected_level; 591 int expected_level;
@@ -609,17 +604,22 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
609 604
610 PROC_INFO_INC(sb, search_by_key); 605 PROC_INFO_INC(sb, search_by_key);
611 606
612 /* As we add each node to a path we increase its count. This means that 607 /*
613 we must be careful to release all nodes in a path before we either 608 * As we add each node to a path we increase its count. This means
614 discard the path struct or re-use the path struct, as we do here. */ 609 * that we must be careful to release all nodes in a path before we
610 * either discard the path struct or re-use the path struct, as we
611 * do here.
612 */
615 613
616 pathrelse(search_path); 614 pathrelse(search_path);
617 615
618 right_neighbor_of_leaf_node = 0; 616 right_neighbor_of_leaf_node = 0;
619 617
620 /* With each iteration of this loop we search through the items in the 618 /*
621 current node, and calculate the next current node(next path element) 619 * With each iteration of this loop we search through the items in the
622 for the next iteration of this loop.. */ 620 * current node, and calculate the next current node(next path element)
621 * for the next iteration of this loop..
622 */
623 block_number = SB_ROOT_BLOCK(sb); 623 block_number = SB_ROOT_BLOCK(sb);
624 expected_level = -1; 624 expected_level = -1;
625 while (1) { 625 while (1) {
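
A compact model of the descent loop this comment introduces: binary-search each node, follow the selected child pointer while the node is internal, stop at the leaf. Everything below is invented for the example; the real search_by_key() additionally revalidates buffers, restarts when fs_changed() reports a concurrent tree change, and issues read-ahead:

#include <stdio.h>

struct node {
	int nr_keys;
	const int *keys;
	const struct node *const *children;	/* NULL for leaf nodes */
};

/* Position of the child to follow (linear stand-in for bin_search). */
static int first_ge(const struct node *n, int key)
{
	int i;

	for (i = 0; i < n->nr_keys; i++)
		if (n->keys[i] >= key)
			break;
	return i;
}

static const struct node *descend(const struct node *root, int key)
{
	const struct node *n = root;

	while (n->children)	/* internal node: follow a child pointer */
		n = n->children[first_ge(n, key)];
	return n;		/* leaf level reached */
}

int main(void)
{
	static const int lk1[] = { 5, 10 }, lk2[] = { 20, 30 }, rk[] = { 15 };
	static const struct node leaf1 = { 2, lk1, NULL };
	static const struct node leaf2 = { 2, lk2, NULL };
	static const struct node *const kids[] = { &leaf1, &leaf2 };
	static const struct node root = { 1, rk, kids };

	printf("landed on leaf with first key %d\n",
	       descend(&root, 22)->keys[0]);	/* 20 */
	return 0;
}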
@@ -639,8 +639,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
639 ++search_path->path_length); 639 ++search_path->path_length);
640 fs_gen = get_generation(sb); 640 fs_gen = get_generation(sb);
641 641
642 /* Read the next tree node, and set the last element in the path to 642 /*
643 have a pointer to it. */ 643 * Read the next tree node, and set the last element
644 * in the path to have a pointer to it.
645 */
644 if ((bh = last_element->pe_buffer = 646 if ((bh = last_element->pe_buffer =
645 sb_getblk(sb, block_number))) { 647 sb_getblk(sb, block_number))) {
646 648
@@ -676,9 +678,12 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
676 expected_level = SB_TREE_HEIGHT(sb); 678 expected_level = SB_TREE_HEIGHT(sb);
677 expected_level--; 679 expected_level--;
678 680
679 /* It is possible that schedule occurred. We must check whether the key 681 /*
680 to search is still in the tree rooted from the current buffer. If 682 * It is possible that schedule occurred. We must check
681 not then repeat search from the root. */ 683 * whether the key to search is still in the tree rooted
684 * from the current buffer. If not then repeat search
685 * from the root.
686 */
682 if (fs_changed(fs_gen, sb) && 687 if (fs_changed(fs_gen, sb) &&
683 (!B_IS_IN_TREE(bh) || 688 (!B_IS_IN_TREE(bh) ||
684 B_LEVEL(bh) != expected_level || 689 B_LEVEL(bh) != expected_level ||
@@ -689,8 +694,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
689 sbk_restarted[expected_level - 1]); 694 sbk_restarted[expected_level - 1]);
690 pathrelse(search_path); 695 pathrelse(search_path);
691 696
692 /* Get the root block number so that we can repeat the search 697 /*
693 starting from the root. */ 698 * Get the root block number so that we can
699 * repeat the search starting from the root.
700 */
694 block_number = SB_ROOT_BLOCK(sb); 701 block_number = SB_ROOT_BLOCK(sb);
695 expected_level = -1; 702 expected_level = -1;
696 right_neighbor_of_leaf_node = 0; 703 right_neighbor_of_leaf_node = 0;
@@ -699,9 +706,11 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
699 continue; 706 continue;
700 } 707 }
701 708
702 /* only check that the key is in the buffer if key is not 709 /*
703 equal to the MAX_KEY. Latter case is only possible in 710 * only check that the key is in the buffer if key is not
704 "finish_unfinished()" processing during mount. */ 711 * equal to the MAX_KEY. Latter case is only possible in
712 * "finish_unfinished()" processing during mount.
713 */
705 RFALSE(comp_keys(&MAX_KEY, key) && 714 RFALSE(comp_keys(&MAX_KEY, key) &&
706 !key_in_buffer(search_path, key, sb), 715 !key_in_buffer(search_path, key, sb),
707 "PAP-5130: key is not in the buffer"); 716 "PAP-5130: key is not in the buffer");
@@ -713,8 +722,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
713 } 722 }
714#endif 723#endif
715 724
716 // make sure, that the node contents look like a node of 725 /*
717 // certain level 726 * make sure that the node contents look like a node of
727 * certain level
728 */
718 if (!is_tree_node(bh, expected_level)) { 729 if (!is_tree_node(bh, expected_level)) {
719 reiserfs_error(sb, "vs-5150", 730 reiserfs_error(sb, "vs-5150",
720 "invalid format found in block %ld. " 731 "invalid format found in block %ld. "
@@ -743,21 +754,31 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
743 } 754 }
744 755
745 /* we are not in the stop level */ 756 /* we are not in the stop level */
757 /*
758 * item has been found, so we choose the pointer which
759 * is to the right of the found one
760 */
746 if (retval == ITEM_FOUND) 761 if (retval == ITEM_FOUND)
747 /* item has been found, so we choose the pointer which is to the right of the found one */
748 last_element->pe_position++; 762 last_element->pe_position++;
749 763
750 /* if item was not found we choose the position which is to 764 /*
751 the left of the found item. This requires no code, 765 * if item was not found we choose the position which is to
752 bin_search did it already. */ 766 * the left of the found item. This requires no code,
767 * bin_search did it already.
768 */
753 769
754 /* So we have chosen a position in the current node which is 770 /*
755 an internal node. Now we calculate child block number by 771 * So we have chosen a position in the current node which is
756 position in the node. */ 772 * an internal node. Now we calculate child block number by
773 * position in the node.
774 */
757 block_number = 775 block_number =
758 B_N_CHILD_NUM(bh, last_element->pe_position); 776 B_N_CHILD_NUM(bh, last_element->pe_position);
759 777
760 /* if we are going to read leaf nodes, try for read ahead as well */ 778 /*
779 * if we are going to read leaf nodes, try for read
780 * ahead as well
781 */
761 if ((search_path->reada & PATH_READA) && 782 if ((search_path->reada & PATH_READA) &&
762 node_level == DISK_LEAF_NODE_LEVEL + 1) { 783 node_level == DISK_LEAF_NODE_LEVEL + 1) {
763 int pos = last_element->pe_position; 784 int pos = last_element->pe_position;
@@ -789,26 +810,28 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
789 } 810 }
790} 811}
791 812
792/* Form the path to an item and position in this item which contains 813/*
793 file byte defined by key. If there is no such item 814 * Form the path to an item and position in this item which contains
794 corresponding to the key, we point the path to the item with 815 * file byte defined by key. If there is no such item
795 maximal key less than key, and *pos_in_item is set to one 816 * corresponding to the key, we point the path to the item with
796 past the last entry/byte in the item. If searching for entry in a 817 * maximal key less than key, and *pos_in_item is set to one
797 directory item, and it is not found, *pos_in_item is set to one 818 * past the last entry/byte in the item. If searching for entry in a
798 entry more than the entry with maximal key which is less than the 819 * directory item, and it is not found, *pos_in_item is set to one
799 sought key. 820 * entry more than the entry with maximal key which is less than the
800 821 * sought key.
801 Note that if there is no entry in this same node which is one more, 822 *
802 then we point to an imaginary entry. for direct items, the 823 * Note that if there is no entry in this same node which is one more,
803 position is in units of bytes, for indirect items the position is 824 * then we point to an imaginary entry. for direct items, the
804 in units of blocknr entries, for directory items the position is in 825 * position is in units of bytes, for indirect items the position is
805 units of directory entries. */ 826 * in units of blocknr entries, for directory items the position is in
806 827 * units of directory entries.
828 */
807/* The function is NOT SCHEDULE-SAFE! */ 829/* The function is NOT SCHEDULE-SAFE! */
808int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ 830int search_for_position_by_key(struct super_block *sb,
809 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ 831 /* Key to search (cpu variable) */
810 struct treepath *search_path /* Filled up by this function. */ 832 const struct cpu_key *p_cpu_key,
811 ) 833 /* Filled up by this function. */
834 struct treepath *search_path)
812{ 835{
813 struct item_head *p_le_ih; /* pointer to on-disk structure */ 836 struct item_head *p_le_ih; /* pointer to on-disk structure */
814 int blk_size; 837 int blk_size;
@@ -851,7 +874,8 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b
851 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { 874 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
852 return FILE_NOT_FOUND; 875 return FILE_NOT_FOUND;
853 } 876 }
854 // FIXME: quite ugly this far 877
878 /* FIXME: quite ugly this far */
855 879
856 item_offset = le_ih_k_offset(p_le_ih); 880 item_offset = le_ih_k_offset(p_le_ih);
857 offset = cpu_key_k_offset(p_cpu_key); 881 offset = cpu_key_k_offset(p_cpu_key);
@@ -866,8 +890,10 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b
866 return POSITION_FOUND; 890 return POSITION_FOUND;
867 } 891 }
868 892
869 /* Needed byte is not contained in the item pointed to by the 893 /*
870 path. Set pos_in_item out of the item. */ 894 * Needed byte is not contained in the item pointed to by the
895 * path. Set pos_in_item out of the item.
896 */
871 if (is_indirect_le_ih(p_le_ih)) 897 if (is_indirect_le_ih(p_le_ih))
872 pos_in_item(search_path) = 898 pos_in_item(search_path) =
873 ih_item_len(p_le_ih) / UNFM_P_SIZE; 899 ih_item_len(p_le_ih) / UNFM_P_SIZE;
@@ -896,15 +922,13 @@ int comp_items(const struct item_head *stored_ih, const struct treepath *path)
896 return memcmp(stored_ih, ih, IH_SIZE); 922 return memcmp(stored_ih, ih, IH_SIZE);
897} 923}
898 924
899/* unformatted nodes are not logged anymore, ever. This is safe 925/* unformatted nodes are not logged anymore, ever. This is safe now */
900** now
901*/
902#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) 926#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
903 927
904// block can not be forgotten as it is in I/O or held by someone 928/* block can not be forgotten as it is in I/O or held by someone */
905#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) 929#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
906 930
907// prepare for delete or cut of direct item 931/* prepare for delete or cut of direct item */
908static inline int prepare_for_direct_item(struct treepath *path, 932static inline int prepare_for_direct_item(struct treepath *path,
909 struct item_head *le_ih, 933 struct item_head *le_ih,
910 struct inode *inode, 934 struct inode *inode,
@@ -917,9 +941,8 @@ static inline int prepare_for_direct_item(struct treepath *path,
917 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 941 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
918 return M_DELETE; 942 return M_DELETE;
919 } 943 }
920 // new file gets truncated 944 /* new file gets truncated */
921 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { 945 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
922 //
923 round_len = ROUND_UP(new_file_length); 946 round_len = ROUND_UP(new_file_length);
924 /* this was new_file_length < le_ih ... */ 947 /* this was new_file_length < le_ih ... */
925 if (round_len < le_ih_k_offset(le_ih)) { 948 if (round_len < le_ih_k_offset(le_ih)) {
@@ -933,12 +956,13 @@ static inline int prepare_for_direct_item(struct treepath *path,
933 return M_CUT; /* Cut from this item. */ 956 return M_CUT; /* Cut from this item. */
934 } 957 }
935 958
936 // old file: items may have any length 959 /* old file: items may have any length */
937 960
938 if (new_file_length < le_ih_k_offset(le_ih)) { 961 if (new_file_length < le_ih_k_offset(le_ih)) {
939 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 962 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
940 return M_DELETE; /* Delete this item. */ 963 return M_DELETE; /* Delete this item. */
941 } 964 }
965
942 /* Calculate first position and size for cutting from item. */ 966 /* Calculate first position and size for cutting from item. */
943 *cut_size = -(ih_item_len(le_ih) - 967 *cut_size = -(ih_item_len(le_ih) -
944 (pos_in_item(path) = 968 (pos_in_item(path) =
@@ -957,12 +981,15 @@ static inline int prepare_for_direntry_item(struct treepath *path,
957 RFALSE(ih_entry_count(le_ih) != 2, 981 RFALSE(ih_entry_count(le_ih) != 2,
958 "PAP-5220: incorrect empty directory item (%h)", le_ih); 982 "PAP-5220: incorrect empty directory item (%h)", le_ih);
959 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 983 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
960 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ 984 /* Delete the directory item containing "." and ".." entry. */
985 return M_DELETE;
961 } 986 }
962 987
963 if (ih_entry_count(le_ih) == 1) { 988 if (ih_entry_count(le_ih) == 1) {
964 /* Delete the directory item such as there is one record only 989 /*
965 in this item */ 990 * Delete the directory item, as there is only one record
 991 * left in this item
992 */
966 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 993 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
967 return M_DELETE; 994 return M_DELETE;
968 } 995 }
@@ -976,14 +1003,30 @@ static inline int prepare_for_direntry_item(struct treepath *path,
976 1003
977#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) 1004#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
978 1005
979/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. 1006/*
980 If the path points to an indirect item, remove some number of its unformatted nodes. 1007 * If the path points to a directory or direct item, calculate mode
981 In case of file truncate calculate whether this item must be deleted/truncated or last 1008 * and the size cut, for balance.
982 unformatted node of this item will be converted to a direct item. 1009 * If the path points to an indirect item, remove some number of its
983 This function returns a determination of what balance mode the calling function should employ. */ 1010 * unformatted nodes.
984static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed 1011 * In case of file truncate calculate whether this item must be
985 from end of the file. */ 1012 * deleted/truncated or last unformatted node of this item will be
986 int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */ 1013 * converted to a direct item.
1014 * This function returns a determination of what balance mode the
1015 * calling function should employ.
1016 */
1017static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th,
1018 struct inode *inode,
1019 struct treepath *path,
1020 const struct cpu_key *item_key,
1021 /*
1022 * Number of unformatted nodes
1023 * which were removed from end
1024 * of the file.
1025 */
1026 int *removed,
1027 int *cut_size,
1028 /* MAX_KEY_OFFSET in case of delete. */
1029 unsigned long long new_file_length
987 ) 1030 )
988{ 1031{
989 struct super_block *sb = inode->i_sb; 1032 struct super_block *sb = inode->i_sb;
@@ -1023,8 +1066,10 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1023 int pos = 0; 1066 int pos = 0;
1024 1067
1025 if ( new_file_length == max_reiserfs_offset (inode) ) { 1068 if ( new_file_length == max_reiserfs_offset (inode) ) {
1026 /* prepare_for_delete_or_cut() is called by 1069 /*
1027 * reiserfs_delete_item() */ 1070 * prepare_for_delete_or_cut() is called by
1071 * reiserfs_delete_item()
1072 */
1028 new_file_length = 0; 1073 new_file_length = 0;
1029 delete = 1; 1074 delete = 1;
1030 } 1075 }
@@ -1040,9 +1085,12 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1040 __le32 *unfm; 1085 __le32 *unfm;
1041 __u32 block; 1086 __u32 block;
1042 1087
1043 /* Each unformatted block deletion may involve one additional 1088 /*
1044 * bitmap block into the transaction, thereby the initial 1089 * Each unformatted block deletion may involve
1045 * journal space reservation might not be enough. */ 1090 * one additional bitmap block into the transaction,
1091 * thereby the initial journal space reservation
1092 * might not be enough.
1093 */
1046 if (!delete && (*cut_size) != 0 && 1094 if (!delete && (*cut_size) != 0 &&
1047 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) 1095 reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1048 break; 1096 break;
@@ -1074,17 +1122,21 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
1074 break; 1122 break;
1075 } 1123 }
1076 } 1124 }
1077 /* a trick. If the buffer has been logged, this will do nothing. If 1125 /*
1078 ** we've broken the loop without logging it, it will restore the 1126 * a trick. If the buffer has been logged, this will
1079 ** buffer */ 1127 * do nothing. If we've broken the loop without logging
1128 * it, it will restore the buffer
1129 */
1080 reiserfs_restore_prepared_buffer(sb, bh); 1130 reiserfs_restore_prepared_buffer(sb, bh);
1081 } while (need_re_search && 1131 } while (need_re_search &&
1082 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); 1132 search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1083 pos_in_item(path) = pos * UNFM_P_SIZE; 1133 pos_in_item(path) = pos * UNFM_P_SIZE;
1084 1134
1085 if (*cut_size == 0) { 1135 if (*cut_size == 0) {
1086 /* Nothing were cut. maybe convert last unformatted node to the 1136 /*
1087 * direct item? */ 1137 * Nothing was cut. Maybe convert the last unformatted node to a
1138 * direct item?
1139 */
1088 result = M_CONVERT; 1140 result = M_CONVERT;
1089 } 1141 }
1090 return result; 1142 return result;
@@ -1104,9 +1156,11 @@ static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1104 (mode == 1156 (mode ==
1105 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; 1157 M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1106 if (is_direntry_le_ih(p_le_ih)) { 1158 if (is_direntry_le_ih(p_le_ih)) {
1107 /* return EMPTY_DIR_SIZE; We delete emty directoris only. 1159 /*
1108 * we can't use EMPTY_DIR_SIZE, as old format dirs have a different 1160 * return EMPTY_DIR_SIZE; We delete empty directories only.
1109 * empty size. ick. FIXME, is this right? */ 1161 * we can't use EMPTY_DIR_SIZE, as old format dirs have a
1162 * different empty size. ick. FIXME, is this right?
1163 */
1110 return del_size; 1164 return del_size;
1111 } 1165 }
1112 1166
@@ -1169,7 +1223,8 @@ char head2type(struct item_head *ih)
1169} 1223}
1170#endif 1224#endif
1171 1225
1172/* Delete object item. 1226/*
1227 * Delete object item.
1173 * th - active transaction handle 1228 * th - active transaction handle
1174 * path - path to the deleted item 1229 * path - path to the deleted item
1175 * item_key - key to search for the deleted item 1230 * item_key - key to search for the deleted item
@@ -1221,7 +1276,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1221 1276
1222 PROC_INFO_INC(sb, delete_item_restarted); 1277 PROC_INFO_INC(sb, delete_item_restarted);
1223 1278
1224 // file system changed, repeat search 1279 /* file system changed, repeat search */
1225 ret_value = 1280 ret_value =
1226 search_for_position_by_key(sb, item_key, path); 1281 search_for_position_by_key(sb, item_key, path);
1227 if (ret_value == IO_ERROR) 1282 if (ret_value == IO_ERROR)
@@ -1238,16 +1293,18 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1238 unfix_nodes(&s_del_balance); 1293 unfix_nodes(&s_del_balance);
1239 return 0; 1294 return 0;
1240 } 1295 }
1241 // reiserfs_delete_item returns item length when success 1296
1297 /* reiserfs_delete_item returns item length when success */
1242 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1298 ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1243 q_ih = tp_item_head(path); 1299 q_ih = tp_item_head(path);
1244 quota_cut_bytes = ih_item_len(q_ih); 1300 quota_cut_bytes = ih_item_len(q_ih);
1245 1301
1246 /* hack so the quota code doesn't have to guess if the file 1302 /*
1247 ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 1303 * hack so the quota code doesn't have to guess if the file has a
1248 ** We test the offset because the tail might have been 1304 * tail. On tail insert, we allocate quota for 1 unformatted node.
1249 ** split into multiple items, and we only want to decrement for 1305 * We test the offset because the tail might have been
1250 ** the unfm node once 1306 * split into multiple items, and we only want to decrement for
1307 * the unfm node once
1251 */ 1308 */
1252 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { 1309 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1253 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { 1310 if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
@@ -1261,24 +1318,28 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1261 int off; 1318 int off;
1262 char *data; 1319 char *data;
1263 1320
1264 /* We are in direct2indirect conversion, so move tail contents 1321 /*
1265 to the unformatted node */ 1322 * We are in direct2indirect conversion, so move tail contents
1266 /* note, we do the copy before preparing the buffer because we 1323 * to the unformatted node
1267 ** don't care about the contents of the unformatted node yet. 1324 */
1268 ** the only thing we really care about is the direct item's data 1325 /*
1269 ** is in the unformatted node. 1326 * note, we do the copy before preparing the buffer because we
1270 ** 1327 * don't care about the contents of the unformatted node yet.
1271 ** Otherwise, we would have to call reiserfs_prepare_for_journal on 1328 * the only thing we really care about is the direct item's
1272 ** the unformatted node, which might schedule, meaning we'd have to 1329 * data is in the unformatted node.
1273 ** loop all the way back up to the start of the while loop. 1330 *
1274 ** 1331 * Otherwise, we would have to call
1275 ** The unformatted node must be dirtied later on. We can't be 1332 * reiserfs_prepare_for_journal on the unformatted node,
1276 ** sure here if the entire tail has been deleted yet. 1333 * which might schedule, meaning we'd have to loop all the
1277 ** 1334 * way back up to the start of the while loop.
1278 ** un_bh is from the page cache (all unformatted nodes are 1335 *
1279 ** from the page cache) and might be a highmem page. So, we 1336 * The unformatted node must be dirtied later on. We can't be
1280 ** can't use un_bh->b_data. 1337 * sure here if the entire tail has been deleted yet.
1281 ** -clm 1338 *
1339 * un_bh is from the page cache (all unformatted nodes are
1340 * from the page cache) and might be a highmem page. So, we
1341 * can't use un_bh->b_data.
1342 * -clm
1282 */ 1343 */
1283 1344
1284 data = kmap_atomic(un_bh->b_page); 1345 data = kmap_atomic(un_bh->b_page);
@@ -1288,6 +1349,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1288 ret_value); 1349 ret_value);
1289 kunmap_atomic(data); 1350 kunmap_atomic(data);
1290 } 1351 }
1352
1291 /* Perform balancing after all resources have been collected at once. */ 1353 /* Perform balancing after all resources have been collected at once. */
1292 do_balance(&s_del_balance, NULL, NULL, M_DELETE); 1354 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1293 1355
@@ -1304,20 +1366,21 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1304 return ret_value; 1366 return ret_value;
1305} 1367}
1306 1368
1307/* Summary Of Mechanisms For Handling Collisions Between Processes: 1369/*
1308 1370 * Summary Of Mechanisms For Handling Collisions Between Processes:
1309 deletion of the body of the object is performed by iput(), with the 1371 *
1310 result that if multiple processes are operating on a file, the 1372 * deletion of the body of the object is performed by iput(), with the
1311 deletion of the body of the file is deferred until the last process 1373 * result that if multiple processes are operating on a file, the
1312 that has an open inode performs its iput(). 1374 * deletion of the body of the file is deferred until the last process
1313 1375 * that has an open inode performs its iput().
1314 writes and truncates are protected from collisions by use of 1376 *
1315 semaphores. 1377 * writes and truncates are protected from collisions by use of
1316 1378 * semaphores.
1317 creates, linking, and mknod are protected from collisions with other 1379 *
1318 processes by making the reiserfs_add_entry() the last step in the 1380 * creates, linking, and mknod are protected from collisions with other
1319	 creation, and then rolling back all changes if there was a collision.	1381	 * processes by making reiserfs_add_entry() the last step in the
1320 - Hans 1382 * creation, and then rolling back all changes if there was a collision.
1383 * - Hans
1321*/ 1384*/
1322 1385
1323/* this deletes item which never gets split */ 1386/* this deletes item which never gets split */
@@ -1347,7 +1410,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1347 } 1410 }
1348 if (retval != ITEM_FOUND) { 1411 if (retval != ITEM_FOUND) {
1349 pathrelse(&path); 1412 pathrelse(&path);
1350 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir 1413 /*
 1414		 * No need for a warning if there is just no free
1415 * space to insert '..' item into the
1416 * newly-created subdir
1417 */
1351 if (! 1418 if (!
1352 ((unsigned long long) 1419 ((unsigned long long)
1353 GET_HASH_VALUE(le_key_k_offset 1420 GET_HASH_VALUE(le_key_k_offset
@@ -1376,7 +1443,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1376 1443
1377 if (retval == CARRY_ON) { 1444 if (retval == CARRY_ON) {
1378 do_balance(&tb, NULL, NULL, M_DELETE); 1445 do_balance(&tb, NULL, NULL, M_DELETE);
1379 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ 1446 /*
1447 * Should we count quota for item? (we don't
1448 * count quotas for save-links)
1449 */
1450 if (inode) {
1380 int depth; 1451 int depth;
1381#ifdef REISERQUOTA_DEBUG 1452#ifdef REISERQUOTA_DEBUG
1382 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, 1453 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
@@ -1391,7 +1462,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1391 } 1462 }
1392 break; 1463 break;
1393 } 1464 }
1394 // IO_ERROR, NO_DISK_SPACE, etc 1465
1466 /* IO_ERROR, NO_DISK_SPACE, etc */
1395 reiserfs_warning(th->t_super, "vs-5360", 1467 reiserfs_warning(th->t_super, "vs-5360",
1396 "could not delete %K due to fix_nodes failure", 1468 "could not delete %K due to fix_nodes failure",
1397 &cpu_key); 1469 &cpu_key);
@@ -1447,11 +1519,13 @@ static void unmap_buffers(struct page *page, loff_t pos)
1447 do { 1519 do {
1448 next = bh->b_this_page; 1520 next = bh->b_this_page;
1449 1521
1450 /* we want to unmap the buffers that contain the tail, and 1522 /*
1451 ** all the buffers after it (since the tail must be at the 1523 * we want to unmap the buffers that contain
1452 ** end of the file). We don't want to unmap file data 1524 * the tail, and all the buffers after it
1453 ** before the tail, since it might be dirty and waiting to 1525 * (since the tail must be at the end of the
1454 ** reach disk 1526 * file). We don't want to unmap file data
1527 * before the tail, since it might be dirty
1528 * and waiting to reach disk
1455 */ 1529 */
1456 cur_index += bh->b_size; 1530 cur_index += bh->b_size;
1457 if (cur_index > tail_index) { 1531 if (cur_index > tail_index) {
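
The walk above is the usual traversal of a page's buffer ring: a
page's buffer_heads are linked circularly through b_this_page, so the
loop accumulates byte offsets and only touches buffers at or past the
tail. A sketch, assuming cur_index starts at the first byte the page
covers:

	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head, *next;
	loff_t cur_index = page_offset(page);

	do {
		next = bh->b_this_page;		/* ring closes on head */
		cur_index += bh->b_size;
		if (cur_index > tail_index) {
			/* covers bytes at or past the tail: unmap it */
		}
		bh = next;
	} while (bh != head);
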
@@ -1476,9 +1550,10 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1476 BUG_ON(!th->t_trans_id); 1550 BUG_ON(!th->t_trans_id);
1477 BUG_ON(new_file_size != inode->i_size); 1551 BUG_ON(new_file_size != inode->i_size);
1478 1552
1479 /* the page being sent in could be NULL if there was an i/o error 1553 /*
1480 ** reading in the last block. The user will hit problems trying to 1554 * the page being sent in could be NULL if there was an i/o error
1481 ** read the file, but for now we just skip the indirect2direct 1555 * reading in the last block. The user will hit problems trying to
1556 * read the file, but for now we just skip the indirect2direct
1482 */ 1557 */
1483 if (atomic_read(&inode->i_count) > 1 || 1558 if (atomic_read(&inode->i_count) > 1 ||
1484 !tail_has_to_be_packed(inode) || 1559 !tail_has_to_be_packed(inode) ||
@@ -1490,17 +1565,18 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1490 pathrelse(path); 1565 pathrelse(path);
1491 return cut_bytes; 1566 return cut_bytes;
1492 } 1567 }
1568
1493 /* Perform the conversion to a direct_item. */ 1569 /* Perform the conversion to a direct_item. */
1494 /* return indirect_to_direct(inode, path, item_key,
1495 new_file_size, mode); */
1496 return indirect2direct(th, inode, page, path, item_key, 1570 return indirect2direct(th, inode, page, path, item_key,
1497 new_file_size, mode); 1571 new_file_size, mode);
1498} 1572}
1499 1573
1500/* we did indirect_to_direct conversion. And we have inserted direct 1574/*
1501	 item successesfully, but there were no disk space to cut unfm	1575	 * we did an indirect_to_direct conversion and inserted the direct
1502	 pointer being converted. Therefore we have to delete inserted	1576	 * item successfully, but there was no disk space to cut the unfm
1503	 direct item(s) */	1577	 * pointer being converted. Therefore we have to delete the inserted
1578 * direct item(s)
1579 */
1504static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, 1580static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1505 struct inode *inode, struct treepath *path) 1581 struct inode *inode, struct treepath *path)
1506{ 1582{
@@ -1509,7 +1585,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1509 int removed; 1585 int removed;
1510 BUG_ON(!th->t_trans_id); 1586 BUG_ON(!th->t_trans_id);
1511 1587
1512 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! 1588 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);
1513 tail_key.key_length = 4; 1589 tail_key.key_length = 4;
1514 1590
1515 tail_len = 1591 tail_len =
@@ -1539,7 +1615,6 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1539 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " 1615 reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1540 "conversion has been rolled back due to " 1616 "conversion has been rolled back due to "
1541 "lack of disk space"); 1617 "lack of disk space");
1542 //mark_file_without_tail (inode);
1543 mark_inode_dirty(inode); 1618 mark_inode_dirty(inode);
1544} 1619}
1545 1620
@@ -1551,15 +1626,18 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1551 struct page *page, loff_t new_file_size) 1626 struct page *page, loff_t new_file_size)
1552{ 1627{
1553 struct super_block *sb = inode->i_sb; 1628 struct super_block *sb = inode->i_sb;
1554 /* Every function which is going to call do_balance must first 1629 /*
1555 create a tree_balance structure. Then it must fill up this 1630 * Every function which is going to call do_balance must first
1556 structure by using the init_tb_struct and fix_nodes functions. 1631 * create a tree_balance structure. Then it must fill up this
1557 After that we can make tree balancing. */ 1632 * structure by using the init_tb_struct and fix_nodes functions.
 1633	 * After that we can balance the tree.
1634 */
1558 struct tree_balance s_cut_balance; 1635 struct tree_balance s_cut_balance;
1559 struct item_head *p_le_ih; 1636 struct item_head *p_le_ih;
1560 int cut_size = 0, /* Amount to be cut. */ 1637 int cut_size = 0; /* Amount to be cut. */
1561 ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ 1638 int ret_value = CARRY_ON;
1562 is_inode_locked = 0; 1639 int removed = 0; /* Number of the removed unformatted nodes. */
1640 int is_inode_locked = 0;
1563 char mode; /* Mode of the balance. */ 1641 char mode; /* Mode of the balance. */
1564 int retval2 = -1; 1642 int retval2 = -1;
1565 int quota_cut_bytes; 1643 int quota_cut_bytes;
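
The comment above states the invariant every balancing caller follows;
as a sketch, the canonical sequence is (mode and size_change being
placeholders, and fix_nodes possibly asking for a repeated search if it
had to schedule):

	struct tree_balance tb;
	int err;

	init_tb_struct(th, &tb, sb, path, size_change);
	err = fix_nodes(mode, &tb, NULL, NULL);	/* collect resources */
	if (err == CARRY_ON)
		do_balance(&tb, NULL, NULL, mode); /* cannot schedule now */
	else
		unfix_nodes(&tb);		/* back out and retry/fail */
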
@@ -1571,21 +1649,27 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1571 init_tb_struct(th, &s_cut_balance, inode->i_sb, path, 1649 init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1572 cut_size); 1650 cut_size);
1573 1651
1574 /* Repeat this loop until we either cut the item without needing 1652 /*
1575 to balance, or we fix_nodes without schedule occurring */ 1653 * Repeat this loop until we either cut the item without needing
1654 * to balance, or we fix_nodes without schedule occurring
1655 */
1576 while (1) { 1656 while (1) {
1577 /* Determine the balance mode, position of the first byte to 1657 /*
1578 be cut, and size to be cut. In case of the indirect item 1658 * Determine the balance mode, position of the first byte to
1579 free unformatted nodes which are pointed to by the cut 1659 * be cut, and size to be cut. In case of the indirect item
1580 pointers. */ 1660 * free unformatted nodes which are pointed to by the cut
1661 * pointers.
1662 */
1581 1663
1582 mode = 1664 mode =
1583 prepare_for_delete_or_cut(th, inode, path, 1665 prepare_for_delete_or_cut(th, inode, path,
1584 item_key, &removed, 1666 item_key, &removed,
1585 &cut_size, new_file_size); 1667 &cut_size, new_file_size);
1586 if (mode == M_CONVERT) { 1668 if (mode == M_CONVERT) {
1587 /* convert last unformatted node to direct item or leave 1669 /*
1588 tail in the unformatted node */ 1670 * convert last unformatted node to direct item or
1671 * leave tail in the unformatted node
1672 */
1589 RFALSE(ret_value != CARRY_ON, 1673 RFALSE(ret_value != CARRY_ON,
1590 "PAP-5570: can not convert twice"); 1674 "PAP-5570: can not convert twice");
1591 1675
@@ -1599,15 +1683,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1599 1683
1600 is_inode_locked = 1; 1684 is_inode_locked = 1;
1601 1685
1602 /* removing of last unformatted node will change value we 1686 /*
1603	 have to return to truncate. Save it */	1687			 * removing the last unformatted node will
 1688			 * change the value we have to return to
 1689			 * truncate. Save it
1690 */
1604 retval2 = ret_value; 1691 retval2 = ret_value;
1605 /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1606 1692
1607 /* So, we have performed the first part of the conversion: 1693 /*
1608	 inserting the new direct item. Now we are removing the	1694			 * So, we have performed the first part of
1609	 last unformatted node pointer. Set key to search for	1695			 * the conversion: inserting the new direct
1610	 it. */	1696			 * item. Now we are removing the last
 1697			 * unformatted node pointer. Set key to
 1698			 * search for it.
1699 */
1611 set_cpu_key_k_type(item_key, TYPE_INDIRECT); 1700 set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1612 item_key->key_length = 4; 1701 item_key->key_length = 4;
1613 new_file_size -= 1702 new_file_size -=
@@ -1650,11 +1739,13 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1650 return (ret_value == IO_ERROR) ? -EIO : -ENOENT; 1739 return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1651 } /* while */ 1740 } /* while */
1652 1741
1653 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1742 /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */
1654 if (ret_value != CARRY_ON) { 1743 if (ret_value != CARRY_ON) {
1655 if (is_inode_locked) { 1744 if (is_inode_locked) {
1656 // FIXME: this seems to be not needed: we are always able 1745 /*
1657 // to cut item 1746 * FIXME: this seems to be not needed: we are always
1747 * able to cut item
1748 */
1658 indirect_to_direct_roll_back(th, inode, path); 1749 indirect_to_direct_roll_back(th, inode, path);
1659 } 1750 }
1660 if (ret_value == NO_DISK_SPACE) 1751 if (ret_value == NO_DISK_SPACE)
@@ -1678,15 +1769,16 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1678 else 1769 else
1679 ret_value = retval2; 1770 ret_value = retval2;
1680 1771
1681 /* For direct items, we only change the quota when deleting the last 1772 /*
1682 ** item. 1773 * For direct items, we only change the quota when deleting the last
1774 * item.
1683 */ 1775 */
1684 p_le_ih = tp_item_head(s_cut_balance.tb_path); 1776 p_le_ih = tp_item_head(s_cut_balance.tb_path);
1685 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1777 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1686 if (mode == M_DELETE && 1778 if (mode == M_DELETE &&
1687 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == 1779 (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1688 1) { 1780 1) {
1689 // FIXME: this is to keep 3.5 happy 1781 /* FIXME: this is to keep 3.5 happy */
1690 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 1782 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1691 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; 1783 quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1692 } else { 1784 } else {
@@ -1697,9 +1789,11 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1697 if (is_inode_locked) { 1789 if (is_inode_locked) {
1698 struct item_head *le_ih = 1790 struct item_head *le_ih =
1699 tp_item_head(s_cut_balance.tb_path); 1791 tp_item_head(s_cut_balance.tb_path);
1700 /* we are going to complete indirect2direct conversion. Make 1792 /*
1701 sure, that we exactly remove last unformatted node pointer 1793 * we are going to complete indirect2direct conversion. Make
1702	 of the item */	1794		 * sure that we remove exactly the last unformatted node
 1795		 * pointer of the item
1796 */
1703 if (!is_indirect_le_ih(le_ih)) 1797 if (!is_indirect_le_ih(le_ih))
1704 reiserfs_panic(sb, "vs-5652", 1798 reiserfs_panic(sb, "vs-5652",
1705 "item must be indirect %h", le_ih); 1799 "item must be indirect %h", le_ih);
@@ -1717,17 +1811,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1717 "(CUT, insert_size==%d)", 1811 "(CUT, insert_size==%d)",
1718 le_ih, s_cut_balance.insert_size[0]); 1812 le_ih, s_cut_balance.insert_size[0]);
1719 } 1813 }
1720 /* it would be useful to make sure, that right neighboring 1814 /*
1721	 item is direct item of this file */	1815		 * it would be useful to make sure that the right neighboring
 1816		 * item is a direct item of this file
1817 */
1722 } 1818 }
1723#endif 1819#endif
1724 1820
1725 do_balance(&s_cut_balance, NULL, NULL, mode); 1821 do_balance(&s_cut_balance, NULL, NULL, mode);
1726 if (is_inode_locked) { 1822 if (is_inode_locked) {
1727 /* we've done an indirect->direct conversion. when the data block 1823 /*
1728 ** was freed, it was removed from the list of blocks that must 1824 * we've done an indirect->direct conversion. when the
1729 ** be flushed before the transaction commits, make sure to 1825 * data block was freed, it was removed from the list of
1730 ** unmap and invalidate it 1826 * blocks that must be flushed before the transaction
1827 * commits, make sure to unmap and invalidate it
1731 */ 1828 */
1732 unmap_buffers(page, tail_pos); 1829 unmap_buffers(page, tail_pos);
1733 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 1830 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
@@ -1758,20 +1855,25 @@ static void truncate_directory(struct reiserfs_transaction_handle *th,
1758 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); 1855 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1759} 1856}
1760 1857
1761/* Truncate file to the new size. Note, this must be called with a transaction 1858/*
1762 already started */ 1859 * Truncate file to the new size. Note, this must be called with a
1860 * transaction already started
1861 */
1763int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, 1862int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1764 struct inode *inode, /* ->i_size contains new size */ 1863 struct inode *inode, /* ->i_size contains new size */
1765 struct page *page, /* up to date for last block */ 1864 struct page *page, /* up to date for last block */
1766 int update_timestamps /* when it is called by 1865 /*
1767 file_release to convert 1866 * when it is called by file_release to convert
1768 the tail - no timestamps 1867 * the tail - no timestamps should be updated
1769 should be updated */ 1868 */
1869 int update_timestamps
1770 ) 1870 )
1771{ 1871{
1772 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ 1872 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1773 struct item_head *p_le_ih; /* Pointer to an item header. */ 1873 struct item_head *p_le_ih; /* Pointer to an item header. */
1774 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1874
1875 /* Key to search for a previous file item. */
1876 struct cpu_key s_item_key;
1775 loff_t file_size, /* Old file size. */ 1877 loff_t file_size, /* Old file size. */
1776 new_file_size; /* New file size. */ 1878 new_file_size; /* New file size. */
1777 int deleted; /* Number of deleted or truncated bytes. */ 1879 int deleted; /* Number of deleted or truncated bytes. */
@@ -1784,8 +1886,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1784 || S_ISLNK(inode->i_mode))) 1886 || S_ISLNK(inode->i_mode)))
1785 return 0; 1887 return 0;
1786 1888
1889 /* deletion of directory - no need to update timestamps */
1787 if (S_ISDIR(inode->i_mode)) { 1890 if (S_ISDIR(inode->i_mode)) {
1788 // deletion of directory - no need to update timestamps
1789 truncate_directory(th, inode); 1891 truncate_directory(th, inode);
1790 return 0; 1892 return 0;
1791 } 1893 }
@@ -1793,7 +1895,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1793 /* Get new file size. */ 1895 /* Get new file size. */
1794 new_file_size = inode->i_size; 1896 new_file_size = inode->i_size;
1795 1897
1796 // FIXME: note, that key type is unimportant here 1898 /* FIXME: note, that key type is unimportant here */
1797 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), 1899 make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1798 TYPE_DIRECT, 3); 1900 TYPE_DIRECT, 3);
1799 1901
@@ -1827,9 +1929,11 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1827 int bytes = 1929 int bytes =
1828 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); 1930 op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1829 1931
1830 /* this may mismatch with real file size: if last direct item 1932 /*
1831	 had no padding zeros and last unformatted node had no free	1933		 * this may mismatch the real file size: if the last direct item
1832	 space, this file would have this file size */	1934		 * had no padding zeros and the last unformatted node had no free
 1935		 * space, the file would have exactly this size
1936 */
1833 file_size = offset + bytes - 1; 1937 file_size = offset + bytes - 1;
1834 } 1938 }
1835 /* 1939 /*
@@ -1867,14 +1971,17 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1867 1971
1868 set_cpu_key_k_offset(&s_item_key, file_size); 1972 set_cpu_key_k_offset(&s_item_key, file_size);
1869 1973
1870 /* While there are bytes to truncate and previous file item is presented in the tree. */ 1974 /*
 1975	 * While there are bytes to truncate and the previous
 1976	 * file item is present in the tree.
1977 */
1871 1978
1872 /* 1979 /*
1873 ** This loop could take a really long time, and could log 1980 * This loop could take a really long time, and could log
1874 ** many more blocks than a transaction can hold. So, we do a polite 1981 * many more blocks than a transaction can hold. So, we do
1875 ** journal end here, and if the transaction needs ending, we make 1982 * a polite journal end here, and if the transaction needs
1876 ** sure the file is consistent before ending the current trans 1983 * ending, we make sure the file is consistent before ending
1877 ** and starting a new one 1984 * the current trans and starting a new one
1878 */ 1985 */
1879 if (journal_transaction_should_end(th, 0) || 1986 if (journal_transaction_should_end(th, 0) ||
1880 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { 1987 reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
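
The polite restart amounts to closing the running transaction once the
file is self-consistent and opening a fresh one before the next loop
iteration. A hedged sketch; reiserfs_update_sd, journal_end and
journal_begin are used with the signatures seen elsewhere in this
patch, orig_len standing for the block count the caller reserved, but
the exact sequence in stree.c may differ:

	if (journal_transaction_should_end(th, 0)) {
		/* leave the file consistent before the handoff */
		reiserfs_update_sd(th, inode);
		err = journal_end(th, sb, orig_len);
		if (!err)
			err = journal_begin(th, sb, orig_len);
	}
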
@@ -1906,7 +2013,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1906 2013
1907 update_and_out: 2014 update_and_out:
1908 if (update_timestamps) { 2015 if (update_timestamps) {
1909 // this is truncate, not file closing 2016 /* this is truncate, not file closing */
1910 inode->i_mtime = CURRENT_TIME_SEC; 2017 inode->i_mtime = CURRENT_TIME_SEC;
1911 inode->i_ctime = CURRENT_TIME_SEC; 2018 inode->i_ctime = CURRENT_TIME_SEC;
1912 } 2019 }
@@ -1918,7 +2025,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1918} 2025}
1919 2026
1920#ifdef CONFIG_REISERFS_CHECK 2027#ifdef CONFIG_REISERFS_CHECK
1921// this makes sure, that we __append__, not overwrite or add holes 2028/* this makes sure, that we __append__, not overwrite or add holes */
1922static void check_research_for_paste(struct treepath *path, 2029static void check_research_for_paste(struct treepath *path,
1923 const struct cpu_key *key) 2030 const struct cpu_key *key)
1924{ 2031{
@@ -1952,13 +2059,22 @@ static void check_research_for_paste(struct treepath *path,
1952} 2059}
1953#endif /* config reiserfs check */ 2060#endif /* config reiserfs check */
1954 2061
1955/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 2062/*
1956int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ 2063 * Paste bytes to the existing item.
1957				 const struct cpu_key *key,	/* Key to search for the needed item. */	2064	 * Returns the number of bytes pasted into the item.
1958 struct inode *inode, /* Inode item belongs to */ 2065 */
1959 const char *body, /* Pointer to the bytes to paste. */ 2066int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th,
2067 /* Path to the pasted item. */
2068 struct treepath *search_path,
2069 /* Key to search for the needed item. */
2070 const struct cpu_key *key,
2071 /* Inode item belongs to */
2072 struct inode *inode,
2073 /* Pointer to the bytes to paste. */
2074 const char *body,
2075 /* Size of pasted bytes. */
1960 int pasted_size) 2076 int pasted_size)
1961{ /* Size of pasted bytes. */ 2077{
1962 struct super_block *sb = inode->i_sb; 2078 struct super_block *sb = inode->i_sb;
1963 struct tree_balance s_paste_balance; 2079 struct tree_balance s_paste_balance;
1964 int retval; 2080 int retval;
@@ -2019,8 +2135,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2019#endif 2135#endif
2020 } 2136 }
2021 2137
2022 /* Perform balancing after all resources are collected by fix_nodes, and 2138 /*
2023 accessing them will not risk triggering schedule. */ 2139 * Perform balancing after all resources are collected by fix_nodes,
2140 * and accessing them will not risk triggering schedule.
2141 */
2024 if (retval == CARRY_ON) { 2142 if (retval == CARRY_ON) {
2025 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); 2143 do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2026 return 0; 2144 return 0;
@@ -2041,7 +2159,8 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
2041 return retval; 2159 return retval;
2042} 2160}
2043 2161
2044/* Insert new item into the buffer at the path. 2162/*
2163 * Insert new item into the buffer at the path.
2045 * th - active transaction handle 2164 * th - active transaction handle
2046 * path - path to the inserted item 2165 * path - path to the inserted item
2047 * ih - pointer to the item header to insert 2166 * ih - pointer to the item header to insert
@@ -2064,8 +2183,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2064 fs_gen = get_generation(inode->i_sb); 2183 fs_gen = get_generation(inode->i_sb);
2065 quota_bytes = ih_item_len(ih); 2184 quota_bytes = ih_item_len(ih);
2066 2185
2067 /* hack so the quota code doesn't have to guess if the file has 2186 /*
2068 ** a tail, links are always tails, so there's no guessing needed 2187 * hack so the quota code doesn't have to guess
2188 * if the file has a tail, links are always tails,
2189 * so there's no guessing needed
2069 */ 2190 */
2070 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) 2191 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2071 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; 2192 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
@@ -2074,8 +2195,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2074 "reiserquota insert_item(): allocating %u id=%u type=%c", 2195 "reiserquota insert_item(): allocating %u id=%u type=%c",
2075 quota_bytes, inode->i_uid, head2type(ih)); 2196 quota_bytes, inode->i_uid, head2type(ih));
2076#endif 2197#endif
2077 /* We can't dirty inode here. It would be immediately written but 2198 /*
2078 * appropriate stat item isn't inserted yet... */ 2199 * We can't dirty inode here. It would be immediately
2200 * written but appropriate stat item isn't inserted yet...
2201 */
2079 depth = reiserfs_write_unlock_nested(inode->i_sb); 2202 depth = reiserfs_write_unlock_nested(inode->i_sb);
2080 retval = dquot_alloc_space_nodirty(inode, quota_bytes); 2203 retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2081 reiserfs_write_lock_nested(inode->i_sb, depth); 2204 reiserfs_write_lock_nested(inode->i_sb, depth);
@@ -2089,7 +2212,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2089#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2212#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2090 s_ins_balance.key = key->on_disk_key; 2213 s_ins_balance.key = key->on_disk_key;
2091#endif 2214#endif
2092 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ 2215 /*
2216 * DQUOT_* can schedule, must check to be sure calling
2217 * fix_nodes is safe
2218 */
2093 if (inode && fs_changed(fs_gen, inode->i_sb)) { 2219 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2094 goto search_again; 2220 goto search_again;
2095 } 2221 }
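
The generation check is the standard reiserfs idiom for "did anyone
rebalance the tree while we slept?": sample the per-super generation
before any call that may schedule, and re-search if it moved. In
outline:

	int fs_gen = get_generation(inode->i_sb);

	dquot_alloc_space_nodirty(inode, quota_bytes);	/* may schedule */

	if (fs_changed(fs_gen, inode->i_sb)) {
		/* tree may have been rebalanced under us and the path
		 * is stale: search for the key again */
		goto search_again;
	}
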
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index c02b6b07508d..6268bb8195c5 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -153,13 +153,15 @@ static int reiserfs_unfreeze(struct super_block *s)
153 153
154extern const struct in_core_key MAX_IN_CORE_KEY; 154extern const struct in_core_key MAX_IN_CORE_KEY;
155 155
156/* this is used to delete "save link" when there are no items of a 156/*
157 file it points to. It can either happen if unlink is completed but 157 * this is used to delete "save link" when there are no items of a
158 "save unlink" removal, or if file has both unlink and truncate 158 * file it points to. It can either happen if unlink is completed but
159 pending and as unlink completes first (because key of "save link" 159 * "save unlink" removal, or if file has both unlink and truncate
160 protecting unlink is bigger that a key lf "save link" which 160 * pending and as unlink completes first (because key of "save link"
161 protects truncate), so there left no items to make truncate 161 * protecting unlink is bigger that a key lf "save link" which
162 completion on */ 162 * protects truncate), so there left no items to make truncate
163 * completion on
164 */
163static int remove_save_link_only(struct super_block *s, 165static int remove_save_link_only(struct super_block *s,
164 struct reiserfs_key *key, int oid_free) 166 struct reiserfs_key *key, int oid_free)
165{ 167{
@@ -282,8 +284,10 @@ static int finish_unfinished(struct super_block *s)
282 284
283 inode = reiserfs_iget(s, &obj_key); 285 inode = reiserfs_iget(s, &obj_key);
284 if (!inode) { 286 if (!inode) {
285 /* the unlink almost completed, it just did not manage to remove 287 /*
286 "save" link and release objectid */ 288 * the unlink almost completed, it just did not
289 * manage to remove "save" link and release objectid
290 */
287 reiserfs_warning(s, "vs-2180", "iget failed for %K", 291 reiserfs_warning(s, "vs-2180", "iget failed for %K",
288 &obj_key); 292 &obj_key);
289 retval = remove_save_link_only(s, &save_link_key, 1); 293 retval = remove_save_link_only(s, &save_link_key, 1);
@@ -303,10 +307,13 @@ static int finish_unfinished(struct super_block *s)
303 reiserfs_write_lock_nested(inode->i_sb, depth); 307 reiserfs_write_lock_nested(inode->i_sb, depth);
304 308
305 if (truncate && S_ISDIR(inode->i_mode)) { 309 if (truncate && S_ISDIR(inode->i_mode)) {
306 /* We got a truncate request for a dir which is impossible. 310 /*
307 The only imaginable way is to execute unfinished truncate request 311 * We got a truncate request for a dir which
308 then boot into old kernel, remove the file and create dir with 312 * is impossible. The only imaginable way is to
309 the same key. */ 313 * execute unfinished truncate request then boot
314 * into old kernel, remove the file and create dir
315 * with the same key.
316 */
310 reiserfs_warning(s, "green-2101", 317 reiserfs_warning(s, "green-2101",
311 "impossible truncate on a " 318 "impossible truncate on a "
312 "directory %k. Please report", 319 "directory %k. Please report",
@@ -320,14 +327,16 @@ static int finish_unfinished(struct super_block *s)
320 if (truncate) { 327 if (truncate) {
321 REISERFS_I(inode)->i_flags |= 328 REISERFS_I(inode)->i_flags |=
322 i_link_saved_truncate_mask; 329 i_link_saved_truncate_mask;
323 /* not completed truncate found. New size was committed together 330 /*
324 with "save" link */ 331 * not completed truncate found. New size was
332 * committed together with "save" link
333 */
325 reiserfs_info(s, "Truncating %k to %Ld ..", 334 reiserfs_info(s, "Truncating %k to %Ld ..",
326 INODE_PKEY(inode), inode->i_size); 335 INODE_PKEY(inode), inode->i_size);
327 reiserfs_truncate_file(inode, 336
328 0 337 /* don't update modification time */
329 /*don't update modification time */ 338 reiserfs_truncate_file(inode, 0);
330 ); 339
331 retval = remove_save_link(inode, truncate); 340 retval = remove_save_link(inode, truncate);
332 } else { 341 } else {
333 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; 342 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
@@ -373,10 +382,12 @@ static int finish_unfinished(struct super_block *s)
373 return retval; 382 return retval;
374} 383}
375 384
376/* to protect file being unlinked from getting lost we "safe" link files 385/*
377	 being unlinked. This link will be deleted in the same transaction with last	386	 * to protect a file being unlinked from getting lost we "save" link files
378	 item of file. mounting the filesystem we scan all these links and remove	387	 * being unlinked. This link will be deleted in the same transaction with the
379	 files which almost got lost	388	 * last item of the file. When mounting the filesystem we scan all these
 389	 * links and remove files which almost got lost
390 */
380void add_save_link(struct reiserfs_transaction_handle *th, 391void add_save_link(struct reiserfs_transaction_handle *th,
381 struct inode *inode, int truncate) 392 struct inode *inode, int truncate)
382{ 393{
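
In practice the pairing is: take the save link inside the transaction
that starts the unlink or truncate, and drop it in the transaction
that finishes the job; finish_unfinished() above covers a crash in
between. A usage sketch with error handling elided:

	add_save_link(th, inode, 0);	/* 0: unlink, 1 would be truncate */

	/* ... delete the file's items, possibly over several
	 * transactions ... */

	retval = remove_save_link(inode, 0);	/* drop the guard */
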
@@ -530,7 +541,10 @@ static void reiserfs_put_super(struct super_block *s)
530 541
531 reiserfs_write_lock(s); 542 reiserfs_write_lock(s);
532 543
533 /* change file system state to current state if it was mounted with read-write permissions */ 544 /*
545 * change file system state to current state if it was mounted
546 * with read-write permissions
547 */
534 if (!(s->s_flags & MS_RDONLY)) { 548 if (!(s->s_flags & MS_RDONLY)) {
535 if (!journal_begin(&th, s, 10)) { 549 if (!journal_begin(&th, s, 10)) {
536 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 550 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
@@ -541,8 +555,9 @@ static void reiserfs_put_super(struct super_block *s)
541 } 555 }
542 } 556 }
543 557
544 /* note, journal_release checks for readonly mount, and can decide not 558 /*
545 ** to do a journal_end 559 * note, journal_release checks for readonly mount, and can
560 * decide not to do a journal_end
546 */ 561 */
547 journal_release(&th, s); 562 journal_release(&th, s);
548 563
@@ -635,8 +650,9 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
635 } 650 }
636 reiserfs_write_lock(inode->i_sb); 651 reiserfs_write_lock(inode->i_sb);
637 652
638 /* this is really only used for atime updates, so they don't have 653 /*
639 ** to be included in O_SYNC or fsync 654 * this is really only used for atime updates, so they don't have
655 * to be included in O_SYNC or fsync
640 */ 656 */
641 err = journal_begin(&th, inode->i_sb, 1); 657 err = journal_begin(&th, inode->i_sb, 1);
642 if (err) 658 if (err)
@@ -789,31 +805,53 @@ static const struct export_operations reiserfs_export_ops = {
789 .get_parent = reiserfs_get_parent, 805 .get_parent = reiserfs_get_parent,
790}; 806};
791 807
792/* this struct is used in reiserfs_getopt () for containing the value for those 808/*
793 mount options that have values rather than being toggles. */ 809 * this struct is used in reiserfs_getopt () for containing the value for
810 * those mount options that have values rather than being toggles.
811 */
794typedef struct { 812typedef struct {
795 char *value; 813 char *value;
796 int setmask; /* bitmask which is to set on mount_options bitmask when this 814 /*
797 value is found, 0 is no bits are to be changed. */ 815 * bitmask which is to set on mount_options bitmask
798	 int clrmask; /* bitmask which is to clear on mount_options bitmask when this	816	 * when this value is found, 0 if no bits are to be changed.
799 value is found, 0 is no bits are to be changed. This is 817 */
800 applied BEFORE setmask */ 818 int setmask;
819 /*
820 * bitmask which is to clear on mount_options bitmask
 821	 * when this value is found, 0 if no bits are to be changed.
822 * This is applied BEFORE setmask
823 */
824 int clrmask;
801} arg_desc_t; 825} arg_desc_t;
802 826
803/* Set this bit in arg_required to allow empty arguments */ 827/* Set this bit in arg_required to allow empty arguments */
804#define REISERFS_OPT_ALLOWEMPTY 31 828#define REISERFS_OPT_ALLOWEMPTY 31
805 829
806/* this struct is used in reiserfs_getopt() for describing the set of reiserfs 830/*
807 mount options */ 831 * this struct is used in reiserfs_getopt() for describing the
832 * set of reiserfs mount options
833 */
808typedef struct { 834typedef struct {
809 char *option_name; 835 char *option_name;
810 int arg_required; /* 0 if argument is not required, not 0 otherwise */ 836
811 const arg_desc_t *values; /* list of values accepted by an option */ 837 /* 0 if argument is not required, not 0 otherwise */
812 int setmask; /* bitmask which is to set on mount_options bitmask when this 838 int arg_required;
813 value is found, 0 is no bits are to be changed. */ 839
814 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 840 /* list of values accepted by an option */
815 value is found, 0 is no bits are to be changed. This is 841 const arg_desc_t *values;
816 applied BEFORE setmask */ 842
843 /*
844 * bitmask which is to set on mount_options bitmask
 845	 * when this value is found, 0 if no bits are to be changed.
846 */
847 int setmask;
848
849 /*
850 * bitmask which is to clear on mount_options bitmask
 851	 * when this value is found, 0 if no bits are to be changed.
852 * This is applied BEFORE setmask
853 */
854 int clrmask;
817} opt_desc_t; 855} opt_desc_t;
818 856
819/* possible values for -o data= */ 857/* possible values for -o data= */
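
Concretely, an option that takes a restricted argument is one
opt_desc_t row whose .values table enumerates the accepted strings.
A hedged sketch of declaring such a pair ("example" and SOME_OPT_BIT
are invented names, not options reiserfs actually accepts):

	/* hypothetical value table for "-o example=" */
	static const arg_desc_t example_values[] = {
		{"on", 1 << SOME_OPT_BIT, 0},	/* sets the bit */
		{"off", 0, 1 << SOME_OPT_BIT},	/* clears it first */
		{.value = NULL}			/* terminator */
	};

	/* its row in the opt_desc_t table: argument required ('e'),
	 * restricted to the values above */
	{"example", .arg_required = 'e', .values = example_values},
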
@@ -834,8 +872,10 @@ static const arg_desc_t barrier_mode[] = {
834 {.value = NULL} 872 {.value = NULL}
835}; 873};
836 874
837/* possible values for "-o block-allocator=" and bits which are to be set in 875/*
838 s_mount_opt of reiserfs specific part of in-core super block */ 876 * possible values for "-o block-allocator=" and bits which are to be set in
877 * s_mount_opt of reiserfs specific part of in-core super block
878 */
839static const arg_desc_t balloc[] = { 879static const arg_desc_t balloc[] = {
840 {"noborder", 1 << REISERFS_NO_BORDER, 0}, 880 {"noborder", 1 << REISERFS_NO_BORDER, 0},
841 {"border", 0, 1 << REISERFS_NO_BORDER}, 881 {"border", 0, 1 << REISERFS_NO_BORDER},
@@ -865,21 +905,25 @@ static const arg_desc_t error_actions[] = {
865 {NULL, 0, 0}, 905 {NULL, 0, 0},
866}; 906};
867 907
868/* proceed only one option from a list *cur - string containing of mount options 908/*
869	 opts - array of options which are accepted	909	 * process only one option from a list. *cur - string containing mount
870 opt_arg - if option is found and requires an argument and if it is specifed 910 * options
871 in the input - pointer to the argument is stored here 911 * opts - array of options which are accepted
872	 bit_flags - if option requires to set a certain bit - it is set here	912	 * opt_arg - if option is found and requires an argument and if it is specified
873 return -1 if unknown option is found, opt->arg_required otherwise */ 913 * in the input - pointer to the argument is stored here
914 * bit_flags - if option requires to set a certain bit - it is set here
915 * return -1 if unknown option is found, opt->arg_required otherwise
916 */
874static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, 917static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
875 char **opt_arg, unsigned long *bit_flags) 918 char **opt_arg, unsigned long *bit_flags)
876{ 919{
877 char *p; 920 char *p;
878 /* foo=bar, 921 /*
879 ^ ^ ^ 922 * foo=bar,
880 | | +-- option_end 923 * ^ ^ ^
881 | +-- arg_start 924 * | | +-- option_end
882 +-- option_start 925 * | +-- arg_start
926 * +-- option_start
883 */ 927 */
884 const opt_desc_t *opt; 928 const opt_desc_t *opt;
885 const arg_desc_t *arg; 929 const arg_desc_t *arg;
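
A caller consumes one option per call and dispatches on the returned
arg_required code; sketched under the signature above (the 'j' journal
device case later in this file follows this shape):

	char *arg = NULL;
	char *pos = options;
	int c;

	while (pos) {
		c = reiserfs_getopt(s, &pos, opts, &arg, mount_options);
		if (c == -1)
			return 0;	/* unknown option: fail the parse */
		if (c == 'j' && arg && *arg) {
			/* an option that carried an argument */
		}
	}
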
@@ -894,9 +938,12 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
894 } 938 }
895 939
896 if (!strncmp(p, "alloc=", 6)) { 940 if (!strncmp(p, "alloc=", 6)) {
897 /* Ugly special case, probably we should redo options parser so that 941 /*
898 it can understand several arguments for some options, also so that 942 * Ugly special case, probably we should redo options
899 it can fill several bitfields with option values. */ 943 * parser so that it can understand several arguments for
944 * some options, also so that it can fill several bitfields
945 * with option values.
946 */
900 if (reiserfs_parse_alloc_options(s, p + 6)) { 947 if (reiserfs_parse_alloc_options(s, p + 6)) {
901 return -1; 948 return -1;
902 } else { 949 } else {
@@ -959,7 +1006,10 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
959 return -1; 1006 return -1;
960 } 1007 }
961 1008
962 /* move to the argument, or to next option if argument is not required */ 1009 /*
1010 * move to the argument, or to next option if argument is not
1011 * required
1012 */
963 p++; 1013 p++;
964 1014
965 if (opt->arg_required 1015 if (opt->arg_required
@@ -996,12 +1046,20 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
996} 1046}
997 1047
998/* returns 0 if something is wrong in option string, 1 - otherwise */ 1048/* returns 0 if something is wrong in option string, 1 - otherwise */
999static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ 1049static int reiserfs_parse_options(struct super_block *s,
1050
1051 /* string given via mount's -o */
1052 char *options,
1053
1054 /*
1055 * after the parsing phase, contains the
1056 * collection of bitflags defining what
1057 * mount options were selected.
1058 */
1000 unsigned long *mount_options, 1059 unsigned long *mount_options,
1001 /* after the parsing phase, contains the 1060
1002 collection of bitflags defining what 1061 /* strtol-ed from NNN of resize=NNN */
1003 mount options were selected. */ 1062 unsigned long *blocks,
1004 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
1005 char **jdev_name, 1063 char **jdev_name,
1006 unsigned int *commit_max_age, 1064 unsigned int *commit_max_age,
1007 char **qf_names, 1065 char **qf_names,
@@ -1011,7 +1069,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1011 char *arg = NULL; 1069 char *arg = NULL;
1012 char *pos; 1070 char *pos;
1013 opt_desc_t opts[] = { 1071 opt_desc_t opts[] = {
1014 /* Compatibility stuff, so that -o notail for old setups still work */ 1072 /*
1073 * Compatibility stuff, so that -o notail for old
1074 * setups still work
1075 */
1015 {"tails",.arg_required = 't',.values = tails}, 1076 {"tails",.arg_required = 't',.values = tails},
1016 {"notail",.clrmask = 1077 {"notail",.clrmask =
1017 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, 1078 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
@@ -1056,8 +1117,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1056 1117
1057 *blocks = 0; 1118 *blocks = 0;
1058 if (!options || !*options) 1119 if (!options || !*options)
1059 /* use default configuration: create tails, journaling on, no 1120 /*
1060 conversion to newest format */ 1121 * use default configuration: create tails, journaling on, no
1122 * conversion to newest format
1123 */
1061 return 1; 1124 return 1;
1062 1125
1063 for (pos = options; pos;) { 1126 for (pos = options; pos;) {
@@ -1110,7 +1173,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1110 1173
1111 if (c == 'j') { 1174 if (c == 'j') {
1112 if (arg && *arg && jdev_name) { 1175 if (arg && *arg && jdev_name) {
1113 if (*jdev_name) { //Hm, already assigned? 1176 /* Hm, already assigned? */
1177 if (*jdev_name) {
1114 reiserfs_warning(s, "super-6510", 1178 reiserfs_warning(s, "super-6510",
1115 "journal device was " 1179 "journal device was "
1116 "already specified to " 1180 "already specified to "
@@ -1363,8 +1427,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1363 safe_mask |= 1 << REISERFS_USRQUOTA; 1427 safe_mask |= 1 << REISERFS_USRQUOTA;
1364 safe_mask |= 1 << REISERFS_GRPQUOTA; 1428 safe_mask |= 1 << REISERFS_GRPQUOTA;
1365 1429
1366 /* Update the bitmask, taking care to keep 1430 /*
1367 * the bits we're not allowed to change here */ 1431 * Update the bitmask, taking care to keep
1432 * the bits we're not allowed to change here
1433 */
1368 REISERFS_SB(s)->s_mount_opt = 1434 REISERFS_SB(s)->s_mount_opt =
1369 (REISERFS_SB(s)-> 1435 (REISERFS_SB(s)->
1370 s_mount_opt & ~safe_mask) | (mount_options & safe_mask); 1436 s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
@@ -1428,7 +1494,9 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1428 handle_data_mode(s, mount_options); 1494 handle_data_mode(s, mount_options);
1429 handle_barrier_mode(s, mount_options); 1495 handle_barrier_mode(s, mount_options);
1430 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1496 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1431 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ 1497
1498 /* now it is safe to call journal_begin */
1499 s->s_flags &= ~MS_RDONLY;
1432 err = journal_begin(&th, s, 10); 1500 err = journal_begin(&th, s, 10);
1433 if (err) 1501 if (err)
1434 goto out_err_unlock; 1502 goto out_err_unlock;
@@ -1490,9 +1558,9 @@ static int read_super_block(struct super_block *s, int offset)
1490 brelse(bh); 1558 brelse(bh);
1491 return 1; 1559 return 1;
1492 } 1560 }
1493 // 1561 /*
1494	 // ok, reiserfs signature (old or new) found in at the given offset	1562	 * ok, reiserfs signature (old or new) found at the given offset
1495 // 1563 */
1496 fs_blocksize = sb_blocksize(rs); 1564 fs_blocksize = sb_blocksize(rs);
1497 brelse(bh); 1565 brelse(bh);
1498 sb_set_blocksize(s, fs_blocksize); 1566 sb_set_blocksize(s, fs_blocksize);
@@ -1530,9 +1598,11 @@ static int read_super_block(struct super_block *s, int offset)
1530 SB_BUFFER_WITH_SB(s) = bh; 1598 SB_BUFFER_WITH_SB(s) = bh;
1531 SB_DISK_SUPER_BLOCK(s) = rs; 1599 SB_DISK_SUPER_BLOCK(s) = rs;
1532 1600
1601 /*
1602 * magic is of non-standard journal filesystem, look at s_version to
1603 * find which format is in use
1604 */
1533 if (is_reiserfs_jr(rs)) { 1605 if (is_reiserfs_jr(rs)) {
1534 /* magic is of non-standard journal filesystem, look at s_version to
1535 find which format is in use */
1536 if (sb_version(rs) == REISERFS_VERSION_2) 1606 if (sb_version(rs) == REISERFS_VERSION_2)
1537 reiserfs_info(s, "found reiserfs format \"3.6\"" 1607 reiserfs_info(s, "found reiserfs format \"3.6\""
1538 " with non-standard journal\n"); 1608 " with non-standard journal\n");
@@ -1546,8 +1616,10 @@ static int read_super_block(struct super_block *s, int offset)
1546 return 1; 1616 return 1;
1547 } 1617 }
1548 } else 1618 } else
1549 /* s_version of standard format may contain incorrect information, 1619 /*
1550 so we just look at the magic string */ 1620 * s_version of standard format may contain incorrect
1621 * information, so we just look at the magic string
1622 */
1551 reiserfs_info(s, 1623 reiserfs_info(s,
1552 "found reiserfs format \"%s\" with standard journal\n", 1624 "found reiserfs format \"%s\" with standard journal\n",
1553 is_reiserfs_3_5(rs) ? "3.5" : "3.6"); 1625 is_reiserfs_3_5(rs) ? "3.5" : "3.6");
@@ -1559,8 +1631,9 @@ static int read_super_block(struct super_block *s, int offset)
1559 s->dq_op = &reiserfs_quota_operations; 1631 s->dq_op = &reiserfs_quota_operations;
1560#endif 1632#endif
1561 1633
1562 /* new format is limited by the 32 bit wide i_blocks field, want to 1634 /*
1563 ** be one full block below that. 1635 * new format is limited by the 32 bit wide i_blocks field, want to
1636 * be one full block below that.
1564 */ 1637 */
1565 s->s_maxbytes = (512LL << 32) - s->s_blocksize; 1638 s->s_maxbytes = (512LL << 32) - s->s_blocksize;
1566 return 0; 1639 return 0;
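
The arithmetic behind that cap: i_blocks is a 32-bit count of 512-byte
sectors, so the largest representable file is 2^32 * 512 bytes =
2^41 bytes = 2 TiB; staying one full block below keeps i_blocks from
wrapping. As a worked check for a 4 KiB block size:

	/* (512LL << 32)        == 2199023255552  (2 TiB)      */
	/* (512LL << 32) - 4096 == 2199023251456  (s_maxbytes) */
	s->s_maxbytes = (512LL << 32) - s->s_blocksize;
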
@@ -1579,14 +1652,15 @@ static int reread_meta_blocks(struct super_block *s)
1579 return 0; 1652 return 0;
1580} 1653}
1581 1654
1582///////////////////////////////////////////////////// 1655/* hash detection stuff */
1583// hash detection stuff
1584 1656
1585// if root directory is empty - we set default - Yura's - hash and 1657/*
1586	// warn about it	1658	 * if the root directory is empty we set the default (Yura's) hash and
1587// FIXME: we look for only one name in a directory. If tea and yura 1659 * warn about it
1588// bith have the same value - we ask user to send report to the 1660 * FIXME: we look for only one name in a directory. If tea and yura
1589	// mailing list	1661	 * both have the same value - we ask the user to send a report to the
1662 * mailing list
1663 */
1590static __u32 find_hash_out(struct super_block *s) 1664static __u32 find_hash_out(struct super_block *s)
1591{ 1665{
1592 int retval; 1666 int retval;
@@ -1598,7 +1672,7 @@ static __u32 find_hash_out(struct super_block *s)
1598 1672
1599 inode = s->s_root->d_inode; 1673 inode = s->s_root->d_inode;
1600 1674
1601 do { // Some serious "goto"-hater was there ;) 1675 do { /* Some serious "goto"-hater was there ;) */
1602 u32 teahash, r5hash, yurahash; 1676 u32 teahash, r5hash, yurahash;
1603 1677
1604 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); 1678 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
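
The detection idea: a directory entry's key offset packs
GET_HASH_VALUE(hash(name)), so recomputing the hash of one sampled
name with each candidate function shows which one built the entry. A
sketch, assuming de holds the sampled entry and deh its on-disk header
(the real code also handles two candidates colliding, which is what
the FIXME above is about):

	u32 stored = GET_HASH_VALUE(deh_offset(deh));

	if (stored == GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen)))
		hash = TEA_HASH;
	else if (stored == GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen)))
		hash = R5_HASH;
	else if (stored == GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen)))
		hash = YURA_HASH;
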
@@ -1663,23 +1737,25 @@ static __u32 find_hash_out(struct super_block *s)
1663 return hash; 1737 return hash;
1664} 1738}
1665 1739
1666// finds out which hash names are sorted with 1740/* finds out which hash names are sorted with */
1667static int what_hash(struct super_block *s) 1741static int what_hash(struct super_block *s)
1668{ 1742{
1669 __u32 code; 1743 __u32 code;
1670 1744
1671 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); 1745 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
1672 1746
1673 /* reiserfs_hash_detect() == true if any of the hash mount options 1747 /*
1674 ** were used. We must check them to make sure the user isn't 1748 * reiserfs_hash_detect() == true if any of the hash mount options
1675 ** using a bad hash value 1749 * were used. We must check them to make sure the user isn't
1750 * using a bad hash value
1676 */ 1751 */
1677 if (code == UNSET_HASH || reiserfs_hash_detect(s)) 1752 if (code == UNSET_HASH || reiserfs_hash_detect(s))
1678 code = find_hash_out(s); 1753 code = find_hash_out(s);
1679 1754
1680 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1755 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1681 /* detection has found the hash, and we must check against the 1756 /*
1682 ** mount options 1757 * detection has found the hash, and we must check against the
1758 * mount options
1683 */ 1759 */
1684 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1760 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1685 reiserfs_warning(s, "reiserfs-2507", 1761 reiserfs_warning(s, "reiserfs-2507",
@@ -1701,7 +1777,10 @@ static int what_hash(struct super_block *s)
1701 code = UNSET_HASH; 1777 code = UNSET_HASH;
1702 } 1778 }
1703 } else { 1779 } else {
1704 /* find_hash_out was not called or could not determine the hash */ 1780 /*
1781 * find_hash_out was not called or
1782 * could not determine the hash
1783 */
1705 if (reiserfs_rupasov_hash(s)) { 1784 if (reiserfs_rupasov_hash(s)) {
1706 code = YURA_HASH; 1785 code = YURA_HASH;
1707 } else if (reiserfs_tea_hash(s)) { 1786 } else if (reiserfs_tea_hash(s)) {
@@ -1711,8 +1790,9 @@ static int what_hash(struct super_block *s)
1711 } 1790 }
1712 } 1791 }
1713 1792
1714 /* if we are mounted RW, and we have a new valid hash code, update 1793 /*
1715 ** the super 1794 * if we are mounted RW, and we have a new valid hash code, update
1795 * the super
1716 */ 1796 */
1717 if (code != UNSET_HASH && 1797 if (code != UNSET_HASH &&
1718 !(s->s_flags & MS_RDONLY) && 1798 !(s->s_flags & MS_RDONLY) &&
@@ -1722,7 +1802,7 @@ static int what_hash(struct super_block *s)
1722 return code; 1802 return code;
1723} 1803}
1724 1804
1725// return pointer to appropriate function 1805/* return pointer to appropriate function */
1726static hashf_t hash_function(struct super_block *s) 1806static hashf_t hash_function(struct super_block *s)
1727{ 1807{
1728 switch (what_hash(s)) { 1808 switch (what_hash(s)) {
@@ -1739,7 +1819,7 @@ static hashf_t hash_function(struct super_block *s)
1739 return NULL; 1819 return NULL;
1740} 1820}
1741 1821
1742// this is used to set up correct value for old partitions 1822/* this is used to set up correct value for old partitions */
1743static int function2code(hashf_t func) 1823static int function2code(hashf_t func)
1744{ 1824{
1745 if (func == keyed_hash) 1825 if (func == keyed_hash)
@@ -1749,7 +1829,7 @@ static int function2code(hashf_t func)
1749 if (func == r5_hash) 1829 if (func == r5_hash)
1750 return R5_HASH; 1830 return R5_HASH;
1751 1831
1752 BUG(); // should never happen 1832 BUG(); /* should never happen */
1753 1833
1754 return 0; 1834 return 0;
1755} 1835}
@@ -1784,8 +1864,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1784 sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1864 sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1785 sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1865 sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1786 sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); 1866 sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
1787 /* no preallocation minimum, be smart in 1867 /* no preallocation minimum, be smart in reiserfs_file_write instead */
1788 reiserfs_file_write instead */
1789 sbi->s_alloc_options.preallocmin = 0; 1868 sbi->s_alloc_options.preallocmin = 0;
1790 /* Preallocate by 16 blocks (17-1) at once */ 1869 /* Preallocate by 16 blocks (17-1) at once */
1791 sbi->s_alloc_options.preallocsize = 17; 1870 sbi->s_alloc_options.preallocsize = 17;
@@ -1828,10 +1907,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1828 goto error_unlocked; 1907 goto error_unlocked;
1829 } 1908 }
1830 1909
1831 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1910 /*
1911 * try old format (undistributed bitmap, super block in 8-th 1k
1912 * block of a device)
1913 */
1832 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) 1914 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
1833 old_format = 1; 1915 old_format = 1;
1834 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1916
1917 /*
1918 * try new format (64-th 1k block), which can contain reiserfs
1919 * super block
1920 */
1835 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1921 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1836 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", 1922 SWARN(silent, s, "sh-2021", "can not find reiserfs on %s",
1837 s->s_id); 1923 s->s_id);
@@ -1839,9 +1925,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1839 } 1925 }
1840 1926
1841 rs = SB_DISK_SUPER_BLOCK(s); 1927 rs = SB_DISK_SUPER_BLOCK(s);
1842 /* Let's do basic sanity check to verify that underlying device is not 1928 /*
1843 smaller than the filesystem. If the check fails then abort and scream, 1929 * Let's do basic sanity check to verify that underlying device is not
1844 because bad stuff will happen otherwise. */ 1930 * smaller than the filesystem. If the check fails then abort and
1931 * scream, because bad stuff will happen otherwise.
1932 */
1845 if (s->s_bdev && s->s_bdev->bd_inode 1933 if (s->s_bdev && s->s_bdev->bd_inode
1846 && i_size_read(s->s_bdev->bd_inode) < 1934 && i_size_read(s->s_bdev->bd_inode) <
1847 sb_block_count(rs) * sb_blocksize(rs)) { 1935 sb_block_count(rs) * sb_blocksize(rs)) {
@@ -1885,15 +1973,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1885 printk("reiserfs: using flush barriers\n"); 1973 printk("reiserfs: using flush barriers\n");
1886 } 1974 }
1887 1975
1888 // set_device_ro(s->s_dev, 1) ;
1889 if (journal_init(s, jdev_name, old_format, commit_max_age)) { 1976 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1890 SWARN(silent, s, "sh-2022", 1977 SWARN(silent, s, "sh-2022",
1891 "unable to initialize journal space"); 1978 "unable to initialize journal space");
1892 goto error_unlocked; 1979 goto error_unlocked;
1893 } else { 1980 } else {
1894 jinit_done = 1; /* once this is set, journal_release must be called 1981 /*
1895 ** if we error out of the mount 1982 * once this is set, journal_release must be called
1896 */ 1983 * if we error out of the mount
1984 */
1985 jinit_done = 1;
1897 } 1986 }
1898 1987
1899 if (reread_meta_blocks(s)) { 1988 if (reread_meta_blocks(s)) {
@@ -1938,7 +2027,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1938 s->s_root = d_make_root(root_inode); 2027 s->s_root = d_make_root(root_inode);
1939 if (!s->s_root) 2028 if (!s->s_root)
1940 goto error; 2029 goto error;
1941 // define and initialize hash function 2030 /* define and initialize hash function */
1942 sbi->s_hash_function = hash_function(s); 2031 sbi->s_hash_function = hash_function(s);
1943 if (sbi->s_hash_function == NULL) { 2032 if (sbi->s_hash_function == NULL) {
1944 dput(s->s_root); 2033 dput(s->s_root);
@@ -1967,10 +2056,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1967 set_sb_umount_state(rs, REISERFS_ERROR_FS); 2056 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1968 set_sb_fs_state(rs, 0); 2057 set_sb_fs_state(rs, 0);
1969 2058
1970 /* Clear out s_bmap_nr if it would wrap. We can handle this 2059 /*
2060 * Clear out s_bmap_nr if it would wrap. We can handle this
1971 * case, but older revisions can't. This will cause the 2061 * case, but older revisions can't. This will cause the
1972 * file system to fail mount on those older implementations, 2062 * file system to fail mount on those older implementations,
1973 * avoiding corruption. -jeffm */ 2063 * avoiding corruption. -jeffm
2064 */
1974 if (bmap_would_wrap(reiserfs_bmap_count(s)) && 2065 if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
1975 sb_bmap_nr(rs) != 0) { 2066 sb_bmap_nr(rs) != 0) {
1976 reiserfs_warning(s, "super-2030", "This file system " 2067 reiserfs_warning(s, "super-2030", "This file system "
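For scale (a worked aside, assuming 4 KiB blocks): one bitmap block maps s_blocksize * 8 = 32768 blocks, i.e. 128 MiB, and the on-disk s_bmap_nr field is 16 bits wide, so a filesystem needing 65536 or more bitmap blocks (8 TiB and up) would wrap it. Writing 0 lets implementations that can recompute the count do so, while older ones refuse the mount instead of corrupting the filesystem.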
@@ -1983,8 +2074,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1983 } 2074 }
1984 2075
1985 if (old_format_only(s)) { 2076 if (old_format_only(s)) {
1986 /* filesystem of format 3.5 either with standard or non-standard 2077 /*
1987 journal */ 2078 * filesystem of format 3.5 either with standard
2079 * or non-standard journal
2080 */
1988 if (convert_reiserfs(s)) { 2081 if (convert_reiserfs(s)) {
1989 /* and -o conv is given */ 2082 /* and -o conv is given */
1990 if (!silent) 2083 if (!silent)
@@ -1992,8 +2085,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1992 "converting 3.5 filesystem to the 3.6 format"); 2085 "converting 3.5 filesystem to the 3.6 format");
1993 2086
1994 if (is_reiserfs_3_5(rs)) 2087 if (is_reiserfs_3_5(rs))
1995 /* put magic string of 3.6 format. 2.2 will not be able to 2088 /*
1996 mount this filesystem anymore */ 2089 * put magic string of 3.6 format.
2090 * 2.2 will not be able to
2091 * mount this filesystem anymore
2092 */
1997 memcpy(rs->s_v1.s_magic, 2093 memcpy(rs->s_v1.s_magic,
1998 reiserfs_3_6_magic_string, 2094 reiserfs_3_6_magic_string,
1999 sizeof 2095 sizeof
@@ -2027,7 +2123,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
2027 } 2123 }
2028 reiserfs_write_lock(s); 2124 reiserfs_write_lock(s);
2029 2125
2030 /* look for files which were to be removed in previous session */ 2126 /*
2127 * look for files which were to be removed in previous session
2128 */
2031 finish_unfinished(s); 2129 finish_unfinished(s);
2032 } else { 2130 } else {
2033 if (old_format_only(s) && !silent) { 2131 if (old_format_only(s) && !silent) {
@@ -2043,7 +2141,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
2043 } 2141 }
2044 reiserfs_write_lock(s); 2142 reiserfs_write_lock(s);
2045 } 2143 }
2046 // mark hash in super block: it could be unset. overwrite should be ok 2144 /*
2145 * mark hash in super block: it could be unset. overwrite should be ok
2146 */
2047 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); 2147 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
2048 2148
2049 handle_attrs(s); 2149 handle_attrs(s);
@@ -2247,7 +2347,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2247 goto out; 2347 goto out;
2248 } 2348 }
2249 inode = path->dentry->d_inode; 2349 inode = path->dentry->d_inode;
2250 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2350 /*
2351 * We must not pack tails for quota files on reiserfs for quota
2352 * IO to work
2353 */
2251 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2354 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2252 err = reiserfs_unpack(inode, NULL); 2355 err = reiserfs_unpack(inode, NULL);
2253 if (err) { 2356 if (err) {
@@ -2288,10 +2391,12 @@ out:
2288 return err; 2391 return err;
2289} 2392}
2290 2393
2291/* Read data from quotafile - avoid pagecache and such because we cannot afford 2394/*
2395 * Read data from quotafile - avoid pagecache and such because we cannot afford
2292 * acquiring the locks... As quota files are never truncated and quota code 2396 * acquiring the locks... As quota files are never truncated and quota code
2293 * itself serializes the operations (and no one else should touch the files) 2397 * itself serializes the operations (and no one else should touch the files)
2294 * we don't have to be afraid of races */ 2398 * we don't have to be afraid of races
2399 */
2295static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2400static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2296 size_t len, loff_t off) 2401 size_t len, loff_t off)
2297{ 2402{
@@ -2312,7 +2417,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2312 sb->s_blocksize - offset < 2417 sb->s_blocksize - offset <
2313 toread ? sb->s_blocksize - offset : toread; 2418 toread ? sb->s_blocksize - offset : toread;
2314 tmp_bh.b_state = 0; 2419 tmp_bh.b_state = 0;
2315 /* Quota files are without tails so we can safely use this function */ 2420 /*
2421 * Quota files are without tails so we can safely
2422 * use this function
2423 */
2316 reiserfs_write_lock(sb); 2424 reiserfs_write_lock(sb);
2317 err = reiserfs_get_block(inode, blk, &tmp_bh, 0); 2425 err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
2318 reiserfs_write_unlock(sb); 2426 reiserfs_write_unlock(sb);
@@ -2335,8 +2443,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2335 return len; 2443 return len;
2336} 2444}
2337 2445
2338/* Write to quotafile (we know the transaction is already started and has 2446/*
2339 * enough credits) */ 2447 * Write to quotafile (we know the transaction is already started and has
2448 * enough credits)
2449 */
2340static ssize_t reiserfs_quota_write(struct super_block *sb, int type, 2450static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2341 const char *data, size_t len, loff_t off) 2451 const char *data, size_t len, loff_t off)
2342{ 2452{
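Taken together, the two quota comments above describe one block-at-a-time I/O pattern. A minimal sketch of the read side, paraphrased from the body of reiserfs_quota_read() (error paths trimmed; illustrative, not a verbatim copy of the source):

	while (toread > 0) {
		/* copy at most up to the end of the current block */
		tocopy = sb->s_blocksize - offset < toread ?
			 sb->s_blocksize - offset : toread;
		tmp_bh.b_state = 0;
		/* quota files have no tails, so this is safe; the final 0
		 * means "do not allocate new blocks" */
		reiserfs_write_lock(sb);
		err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
		reiserfs_write_unlock(sb);
		if (err)
			return err;
		if (!buffer_mapped(&tmp_bh))	/* a hole reads back as zeros */
			memset(data, 0, tocopy);
		else {
			bh = sb_bread(sb, tmp_bh.b_blocknr);
			if (!bh)
				return -EIO;
			memcpy(data, bh->b_data + offset, tocopy);
			brelse(bh);
		}
		offset = 0;	/* only the first block starts mid-way */
		toread -= tocopy;
		data += tocopy;
		blk++;
	}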
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index fc1981d858dc..f41e19b4bb42 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details 2 * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright
3 * details
3 */ 4 */
4 5
5#include <linux/time.h> 6#include <linux/time.h>
@@ -7,13 +8,19 @@
7#include <linux/buffer_head.h> 8#include <linux/buffer_head.h>
8#include "reiserfs.h" 9#include "reiserfs.h"
9 10
10/* access to tail : when one is going to read tail it must make sure, that is not running. 11/*
11 direct2indirect and indirect2direct can not run concurrently */ 12 * access to tail: when one is going to read the tail it must make sure that it
13 * is not running. direct2indirect and indirect2direct can not run concurrently
14 */
12 15
13/* Converts direct items to an unformatted node. Panics if file has no 16/*
14 tail. -ENOSPC if no disk space for conversion */ 17 * Converts direct items to an unformatted node. Panics if file has no
15/* path points to first direct item of the file regarless of how many of 18 * tail. -ENOSPC if no disk space for conversion
16 them are there */ 19 */
20/*
21 * path points to first direct item of the file regardless of how many of
22 * them are there
23 */
17int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, 24int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
18 struct treepath *path, struct buffer_head *unbh, 25 struct treepath *path, struct buffer_head *unbh,
19 loff_t tail_offset) 26 loff_t tail_offset)
@@ -22,14 +29,20 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
22 struct buffer_head *up_to_date_bh; 29 struct buffer_head *up_to_date_bh;
23 struct item_head *p_le_ih = tp_item_head(path); 30 struct item_head *p_le_ih = tp_item_head(path);
24 unsigned long total_tail = 0; 31 unsigned long total_tail = 0;
25 struct cpu_key end_key; /* Key to search for the last byte of the 32
26 converted item. */ 33 /* Key to search for the last byte of the converted item. */
27 struct item_head ind_ih; /* new indirect item to be inserted or 34 struct cpu_key end_key;
28 key of unfm pointer to be pasted */ 35
29 int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ 36 /*
30 unp_t unfm_ptr; /* Handle on an unformatted node 37 * new indirect item to be inserted or key
31 that will be inserted in the 38 * of unfm pointer to be pasted
32 tree. */ 39 */
40 struct item_head ind_ih;
41 int blk_size;
42 /* returned value for reiserfs_insert_item and clones */
43 int retval;
44 /* Handle on an unformatted node that will be inserted in the tree. */
45 unp_t unfm_ptr;
33 46
34 BUG_ON(!th->t_trans_id); 47 BUG_ON(!th->t_trans_id);
35 48
@@ -37,8 +50,10 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
37 50
38 blk_size = sb->s_blocksize; 51 blk_size = sb->s_blocksize;
39 52
40 /* and key to search for append or insert pointer to the new 53 /*
41 unformatted node. */ 54 * and key to search for append or insert pointer to the new
55 * unformatted node.
56 */
42 copy_item_head(&ind_ih, p_le_ih); 57 copy_item_head(&ind_ih, p_le_ih);
43 set_le_ih_k_offset(&ind_ih, tail_offset); 58 set_le_ih_k_offset(&ind_ih, tail_offset);
44 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); 59 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
@@ -76,20 +91,26 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
76 if (retval) { 91 if (retval) {
77 return retval; 92 return retval;
78 } 93 }
79 // note: from here there are two keys which have matching first 94 /*
80 // three key components. They only differ by the fourth one. 95 * note: from here there are two keys which have matching first
96 * three key components. They only differ by the fourth one.
97 */
81 98
82 /* Set the key to search for the direct items of the file */ 99 /* Set the key to search for the direct items of the file */
83 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, 100 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
84 4); 101 4);
85 102
86 /* Move bytes from the direct items to the new unformatted node 103 /*
87 and delete them. */ 104 * Move bytes from the direct items to the new unformatted node
105 * and delete them.
106 */
88 while (1) { 107 while (1) {
89 int tail_size; 108 int tail_size;
90 109
91 /* end_key.k_offset is set so, that we will always have found 110 /*
92 last item of the file */ 111 * end_key.k_offset is set so, that we will always have found
112 * last item of the file
113 */
93 if (search_for_position_by_key(sb, &end_key, path) == 114 if (search_for_position_by_key(sb, &end_key, path) ==
94 POSITION_FOUND) 115 POSITION_FOUND)
95 reiserfs_panic(sb, "PAP-14050", 116 reiserfs_panic(sb, "PAP-14050",
@@ -101,11 +122,12 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
101 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) 122 tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1))
102 + ih_item_len(p_le_ih) - 1; 123 + ih_item_len(p_le_ih) - 1;
103 124
104 /* we only send the unbh pointer if the buffer is not up to date. 125 /*
105 ** this avoids overwriting good data from writepage() with old data 126 * we only send the unbh pointer if the buffer is not
106 ** from the disk or buffer cache 127 * up to date. This avoids overwriting good data from
107 ** Special case: unbh->b_page will be NULL if we are coming through 128 * writepage() with old data from the disk or buffer cache.
108 ** DIRECT_IO handler here. 129 * Special case: unbh->b_page will be NULL if we are coming
130 * through DIRECT_IO handler here.
109 */ 131 */
110 if (!unbh->b_page || buffer_uptodate(unbh) 132 if (!unbh->b_page || buffer_uptodate(unbh)
111 || PageUptodate(unbh->b_page)) { 133 || PageUptodate(unbh->b_page)) {
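A worked instance of the tail_size computation above (illustrative numbers, with blk_size = 4096): a direct item with le_ih_k_offset(p_le_ih) = 8193 (key offsets are 1-based, so this is the first byte of the third 4 KiB block) and ih_item_len(p_le_ih) = 100 gives tail_size = (8193 & 4095) + 100 - 1 = 100, i.e. the tail ends 100 bytes into its block.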
@@ -117,13 +139,15 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
117 up_to_date_bh); 139 up_to_date_bh);
118 140
119 total_tail += retval; 141 total_tail += retval;
142
143 /* done: file does not have direct items anymore */
120 if (tail_size == retval) 144 if (tail_size == retval)
121 // done: file does not have direct items anymore
122 break; 145 break;
123 146
124 } 147 }
125 /* if we've copied bytes from disk into the page, we need to zero 148 /*
126 ** out the unused part of the block (it was not up to date before) 149 * if we've copied bytes from disk into the page, we need to zero
150 * out the unused part of the block (it was not up to date before)
127 */ 151 */
128 if (up_to_date_bh) { 152 if (up_to_date_bh) {
129 unsigned pgoff = 153 unsigned pgoff =
@@ -146,9 +170,11 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
146 BUG(); 170 BUG();
147 } 171 }
148 clear_buffer_dirty(bh); 172 clear_buffer_dirty(bh);
149 /* Remove the buffer from whatever list it belongs to. We are mostly 173 /*
150 interested in removing it from per-sb j_dirty_buffers list, to avoid 174 * Remove the buffer from whatever list it belongs to. We are mostly
151 BUG() on attempt to write not mapped buffer */ 175 * interested in removing it from per-sb j_dirty_buffers list, to avoid
176 * BUG() on attempt to write not mapped buffer
177 */
152 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { 178 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
153 struct inode *inode = bh->b_page->mapping->host; 179 struct inode *inode = bh->b_page->mapping->host;
154 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 180 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
@@ -164,12 +190,14 @@ void reiserfs_unmap_buffer(struct buffer_head *bh)
164 unlock_buffer(bh); 190 unlock_buffer(bh);
165} 191}
166 192
167/* this first locks inode (neither reads nor sync are permitted), 193/*
168 reads tail through page cache, insert direct item. When direct item 194 * this first locks inode (neither reads nor sync are permitted),
169 inserted successfully inode is left locked. Return value is always 195 * reads tail through page cache, inserts direct item. When direct item
170 what we expect from it (number of cut bytes). But when tail remains 196 * inserted successfully inode is left locked. Return value is always
171 in the unformatted node, we set mode to SKIP_BALANCING and unlock 197 * what we expect from it (number of cut bytes). But when tail remains
172 inode */ 198 * in the unformatted node, we set mode to SKIP_BALANCING and unlock
199 * inode
200 */
173int indirect2direct(struct reiserfs_transaction_handle *th, 201int indirect2direct(struct reiserfs_transaction_handle *th,
174 struct inode *inode, struct page *page, 202 struct inode *inode, struct page *page,
175 struct treepath *path, /* path to the indirect item. */ 203 struct treepath *path, /* path to the indirect item. */
@@ -207,9 +235,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
207 1) * sb->s_blocksize; 235 1) * sb->s_blocksize;
208 pos1 = pos; 236 pos1 = pos;
209 237
210 // we are protected by i_mutex. The tail can not disapper, not 238 /*
211 // append can be done either 239 * we are protected by i_mutex. The tail can not disappear, nor
212 // we are in truncate or packing tail in file_release 240 * can an append be done: we are in truncate or packing the
241 * tail in file_release
242 */
213 243
214 tail = (char *)kmap(page); /* this can schedule */ 244 tail = (char *)kmap(page); /* this can schedule */
215 245
@@ -236,9 +266,10 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
236 pos1 + 1, TYPE_DIRECT, round_tail_len, 266 pos1 + 1, TYPE_DIRECT, round_tail_len,
237 0xffff /*ih_free_space */ ); 267 0xffff /*ih_free_space */ );
238 268
239 /* we want a pointer to the first byte of the tail in the page. 269 /*
240 ** the page was locked and this part of the page was up to date when 270 * we want a pointer to the first byte of the tail in the page.
241 ** indirect2direct was called, so we know the bytes are still valid 271 * the page was locked and this part of the page was up to date when
272 * indirect2direct was called, so we know the bytes are still valid
242 */ 273 */
243 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); 274 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
244 275
@@ -250,12 +281,14 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
250 /* Insert tail as new direct item in the tree */ 281 /* Insert tail as new direct item in the tree */
251 if (reiserfs_insert_item(th, path, &key, &s_ih, inode, 282 if (reiserfs_insert_item(th, path, &key, &s_ih, inode,
252 tail ? tail : NULL) < 0) { 283 tail ? tail : NULL) < 0) {
253 /* No disk memory. So we can not convert last unformatted node 284 /*
254 to the direct item. In this case we used to adjust 285 * No disk memory. So we can not convert last unformatted node
255 indirect items's ih_free_space. Now ih_free_space is not 286 * to the direct item. In this case we used to adjust
256 used, it would be ideal to write zeros to corresponding 287 * indirect item's ih_free_space. Now ih_free_space is not
257 unformatted node. For now i_size is considered as guard for 288 * used, it would be ideal to write zeros to corresponding
258 going out of file size */ 289 * unformatted node. For now i_size is considered as guard for
290 * going out of file size
291 */
259 kunmap(page); 292 kunmap(page);
260 return block_size - round_tail_len; 293 return block_size - round_tail_len;
261 } 294 }
@@ -264,12 +297,16 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
264 /* make sure to get the i_blocks changes from reiserfs_insert_item */ 297 /* make sure to get the i_blocks changes from reiserfs_insert_item */
265 reiserfs_update_sd(th, inode); 298 reiserfs_update_sd(th, inode);
266 299
267 // note: we have now the same as in above direct2indirect 300 /*
268 // conversion: there are two keys which have matching first three 301 * note: we have now the same as in above direct2indirect
269 // key components. They only differ by the fouhth one. 302 * conversion: there are two keys which have matching first three
303 * key components. They only differ by the fourth one.
304 */
270 305
271 /* We have inserted new direct item and must remove last 306 /*
272 unformatted node. */ 307 * We have inserted new direct item and must remove last
308 * unformatted node.
309 */
273 *mode = M_CUT; 310 *mode = M_CUT;
274 311
275 /* we store position of first direct item in the in-core inode */ 312 /* we store position of first direct item in the in-core inode */
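Condensing the hunks above, the insertion half of indirect2direct() reduces to the following shape (a sketch reusing the names from the hunks; locking and most error handling omitted):

	tail = (char *)kmap(page);	/* this can schedule */
	/* first byte of the tail within the locked, up-to-date page */
	tail = tail + (pos & (PAGE_CACHE_SIZE - 1));

	/* insert the tail as a new direct item in the tree */
	if (reiserfs_insert_item(th, path, &key, &s_ih, inode, tail) < 0) {
		/* no space: leave the last unformatted node in place */
		kunmap(page);
		return block_size - round_tail_len;
	}
	kunmap(page);

	/* pick up the i_blocks changes made by reiserfs_insert_item() */
	reiserfs_update_sd(th, inode);

	/* the caller must now cut the last unformatted node */
	*mode = M_CUT;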
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5cdfbd638b5c..f669990376af 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -56,9 +56,11 @@
56#define XAROOT_NAME "xattrs" 56#define XAROOT_NAME "xattrs"
57 57
58 58
59/* Helpers for inode ops. We do this so that we don't have all the VFS 59/*
60 * Helpers for inode ops. We do this so that we don't have all the VFS
60 * overhead and also for proper i_mutex annotation. 61 * overhead and also for proper i_mutex annotation.
61 * dir->i_mutex must be held for all of them. */ 62 * dir->i_mutex must be held for all of them.
63 */
62#ifdef CONFIG_REISERFS_FS_XATTR 64#ifdef CONFIG_REISERFS_FS_XATTR
63static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) 65static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
64{ 66{
@@ -73,10 +75,12 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
73 return dir->i_op->mkdir(dir, dentry, mode); 75 return dir->i_op->mkdir(dir, dentry, mode);
74} 76}
75 77
76/* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr 78/*
79 * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr
77 * mutation ops aren't called during rename or splice, which are the 80 * mutation ops aren't called during rename or splice, which are the
78 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's 81 * only other users of I_MUTEX_CHILD. It violates the ordering, but that's
79 * better than allocating another subclass just for this code. */ 82 * better than allocating another subclass just for this code.
83 */
80static int xattr_unlink(struct inode *dir, struct dentry *dentry) 84static int xattr_unlink(struct inode *dir, struct dentry *dentry)
81{ 85{
82 int error; 86 int error;
@@ -166,9 +170,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
166 return xadir; 170 return xadir;
167} 171}
168 172
169/* The following are side effects of other operations that aren't explicitly 173/*
174 * The following are side effects of other operations that aren't explicitly
170 * modifying extended attributes. This includes operations such as permissions 175 * modifying extended attributes. This includes operations such as permissions
171 * or ownership changes, object deletions, etc. */ 176 * or ownership changes, object deletions, etc.
177 */
172struct reiserfs_dentry_buf { 178struct reiserfs_dentry_buf {
173 struct dir_context ctx; 179 struct dir_context ctx;
174 struct dentry *xadir; 180 struct dentry *xadir;
@@ -267,11 +273,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
267 cleanup_dentry_buf(&buf); 273 cleanup_dentry_buf(&buf);
268 274
269 if (!err) { 275 if (!err) {
270 /* We start a transaction here to avoid a ABBA situation 276 /*
277 * We start a transaction here to avoid a ABBA situation
271 * between the xattr root's i_mutex and the journal lock. 278 * between the xattr root's i_mutex and the journal lock.
272 * This doesn't incur much additional overhead since the 279 * This doesn't incur much additional overhead since the
273 * new transaction will just nest inside the 280 * new transaction will just nest inside the
274 * outer transaction. */ 281 * outer transaction.
282 */
275 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 283 int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 +
276 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 284 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
277 struct reiserfs_transaction_handle th; 285 struct reiserfs_transaction_handle th;
@@ -349,9 +357,11 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
349} 357}
350 358
351#ifdef CONFIG_REISERFS_FS_XATTR 359#ifdef CONFIG_REISERFS_FS_XATTR
352/* Returns a dentry corresponding to a specific extended attribute file 360/*
361 * Returns a dentry corresponding to a specific extended attribute file
353 * for the inode. If flags allow, the file is created. Otherwise, a 362 * for the inode. If flags allow, the file is created. Otherwise, a
354 * valid or negative dentry, or an error is returned. */ 363 * valid or negative dentry, or an error is returned.
364 */
355static struct dentry *xattr_lookup(struct inode *inode, const char *name, 365static struct dentry *xattr_lookup(struct inode *inode, const char *name,
356 int flags) 366 int flags)
357{ 367{
@@ -400,8 +410,10 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
400{ 410{
401 struct address_space *mapping = dir->i_mapping; 411 struct address_space *mapping = dir->i_mapping;
402 struct page *page; 412 struct page *page;
403 /* We can deadlock if we try to free dentries, 413 /*
404 and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ 414 * We can deadlock if we try to free dentries,
415 * and an unlink/rmdir has just occurred - GFP_NOFS avoids this
416 */
405 mapping_set_gfp_mask(mapping, GFP_NOFS); 417 mapping_set_gfp_mask(mapping, GFP_NOFS);
406 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); 418 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
407 if (!IS_ERR(page)) { 419 if (!IS_ERR(page)) {
@@ -615,8 +627,10 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
615 if (name == NULL) 627 if (name == NULL)
616 return -EINVAL; 628 return -EINVAL;
617 629
618 /* We can't have xattrs attached to v1 items since they don't have 630 /*
619 * generation numbers */ 631 * We can't have xattrs attached to v1 items since they don't have
632 * generation numbers
633 */
620 if (get_inode_sd_version(inode) == STAT_DATA_V1) 634 if (get_inode_sd_version(inode) == STAT_DATA_V1)
621 return -EOPNOTSUPP; 635 return -EOPNOTSUPP;
622 636
@@ -913,12 +927,16 @@ static const struct xattr_handler *reiserfs_xattr_handlers[] = {
913 927
914static int xattr_mount_check(struct super_block *s) 928static int xattr_mount_check(struct super_block *s)
915{ 929{
916 /* We need generation numbers to ensure that the oid mapping is correct 930 /*
917 * v3.5 filesystems don't have them. */ 931 * We need generation numbers to ensure that the oid mapping is correct
932 * v3.5 filesystems don't have them.
933 */
918 if (old_format_only(s)) { 934 if (old_format_only(s)) {
919 if (reiserfs_xattrs_optional(s)) { 935 if (reiserfs_xattrs_optional(s)) {
920 /* Old format filesystem, but optional xattrs have 936 /*
921 * been enabled. Error out. */ 937 * Old format filesystem, but optional xattrs have
938 * been enabled. Error out.
939 */
922 reiserfs_warning(s, "jdm-2005", 940 reiserfs_warning(s, "jdm-2005",
923 "xattrs/ACLs not supported " 941 "xattrs/ACLs not supported "
924 "on pre-v3.6 format filesystems. " 942 "on pre-v3.6 format filesystems. "
@@ -972,9 +990,11 @@ int reiserfs_lookup_privroot(struct super_block *s)
972 return err; 990 return err;
973} 991}
974 992
975/* We need to take a copy of the mount flags since things like 993/*
994 * We need to take a copy of the mount flags since things like
976 * MS_RDONLY don't get set until *after* we're called. 995 * MS_RDONLY don't get set until *after* we're called.
977 * mount_flags != mount_options */ 996 * mount_flags != mount_options
997 */
978int reiserfs_xattr_init(struct super_block *s, int mount_flags) 998int reiserfs_xattr_init(struct super_block *s, int mount_flags)
979{ 999{
980 int err = 0; 1000 int err = 0;
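The I_MUTEX_CHILD convention discussed near the top of this file is easiest to see in the unlink helper. A sketch of that pattern, paraphrased from the kernel of this era (treat the exact body as illustrative):

static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
	int error;

	/* the caller already holds dir->i_mutex */
	BUG_ON(!mutex_is_locked(&dir->i_mutex));

	/* I_MUTEX_CHILD on the victim silences lockdep, as described above */
	mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
	error = dir->i_op->unlink(dir, dentry);
	mutex_unlock(&dentry->d_inode->i_mutex);

	if (!error)
		d_delete(dentry);
	return error;
}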
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index f59626c5d33b..857ec7e3016f 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -61,7 +61,8 @@ static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
61 return ret; 61 return ret;
62} 62}
63 63
64/* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. 64/*
65 * We may have to create up to 3 objects: xattr root, xattr dir, xattr file.
65 * Let's try to be smart about it. 66 * Let's try to be smart about it.
66 * xattr root: We cache it. If it's not cached, we may need to create it. 67 * xattr root: We cache it. If it's not cached, we may need to create it.
67 * xattr dir: If anything has been loaded for this inode, we can set a flag 68 * xattr dir: If anything has been loaded for this inode, we can set a flag
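The estimate this comment motivates lives just below it in xattr.h; reconstructed from this era's source (verify against the tree before relying on it), the helper looks roughly like:

static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
{
	/* always budget for the xattr file itself */
	size_t nblocks = JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);

	if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
		/* may need to create the per-inode xattr dir ... */
		nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
		/* ... and the xattr root if it is not cached yet */
		if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
			nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
	}

	return nblocks;
}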
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index a6ce532402dc..a333a073bea8 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -25,8 +25,10 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25 int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; 25 int size = acl ? posix_acl_xattr_size(acl->a_count) : 0;
26 26
27 27
28 /* Pessimism: We can't assume that anything from the xattr root up 28 /*
29 * has been created. */ 29 * Pessimism: We can't assume that anything from the xattr root up
30 * has been created.
31 */
30 32
31 jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + 33 jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) +
32 reiserfs_xattr_nblocks(inode, size) * 2; 34 reiserfs_xattr_nblocks(inode, size) * 2;
@@ -208,8 +210,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
208 210
209 retval = reiserfs_xattr_get(inode, name, value, size); 211 retval = reiserfs_xattr_get(inode, name, value, size);
210 if (retval == -ENODATA || retval == -ENOSYS) { 212 if (retval == -ENODATA || retval == -ENOSYS) {
211 /* This shouldn't actually happen as it should have 213 /*
212 been caught above.. but just in case */ 214 * This shouldn't actually happen as it should have
215 * been caught above.. but just in case
216 */
213 acl = NULL; 217 acl = NULL;
214 } else if (retval < 0) { 218 } else if (retval < 0) {
215 acl = ERR_PTR(retval); 219 acl = ERR_PTR(retval);
@@ -290,8 +294,10 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
290 return error; 294 return error;
291} 295}
292 296
293/* dir->i_mutex: locked, 297/*
294 * inode is new and not released into the wild yet */ 298 * dir->i_mutex: locked,
299 * inode is new and not released into the wild yet
300 */
295int 301int
296reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, 302reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
297 struct inode *dir, struct dentry *dentry, 303 struct inode *dir, struct dentry *dentry,
@@ -304,14 +310,18 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
304 if (S_ISLNK(inode->i_mode)) 310 if (S_ISLNK(inode->i_mode))
305 return 0; 311 return 0;
306 312
307 /* ACLs can only be used on "new" objects, so if it's an old object 313 /*
308 * there is nothing to inherit from */ 314 * ACLs can only be used on "new" objects, so if it's an old object
315 * there is nothing to inherit from
316 */
309 if (get_inode_sd_version(dir) == STAT_DATA_V1) 317 if (get_inode_sd_version(dir) == STAT_DATA_V1)
310 goto apply_umask; 318 goto apply_umask;
311 319
312 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This 320 /*
321 * Don't apply ACLs to objects in the .reiserfs_priv tree.. This
313 * would be useless since permissions are ignored, and a pain because 322 * would be useless since permissions are ignored, and a pain because
314 * it introduces locking cycles */ 323 * it introduces locking cycles
324 */
315 if (IS_PRIVATE(dir)) { 325 if (IS_PRIVATE(dir)) {
316 inode->i_flags |= S_PRIVATE; 326 inode->i_flags |= S_PRIVATE;
317 goto apply_umask; 327 goto apply_umask;