diff options
author | Jeff Mahoney <jeffm@suse.com> | 2014-04-23 10:00:36 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2014-05-06 16:52:19 -0400 |
commit | 098297b27d23ad9d0fc302e3417474d9342c6c14 (patch) | |
tree | 58f2054cd9933225ef1ae9c7febedc9160041af6 /fs/reiserfs | |
parent | 4cf5f7addf18ecae2ea49b11944976cbd26d5281 (diff) |
reiserfs: cleanup, reformat comments to normal kernel style
This patch reformats comments in the reiserfs code to fit in 80 columns and
to follow the style rules.
There is no functional change but it helps make my eyes bleed less.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/reiserfs')
-rw-r--r-- | fs/reiserfs/bitmap.c | 237 | ||||
-rw-r--r-- | fs/reiserfs/dir.c | 77 | ||||
-rw-r--r-- | fs/reiserfs/do_balan.c | 276 | ||||
-rw-r--r-- | fs/reiserfs/file.c | 62 | ||||
-rw-r--r-- | fs/reiserfs/fix_node.c | 967 | ||||
-rw-r--r-- | fs/reiserfs/hashes.c | 15 | ||||
-rw-r--r-- | fs/reiserfs/ibalance.c | 247 | ||||
-rw-r--r-- | fs/reiserfs/inode.c | 1063 | ||||
-rw-r--r-- | fs/reiserfs/ioctl.c | 23 | ||||
-rw-r--r-- | fs/reiserfs/item_ops.c | 100 | ||||
-rw-r--r-- | fs/reiserfs/journal.c | 1127 | ||||
-rw-r--r-- | fs/reiserfs/lbalance.c | 349 | ||||
-rw-r--r-- | fs/reiserfs/namei.c | 420 | ||||
-rw-r--r-- | fs/reiserfs/objectid.c | 95 | ||||
-rw-r--r-- | fs/reiserfs/prints.c | 152 | ||||
-rw-r--r-- | fs/reiserfs/reiserfs.h | 1740 | ||||
-rw-r--r-- | fs/reiserfs/resize.c | 63 | ||||
-rw-r--r-- | fs/reiserfs/stree.c | 812 | ||||
-rw-r--r-- | fs/reiserfs/super.c | 366 | ||||
-rw-r--r-- | fs/reiserfs/tail_conversion.c | 151 | ||||
-rw-r--r-- | fs/reiserfs/xattr.c | 60 | ||||
-rw-r--r-- | fs/reiserfs/xattr.h | 3 | ||||
-rw-r--r-- | fs/reiserfs/xattr_acl.c | 30 |
23 files changed, 5124 insertions(+), 3311 deletions(-)
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index c3de6501a5cb..70daba6fa6a5 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c | |||
@@ -50,8 +50,10 @@ static inline void get_bit_address(struct super_block *s, | |||
50 | unsigned int *bmap_nr, | 50 | unsigned int *bmap_nr, |
51 | unsigned int *offset) | 51 | unsigned int *offset) |
52 | { | 52 | { |
53 | /* It is in the bitmap block number equal to the block | 53 | /* |
54 | * number divided by the number of bits in a block. */ | 54 | * It is in the bitmap block number equal to the block |
55 | * number divided by the number of bits in a block. | ||
56 | */ | ||
55 | *bmap_nr = block >> (s->s_blocksize_bits + 3); | 57 | *bmap_nr = block >> (s->s_blocksize_bits + 3); |
56 | /* Within that bitmap block it is located at bit offset *offset. */ | 58 | /* Within that bitmap block it is located at bit offset *offset. */ |
57 | *offset = block & ((s->s_blocksize << 3) - 1); | 59 | *offset = block & ((s->s_blocksize << 3) - 1); |
@@ -71,8 +73,10 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) | |||
71 | 73 | ||
72 | get_bit_address(s, block, &bmap, &offset); | 74 | get_bit_address(s, block, &bmap, &offset); |
73 | 75 | ||
74 | /* Old format filesystem? Unlikely, but the bitmaps are all up front so | 76 | /* |
75 | * we need to account for it. */ | 77 | * Old format filesystem? Unlikely, but the bitmaps are all |
78 | * up front so we need to account for it. | ||
79 | */ | ||
76 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, | 80 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, |
77 | &(REISERFS_SB(s)->s_properties)))) { | 81 | &(REISERFS_SB(s)->s_properties)))) { |
78 | b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; | 82 | b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; |
@@ -108,8 +112,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) | |||
108 | return 1; | 112 | return 1; |
109 | } | 113 | } |
110 | 114 | ||
111 | /* searches in journal structures for a given block number (bmap, off). If block | 115 | /* |
112 | is found in reiserfs journal it suggests next free block candidate to test. */ | 116 | * Searches in journal structures for a given block number (bmap, off). |
117 | * If block is found in reiserfs journal it suggests next free block | ||
118 | * candidate to test. | ||
119 | */ | ||
113 | static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, | 120 | static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, |
114 | int off, int *next) | 121 | int off, int *next) |
115 | { | 122 | { |
@@ -120,7 +127,7 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, | |||
120 | *next = tmp; | 127 | *next = tmp; |
121 | PROC_INFO_INC(s, scan_bitmap.in_journal_hint); | 128 | PROC_INFO_INC(s, scan_bitmap.in_journal_hint); |
122 | } else { | 129 | } else { |
123 | (*next) = off + 1; /* inc offset to avoid looping. */ | 130 | (*next) = off + 1; /* inc offset to avoid looping. */ |
124 | PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); | 131 | PROC_INFO_INC(s, scan_bitmap.in_journal_nohint); |
125 | } | 132 | } |
126 | PROC_INFO_INC(s, scan_bitmap.retry); | 133 | PROC_INFO_INC(s, scan_bitmap.retry); |
@@ -129,8 +136,10 @@ static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, | |||
129 | return 0; | 136 | return 0; |
130 | } | 137 | } |
131 | 138 | ||
132 | /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap | 139 | /* |
133 | * block; */ | 140 | * Searches for a window of zero bits with given minimum and maximum |
141 | * lengths in one bitmap block | ||
142 | */ | ||
134 | static int scan_bitmap_block(struct reiserfs_transaction_handle *th, | 143 | static int scan_bitmap_block(struct reiserfs_transaction_handle *th, |
135 | unsigned int bmap_n, int *beg, int boundary, | 144 | unsigned int bmap_n, int *beg, int boundary, |
136 | int min, int max, int unfm) | 145 | int min, int max, int unfm) |
@@ -146,10 +155,6 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, | |||
146 | RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " | 155 | RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " |
147 | "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); | 156 | "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); |
148 | PROC_INFO_INC(s, scan_bitmap.bmap); | 157 | PROC_INFO_INC(s, scan_bitmap.bmap); |
149 | /* this is unclear and lacks comments, explain how journal bitmaps | ||
150 | work here for the reader. Convey a sense of the design here. What | ||
151 | is a window? */ | ||
152 | /* - I mean `a window of zero bits' as in description of this function - Zam. */ | ||
153 | 158 | ||
154 | if (!bi) { | 159 | if (!bi) { |
155 | reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " | 160 | reiserfs_error(s, "jdm-4055", "NULL bitmap info pointer " |
@@ -165,15 +170,18 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, | |||
165 | cont: | 170 | cont: |
166 | if (bi->free_count < min) { | 171 | if (bi->free_count < min) { |
167 | brelse(bh); | 172 | brelse(bh); |
168 | return 0; // No free blocks in this bitmap | 173 | return 0; /* No free blocks in this bitmap */ |
169 | } | 174 | } |
170 | 175 | ||
171 | /* search for a first zero bit -- beginning of a window */ | 176 | /* search for a first zero bit -- beginning of a window */ |
172 | *beg = reiserfs_find_next_zero_le_bit | 177 | *beg = reiserfs_find_next_zero_le_bit |
173 | ((unsigned long *)(bh->b_data), boundary, *beg); | 178 | ((unsigned long *)(bh->b_data), boundary, *beg); |
174 | 179 | ||
175 | if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block | 180 | /* |
176 | * cannot contain a zero window of minimum size */ | 181 | * search for a zero bit fails or the rest of bitmap block |
182 | * cannot contain a zero window of minimum size | ||
183 | */ | ||
184 | if (*beg + min > boundary) { | ||
177 | brelse(bh); | 185 | brelse(bh); |
178 | return 0; | 186 | return 0; |
179 | } | 187 | } |
@@ -187,37 +195,63 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, | |||
187 | next = end; | 195 | next = end; |
188 | break; | 196 | break; |
189 | } | 197 | } |
190 | /* finding the other end of zero bit window requires looking into journal structures (in | 198 | |
191 | * case of searching for free blocks for unformatted nodes) */ | 199 | /* |
200 | * finding the other end of zero bit window requires | ||
201 | * looking into journal structures (in case of | ||
202 | * searching for free blocks for unformatted nodes) | ||
203 | */ | ||
192 | if (unfm && is_block_in_journal(s, bmap_n, end, &next)) | 204 | if (unfm && is_block_in_journal(s, bmap_n, end, &next)) |
193 | break; | 205 | break; |
194 | } | 206 | } |
195 | 207 | ||
196 | /* now (*beg) points to beginning of zero bits window, | 208 | /* |
197 | * (end) points to one bit after the window end */ | 209 | * now (*beg) points to beginning of zero bits window, |
198 | if (end - *beg >= min) { /* it seems we have found window of proper size */ | 210 | * (end) points to one bit after the window end |
211 | */ | ||
212 | |||
213 | /* found window of proper size */ | ||
214 | if (end - *beg >= min) { | ||
199 | int i; | 215 | int i; |
200 | reiserfs_prepare_for_journal(s, bh, 1); | 216 | reiserfs_prepare_for_journal(s, bh, 1); |
201 | /* try to set all blocks used checking are they still free */ | 217 | /* |
218 | * try to set all blocks used checking are | ||
219 | * they still free | ||
220 | */ | ||
202 | for (i = *beg; i < end; i++) { | 221 | for (i = *beg; i < end; i++) { |
203 | /* It seems that we should not check in journal again. */ | 222 | /* Don't check in journal again. */ |
204 | if (reiserfs_test_and_set_le_bit | 223 | if (reiserfs_test_and_set_le_bit |
205 | (i, bh->b_data)) { | 224 | (i, bh->b_data)) { |
206 | /* bit was set by another process | 225 | /* |
207 | * while we slept in prepare_for_journal() */ | 226 | * bit was set by another process while |
227 | * we slept in prepare_for_journal() | ||
228 | */ | ||
208 | PROC_INFO_INC(s, scan_bitmap.stolen); | 229 | PROC_INFO_INC(s, scan_bitmap.stolen); |
209 | if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks, | 230 | |
210 | * if length of this set is more or equal to `min' */ | 231 | /* |
232 | * we can continue with smaller set | ||
233 | * of allocated blocks, if length of | ||
234 | * this set is more or equal to `min' | ||
235 | */ | ||
236 | if (i >= *beg + min) { | ||
211 | end = i; | 237 | end = i; |
212 | break; | 238 | break; |
213 | } | 239 | } |
214 | /* otherwise we clear all bit were set ... */ | 240 | |
241 | /* | ||
242 | * otherwise we clear all bit | ||
243 | * were set ... | ||
244 | */ | ||
215 | while (--i >= *beg) | 245 | while (--i >= *beg) |
216 | reiserfs_clear_le_bit | 246 | reiserfs_clear_le_bit |
217 | (i, bh->b_data); | 247 | (i, bh->b_data); |
218 | reiserfs_restore_prepared_buffer(s, bh); | 248 | reiserfs_restore_prepared_buffer(s, bh); |
219 | *beg = org; | 249 | *beg = org; |
220 | /* ... and search again in current block from beginning */ | 250 | |
251 | /* | ||
252 | * Search again in current block | ||
253 | * from beginning | ||
254 | */ | ||
221 | goto cont; | 255 | goto cont; |
222 | } | 256 | } |
223 | } | 257 | } |
@@ -268,11 +302,13 @@ static inline int block_group_used(struct super_block *s, u32 id) | |||
268 | int bm = bmap_hash_id(s, id); | 302 | int bm = bmap_hash_id(s, id); |
269 | struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; | 303 | struct reiserfs_bitmap_info *info = &SB_AP_BITMAP(s)[bm]; |
270 | 304 | ||
271 | /* If we don't have cached information on this bitmap block, we're | 305 | /* |
306 | * If we don't have cached information on this bitmap block, we're | ||
272 | * going to have to load it later anyway. Loading it here allows us | 307 | * going to have to load it later anyway. Loading it here allows us |
273 | * to make a better decision. This favors long-term performance gain | 308 | * to make a better decision. This favors long-term performance gain |
274 | * with a better on-disk layout vs. a short term gain of skipping the | 309 | * with a better on-disk layout vs. a short term gain of skipping the |
275 | * read and potentially having a bad placement. */ | 310 | * read and potentially having a bad placement. |
311 | */ | ||
276 | if (info->free_count == UINT_MAX) { | 312 | if (info->free_count == UINT_MAX) { |
277 | struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); | 313 | struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); |
278 | brelse(bh); | 314 | brelse(bh); |
@@ -305,17 +341,16 @@ __le32 reiserfs_choose_packing(struct inode * dir) | |||
305 | return packing; | 341 | return packing; |
306 | } | 342 | } |
307 | 343 | ||
308 | /* Tries to find contiguous zero bit window (given size) in given region of | 344 | /* |
309 | * bitmap and place new blocks there. Returns number of allocated blocks. */ | 345 | * Tries to find contiguous zero bit window (given size) in given region of |
346 | * bitmap and place new blocks there. Returns number of allocated blocks. | ||
347 | */ | ||
310 | static int scan_bitmap(struct reiserfs_transaction_handle *th, | 348 | static int scan_bitmap(struct reiserfs_transaction_handle *th, |
311 | b_blocknr_t * start, b_blocknr_t finish, | 349 | b_blocknr_t * start, b_blocknr_t finish, |
312 | int min, int max, int unfm, sector_t file_block) | 350 | int min, int max, int unfm, sector_t file_block) |
313 | { | 351 | { |
314 | int nr_allocated = 0; | 352 | int nr_allocated = 0; |
315 | struct super_block *s = th->t_super; | 353 | struct super_block *s = th->t_super; |
316 | /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr | ||
317 | * - Hans, it is not a block number - Zam. */ | ||
318 | |||
319 | unsigned int bm, off; | 354 | unsigned int bm, off; |
320 | unsigned int end_bm, end_off; | 355 | unsigned int end_bm, end_off; |
321 | unsigned int off_max = s->s_blocksize << 3; | 356 | unsigned int off_max = s->s_blocksize << 3; |
@@ -323,8 +358,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th, | |||
323 | BUG_ON(!th->t_trans_id); | 358 | BUG_ON(!th->t_trans_id); |
324 | 359 | ||
325 | PROC_INFO_INC(s, scan_bitmap.call); | 360 | PROC_INFO_INC(s, scan_bitmap.call); |
361 | |||
362 | /* No point in looking for more free blocks */ | ||
326 | if (SB_FREE_BLOCKS(s) <= 0) | 363 | if (SB_FREE_BLOCKS(s) <= 0) |
327 | return 0; // No point in looking for more free blocks | 364 | return 0; |
328 | 365 | ||
329 | get_bit_address(s, *start, &bm, &off); | 366 | get_bit_address(s, *start, &bm, &off); |
330 | get_bit_address(s, finish, &end_bm, &end_off); | 367 | get_bit_address(s, finish, &end_bm, &end_off); |
@@ -333,7 +370,8 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th, | |||
333 | if (end_bm > reiserfs_bmap_count(s)) | 370 | if (end_bm > reiserfs_bmap_count(s)) |
334 | end_bm = reiserfs_bmap_count(s); | 371 | end_bm = reiserfs_bmap_count(s); |
335 | 372 | ||
336 | /* When the bitmap is more than 10% free, anyone can allocate. | 373 | /* |
374 | * When the bitmap is more than 10% free, anyone can allocate. | ||
337 | * When it's less than 10% free, only files that already use the | 375 | * When it's less than 10% free, only files that already use the |
338 | * bitmap are allowed. Once we pass 80% full, this restriction | 376 | * bitmap are allowed. Once we pass 80% full, this restriction |
339 | * is lifted. | 377 | * is lifted. |
@@ -532,7 +570,8 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options) | |||
532 | { | 570 | { |
533 | char *this_char, *value; | 571 | char *this_char, *value; |
534 | 572 | ||
535 | REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ | 573 | /* clear default settings */ |
574 | REISERFS_SB(s)->s_alloc_options.bits = 0; | ||
536 | 575 | ||
537 | while ((this_char = strsep(&options, ":")) != NULL) { | 576 | while ((this_char = strsep(&options, ":")) != NULL) { |
538 | if ((value = strchr(this_char, '=')) != NULL) | 577 | if ((value = strchr(this_char, '=')) != NULL) |
@@ -733,7 +772,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) | |||
733 | hash_in = (char *)&hint->key.k_dir_id; | 772 | hash_in = (char *)&hint->key.k_dir_id; |
734 | } else { | 773 | } else { |
735 | if (!hint->inode) { | 774 | if (!hint->inode) { |
736 | //hint->search_start = hint->beg; | 775 | /*hint->search_start = hint->beg;*/ |
737 | hash_in = (char *)&hint->key.k_dir_id; | 776 | hash_in = (char *)&hint->key.k_dir_id; |
738 | } else | 777 | } else |
739 | if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) | 778 | if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) |
@@ -786,7 +825,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint) | |||
786 | 825 | ||
787 | dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); | 826 | dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); |
788 | 827 | ||
789 | /* keep the root dir and it's first set of subdirs close to | 828 | /* |
829 | * keep the root dir and it's first set of subdirs close to | ||
790 | * the start of the disk | 830 | * the start of the disk |
791 | */ | 831 | */ |
792 | if (dirid <= 2) | 832 | if (dirid <= 2) |
@@ -800,7 +840,8 @@ static void oid_groups(reiserfs_blocknr_hint_t * hint) | |||
800 | } | 840 | } |
801 | } | 841 | } |
802 | 842 | ||
803 | /* returns 1 if it finds an indirect item and gets valid hint info | 843 | /* |
844 | * returns 1 if it finds an indirect item and gets valid hint info | ||
804 | * from it, otherwise 0 | 845 | * from it, otherwise 0 |
805 | */ | 846 | */ |
806 | static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) | 847 | static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) |
@@ -812,8 +853,11 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) | |||
812 | __le32 *item; | 853 | __le32 *item; |
813 | int ret = 0; | 854 | int ret = 0; |
814 | 855 | ||
815 | if (!hint->path) /* reiserfs code can call this function w/o pointer to path | 856 | /* |
816 | * structure supplied; then we rely on supplied search_start */ | 857 | * reiserfs code can call this function w/o pointer to path |
858 | * structure supplied; then we rely on supplied search_start | ||
859 | */ | ||
860 | if (!hint->path) | ||
817 | return 0; | 861 | return 0; |
818 | 862 | ||
819 | path = hint->path; | 863 | path = hint->path; |
@@ -825,12 +869,13 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) | |||
825 | 869 | ||
826 | hint->search_start = bh->b_blocknr; | 870 | hint->search_start = bh->b_blocknr; |
827 | 871 | ||
872 | /* | ||
873 | * for indirect item: go to left and look for the first non-hole entry | ||
874 | * in the indirect item | ||
875 | */ | ||
828 | if (!hint->formatted_node && is_indirect_le_ih(ih)) { | 876 | if (!hint->formatted_node && is_indirect_le_ih(ih)) { |
829 | /* for indirect item: go to left and look for the first non-hole entry | ||
830 | in the indirect item */ | ||
831 | if (pos_in_item == I_UNFM_NUM(ih)) | 877 | if (pos_in_item == I_UNFM_NUM(ih)) |
832 | pos_in_item--; | 878 | pos_in_item--; |
833 | // pos_in_item = I_UNFM_NUM (ih) - 1; | ||
834 | while (pos_in_item >= 0) { | 879 | while (pos_in_item >= 0) { |
835 | int t = get_block_num(item, pos_in_item); | 880 | int t = get_block_num(item, pos_in_item); |
836 | if (t) { | 881 | if (t) { |
@@ -846,10 +891,12 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t * hint) | |||
846 | return ret; | 891 | return ret; |
847 | } | 892 | } |
848 | 893 | ||
849 | /* should be, if formatted node, then try to put on first part of the device | 894 | /* |
850 | specified as number of percent with mount option device, else try to put | 895 | * should be, if formatted node, then try to put on first part of the device |
851 | on last of device. This is not to say it is good code to do so, | 896 | * specified as number of percent with mount option device, else try to put |
852 | but the effect should be measured. */ | 897 | * on last of device. This is not to say it is good code to do so, |
898 | * but the effect should be measured. | ||
899 | */ | ||
853 | static inline void set_border_in_hint(struct super_block *s, | 900 | static inline void set_border_in_hint(struct super_block *s, |
854 | reiserfs_blocknr_hint_t * hint) | 901 | reiserfs_blocknr_hint_t * hint) |
855 | { | 902 | { |
@@ -975,21 +1022,27 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint, | |||
975 | set_border_in_hint(s, hint); | 1022 | set_border_in_hint(s, hint); |
976 | 1023 | ||
977 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 1024 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
978 | /* whenever we create a new directory, we displace it. At first we will | 1025 | /* |
979 | hash for location, later we might look for a moderately empty place for | 1026 | * whenever we create a new directory, we displace it. At first |
980 | it */ | 1027 | * we will hash for location, later we might look for a moderately |
1028 | * empty place for it | ||
1029 | */ | ||
981 | if (displacing_new_packing_localities(s) | 1030 | if (displacing_new_packing_localities(s) |
982 | && hint->th->displace_new_blocks) { | 1031 | && hint->th->displace_new_blocks) { |
983 | displace_new_packing_locality(hint); | 1032 | displace_new_packing_locality(hint); |
984 | 1033 | ||
985 | /* we do not continue determine_search_start, | 1034 | /* |
986 | * if new packing locality is being displaced */ | 1035 | * we do not continue determine_search_start, |
1036 | * if new packing locality is being displaced | ||
1037 | */ | ||
987 | return; | 1038 | return; |
988 | } | 1039 | } |
989 | #endif | 1040 | #endif |
990 | 1041 | ||
991 | /* all persons should feel encouraged to add more special cases here and | 1042 | /* |
992 | * test them */ | 1043 | * all persons should feel encouraged to add more special cases |
1044 | * here and test them | ||
1045 | */ | ||
993 | 1046 | ||
994 | if (displacing_large_files(s) && !hint->formatted_node | 1047 | if (displacing_large_files(s) && !hint->formatted_node |
995 | && this_blocknr_allocation_would_make_it_a_large_file(hint)) { | 1048 | && this_blocknr_allocation_would_make_it_a_large_file(hint)) { |
@@ -997,8 +1050,10 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint, | |||
997 | return; | 1050 | return; |
998 | } | 1051 | } |
999 | 1052 | ||
1000 | /* if none of our special cases is relevant, use the left neighbor in the | 1053 | /* |
1001 | tree order of the new node we are allocating for */ | 1054 | * if none of our special cases is relevant, use the left |
1055 | * neighbor in the tree order of the new node we are allocating for | ||
1056 | */ | ||
1002 | if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { | 1057 | if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) { |
1003 | hash_formatted_node(hint); | 1058 | hash_formatted_node(hint); |
1004 | return; | 1059 | return; |
@@ -1006,10 +1061,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint, | |||
1006 | 1061 | ||
1007 | unfm_hint = get_left_neighbor(hint); | 1062 | unfm_hint = get_left_neighbor(hint); |
1008 | 1063 | ||
1009 | /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, | 1064 | /* |
1010 | new blocks are displaced based on directory ID. Also, if suggested search_start | 1065 | * Mimic old block allocator behaviour, that is if VFS allowed for |
1011 | is less than last preallocated block, we start searching from it, assuming that | 1066 | * preallocation, new blocks are displaced based on directory ID. |
1012 | HDD dataflow is faster in forward direction */ | 1067 | * Also, if suggested search_start is less than last preallocated |
1068 | * block, we start searching from it, assuming that HDD dataflow | ||
1069 | * is faster in forward direction | ||
1070 | */ | ||
1013 | if (TEST_OPTION(old_way, s)) { | 1071 | if (TEST_OPTION(old_way, s)) { |
1014 | if (!hint->formatted_node) { | 1072 | if (!hint->formatted_node) { |
1015 | if (!reiserfs_hashed_relocation(s)) | 1073 | if (!reiserfs_hashed_relocation(s)) |
@@ -1038,11 +1096,13 @@ static void determine_search_start(reiserfs_blocknr_hint_t * hint, | |||
1038 | TEST_OPTION(old_hashed_relocation, s)) { | 1096 | TEST_OPTION(old_hashed_relocation, s)) { |
1039 | old_hashed_relocation(hint); | 1097 | old_hashed_relocation(hint); |
1040 | } | 1098 | } |
1099 | |||
1041 | /* new_hashed_relocation works with both formatted/unformatted nodes */ | 1100 | /* new_hashed_relocation works with both formatted/unformatted nodes */ |
1042 | if ((!unfm_hint || hint->formatted_node) && | 1101 | if ((!unfm_hint || hint->formatted_node) && |
1043 | TEST_OPTION(new_hashed_relocation, s)) { | 1102 | TEST_OPTION(new_hashed_relocation, s)) { |
1044 | new_hashed_relocation(hint); | 1103 | new_hashed_relocation(hint); |
1045 | } | 1104 | } |
1105 | |||
1046 | /* dirid grouping works only on unformatted nodes */ | 1106 | /* dirid grouping works only on unformatted nodes */ |
1047 | if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { | 1107 | if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) { |
1048 | dirid_groups(hint); | 1108 | dirid_groups(hint); |
@@ -1080,8 +1140,6 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) | |||
1080 | return CARRY_ON; | 1140 | return CARRY_ON; |
1081 | } | 1141 | } |
1082 | 1142 | ||
1083 | /* XXX I know it could be merged with upper-level function; | ||
1084 | but may be result function would be too complex. */ | ||
1085 | static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, | 1143 | static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, |
1086 | b_blocknr_t * new_blocknrs, | 1144 | b_blocknr_t * new_blocknrs, |
1087 | b_blocknr_t start, | 1145 | b_blocknr_t start, |
@@ -1109,7 +1167,10 @@ static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint, | |||
1109 | 1167 | ||
1110 | /* do we have something to fill prealloc. array also ? */ | 1168 | /* do we have something to fill prealloc. array also ? */ |
1111 | if (nr_allocated > 0) { | 1169 | if (nr_allocated > 0) { |
1112 | /* it means prealloc_size was greater that 0 and we do preallocation */ | 1170 | /* |
1171 | * it means prealloc_size was greater that 0 and | ||
1172 | * we do preallocation | ||
1173 | */ | ||
1113 | list_add(&REISERFS_I(hint->inode)->i_prealloc_list, | 1174 | list_add(&REISERFS_I(hint->inode)->i_prealloc_list, |
1114 | &SB_JOURNAL(hint->th->t_super)-> | 1175 | &SB_JOURNAL(hint->th->t_super)-> |
1115 | j_prealloc_list); | 1176 | j_prealloc_list); |
@@ -1177,7 +1238,8 @@ static inline int blocknrs_and_prealloc_arrays_from_search_start | |||
1177 | start = 0; | 1238 | start = 0; |
1178 | finish = hint->beg; | 1239 | finish = hint->beg; |
1179 | break; | 1240 | break; |
1180 | default: /* We've tried searching everywhere, not enough space */ | 1241 | default: |
1242 | /* We've tried searching everywhere, not enough space */ | ||
1181 | /* Free the blocks */ | 1243 | /* Free the blocks */ |
1182 | if (!hint->formatted_node) { | 1244 | if (!hint->formatted_node) { |
1183 | #ifdef REISERQUOTA_DEBUG | 1245 | #ifdef REISERQUOTA_DEBUG |
@@ -1262,8 +1324,11 @@ static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint, | |||
1262 | return amount_needed; | 1324 | return amount_needed; |
1263 | } | 1325 | } |
1264 | 1326 | ||
1265 | int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have | 1327 | int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, |
1266 | already reserved */ ) | 1328 | b_blocknr_t *new_blocknrs, |
1329 | int amount_needed, | ||
1330 | /* Amount of blocks we have already reserved */ | ||
1331 | int reserved_by_us) | ||
1267 | { | 1332 | { |
1268 | int initial_amount_needed = amount_needed; | 1333 | int initial_amount_needed = amount_needed; |
1269 | int ret; | 1334 | int ret; |
@@ -1275,15 +1340,21 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new | |||
1275 | return NO_DISK_SPACE; | 1340 | return NO_DISK_SPACE; |
1276 | /* should this be if !hint->inode && hint->preallocate? */ | 1341 | /* should this be if !hint->inode && hint->preallocate? */ |
1277 | /* do you mean hint->formatted_node can be removed ? - Zam */ | 1342 | /* do you mean hint->formatted_node can be removed ? - Zam */ |
1278 | /* hint->formatted_node cannot be removed because we try to access | 1343 | /* |
1279 | inode information here, and there is often no inode assotiated with | 1344 | * hint->formatted_node cannot be removed because we try to access |
1280 | metadata allocations - green */ | 1345 | * inode information here, and there is often no inode associated with |
1346 | * metadata allocations - green | ||
1347 | */ | ||
1281 | 1348 | ||
1282 | if (!hint->formatted_node && hint->preallocate) { | 1349 | if (!hint->formatted_node && hint->preallocate) { |
1283 | amount_needed = use_preallocated_list_if_available | 1350 | amount_needed = use_preallocated_list_if_available |
1284 | (hint, new_blocknrs, amount_needed); | 1351 | (hint, new_blocknrs, amount_needed); |
1285 | if (amount_needed == 0) /* all blocknrs we need we got from | 1352 | |
1286 | prealloc. list */ | 1353 | /* |
1354 | * We have all the block numbers we need from the | ||
1355 | * prealloc list | ||
1356 | */ | ||
1357 | if (amount_needed == 0) | ||
1287 | return CARRY_ON; | 1358 | return CARRY_ON; |
1288 | new_blocknrs += (initial_amount_needed - amount_needed); | 1359 | new_blocknrs += (initial_amount_needed - amount_needed); |
1289 | } | 1360 | } |
@@ -1297,10 +1368,12 @@ int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new | |||
1297 | ret = blocknrs_and_prealloc_arrays_from_search_start | 1368 | ret = blocknrs_and_prealloc_arrays_from_search_start |
1298 | (hint, new_blocknrs, amount_needed); | 1369 | (hint, new_blocknrs, amount_needed); |
1299 | 1370 | ||
1300 | /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we | 1371 | /* |
1301 | * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second | 1372 | * We used prealloc. list to fill (partially) new_blocknrs array. |
1302 | * variant) */ | 1373 | * If final allocation fails we need to return blocks back to |
1303 | 1374 | * prealloc. list or just free them. -- Zam (I chose second | |
1375 | * variant) | ||
1376 | */ | ||
1304 | if (ret != CARRY_ON) { | 1377 | if (ret != CARRY_ON) { |
1305 | while (amount_needed++ < initial_amount_needed) { | 1378 | while (amount_needed++ < initial_amount_needed) { |
1306 | reiserfs_free_block(hint->th, hint->inode, | 1379 | reiserfs_free_block(hint->th, hint->inode, |
@@ -1339,8 +1412,10 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, | |||
1339 | struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; | 1412 | struct reiserfs_bitmap_info *info = SB_AP_BITMAP(sb) + bitmap; |
1340 | struct buffer_head *bh; | 1413 | struct buffer_head *bh; |
1341 | 1414 | ||
1342 | /* Way old format filesystems had the bitmaps packed up front. | 1415 | /* |
1343 | * I doubt there are any of these left, but just in case... */ | 1416 | * Way old format filesystems had the bitmaps packed up front. |
1417 | * I doubt there are any of these left, but just in case... | ||
1418 | */ | ||
1344 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, | 1419 | if (unlikely(test_bit(REISERFS_OLD_FORMAT, |
1345 | &(REISERFS_SB(sb)->s_properties)))) | 1420 | &(REISERFS_SB(sb)->s_properties)))) |
1346 | block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; | 1421 | block = REISERFS_SB(sb)->s_sbh->b_blocknr + 1 + bitmap; |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 1fe5cdeb5862..8d51f28d6345 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -59,7 +59,10 @@ static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *d | |||
59 | 59 | ||
60 | int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | 60 | int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) |
61 | { | 61 | { |
62 | struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ | 62 | |
63 | /* key of current position in the directory (key of directory entry) */ | ||
64 | struct cpu_key pos_key; | ||
65 | |||
63 | INITIALIZE_PATH(path_to_entry); | 66 | INITIALIZE_PATH(path_to_entry); |
64 | struct buffer_head *bh; | 67 | struct buffer_head *bh; |
65 | int item_num, entry_num; | 68 | int item_num, entry_num; |
@@ -77,21 +80,28 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
77 | 80 | ||
78 | reiserfs_check_lock_depth(inode->i_sb, "readdir"); | 81 | reiserfs_check_lock_depth(inode->i_sb, "readdir"); |
79 | 82 | ||
80 | /* form key for search the next directory entry using f_pos field of | 83 | /* |
81 | file structure */ | 84 | * form key for search the next directory entry using |
85 | * f_pos field of file structure | ||
86 | */ | ||
82 | make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); | 87 | make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); |
83 | next_pos = cpu_key_k_offset(&pos_key); | 88 | next_pos = cpu_key_k_offset(&pos_key); |
84 | 89 | ||
85 | path_to_entry.reada = PATH_READA; | 90 | path_to_entry.reada = PATH_READA; |
86 | while (1) { | 91 | while (1) { |
87 | research: | 92 | research: |
88 | /* search the directory item, containing entry with specified key */ | 93 | /* |
94 | * search the directory item, containing entry with | ||
95 | * specified key | ||
96 | */ | ||
89 | search_res = | 97 | search_res = |
90 | search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, | 98 | search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, |
91 | &de); | 99 | &de); |
92 | if (search_res == IO_ERROR) { | 100 | if (search_res == IO_ERROR) { |
93 | // FIXME: we could just skip part of directory which could | 101 | /* |
94 | // not be read | 102 | * FIXME: we could just skip part of directory |
103 | * which could not be read | ||
104 | */ | ||
95 | ret = -EIO; | 105 | ret = -EIO; |
96 | goto out; | 106 | goto out; |
97 | } | 107 | } |
@@ -109,14 +119,20 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
109 | "vs-9005 item_num == %d, item amount == %d", | 119 | "vs-9005 item_num == %d, item amount == %d", |
110 | item_num, B_NR_ITEMS(bh)); | 120 | item_num, B_NR_ITEMS(bh)); |
111 | 121 | ||
112 | /* and entry must be not more than number of entries in the item */ | 122 | /* |
123 | * and entry must be not more than number of entries | ||
124 | * in the item | ||
125 | */ | ||
113 | RFALSE(ih_entry_count(ih) < entry_num, | 126 | RFALSE(ih_entry_count(ih) < entry_num, |
114 | "vs-9010: entry number is too big %d (%d)", | 127 | "vs-9010: entry number is too big %d (%d)", |
115 | entry_num, ih_entry_count(ih)); | 128 | entry_num, ih_entry_count(ih)); |
116 | 129 | ||
130 | /* | ||
131 | * go through all entries in the directory item beginning | ||
132 | * from the entry, that has been found | ||
133 | */ | ||
117 | if (search_res == POSITION_FOUND | 134 | if (search_res == POSITION_FOUND |
118 | || entry_num < ih_entry_count(ih)) { | 135 | || entry_num < ih_entry_count(ih)) { |
119 | /* go through all entries in the directory item beginning from the entry, that has been found */ | ||
120 | struct reiserfs_de_head *deh = | 136 | struct reiserfs_de_head *deh = |
121 | B_I_DEH(bh, ih) + entry_num; | 137 | B_I_DEH(bh, ih) + entry_num; |
122 | 138 | ||
@@ -127,16 +143,18 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
127 | ino_t d_ino; | 143 | ino_t d_ino; |
128 | loff_t cur_pos = deh_offset(deh); | 144 | loff_t cur_pos = deh_offset(deh); |
129 | 145 | ||
146 | /* it is hidden entry */ | ||
130 | if (!de_visible(deh)) | 147 | if (!de_visible(deh)) |
131 | /* it is hidden entry */ | ||
132 | continue; | 148 | continue; |
133 | d_reclen = entry_length(bh, ih, entry_num); | 149 | d_reclen = entry_length(bh, ih, entry_num); |
134 | d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); | 150 | d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh); |
135 | 151 | ||
136 | if (d_reclen <= 0 || | 152 | if (d_reclen <= 0 || |
137 | d_name + d_reclen > bh->b_data + bh->b_size) { | 153 | d_name + d_reclen > bh->b_data + bh->b_size) { |
138 | /* There is corrupted data in entry, | 154 | /* |
139 | * We'd better stop here */ | 155 | * There is corrupted data in the entry; |
156 | * we'd better stop here |
157 | */ | ||
140 | pathrelse(&path_to_entry); | 158 | pathrelse(&path_to_entry); |
141 | ret = -EIO; | 159 | ret = -EIO; |
142 | goto out; | 160 | goto out; |
@@ -145,10 +163,10 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
145 | if (!d_name[d_reclen - 1]) | 163 | if (!d_name[d_reclen - 1]) |
146 | d_reclen = strlen(d_name); | 164 | d_reclen = strlen(d_name); |
147 | 165 | ||
166 | /* too big to send back to VFS */ | ||
148 | if (d_reclen > | 167 | if (d_reclen > |
149 | REISERFS_MAX_NAME(inode->i_sb-> | 168 | REISERFS_MAX_NAME(inode->i_sb-> |
150 | s_blocksize)) { | 169 | s_blocksize)) { |
151 | /* too big to send back to VFS */ | ||
152 | continue; | 170 | continue; |
153 | } | 171 | } |
154 | 172 | ||
@@ -173,10 +191,14 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
173 | goto research; | 191 | goto research; |
174 | } | 192 | } |
175 | } | 193 | } |
176 | // Note, that we copy name to user space via temporary | 194 | |
177 | // buffer (local_buf) because filldir will block if | 195 | /* |
178 | // user space buffer is swapped out. At that time | 196 | * Note, that we copy name to user space via |
179 | // entry can move to somewhere else | 197 | * temporary buffer (local_buf) because |
198 | * filldir will block if user space buffer is | ||
199 | * swapped out. At that time entry can move to | ||
200 | * somewhere else | ||
201 | */ | ||
180 | memcpy(local_buf, d_name, d_reclen); | 202 | memcpy(local_buf, d_name, d_reclen); |
181 | 203 | ||
182 | /* | 204 | /* |
@@ -209,22 +231,26 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
209 | } /* for */ | 231 | } /* for */ |
210 | } | 232 | } |
211 | 233 | ||
234 | /* end of directory has been reached */ | ||
212 | if (item_num != B_NR_ITEMS(bh) - 1) | 235 | if (item_num != B_NR_ITEMS(bh) - 1) |
213 | // end of directory has been reached | ||
214 | goto end; | 236 | goto end; |
215 | 237 | ||
216 | /* item we went through is last item of node. Using right | 238 | /* |
217 | delimiting key check is it directory end */ | 239 | * item we went through is last item of node. Using right |
240 | * delimiting key, check whether it is the directory end |
241 | */ | ||
218 | rkey = get_rkey(&path_to_entry, inode->i_sb); | 242 | rkey = get_rkey(&path_to_entry, inode->i_sb); |
219 | if (!comp_le_keys(rkey, &MIN_KEY)) { | 243 | if (!comp_le_keys(rkey, &MIN_KEY)) { |
220 | /* set pos_key to key, that is the smallest and greater | 244 | /* |
221 | that key of the last entry in the item */ | 245 | * set pos_key to key, that is the smallest and greater |
246 | * than the key of the last entry in the item |
247 | */ | ||
222 | set_cpu_key_k_offset(&pos_key, next_pos); | 248 | set_cpu_key_k_offset(&pos_key, next_pos); |
223 | continue; | 249 | continue; |
224 | } | 250 | } |
225 | 251 | ||
252 | /* end of directory has been reached */ | ||
226 | if (COMP_SHORT_KEYS(rkey, &pos_key)) { | 253 | if (COMP_SHORT_KEYS(rkey, &pos_key)) { |
227 | // end of directory has been reached | ||
228 | goto end; | 254 | goto end; |
229 | } | 255 | } |
230 | 256 | ||
@@ -248,9 +274,10 @@ static int reiserfs_readdir(struct file *file, struct dir_context *ctx) | |||
248 | return reiserfs_readdir_inode(file_inode(file), ctx); | 274 | return reiserfs_readdir_inode(file_inode(file), ctx); |
249 | } | 275 | } |
250 | 276 | ||
251 | /* compose directory item containing "." and ".." entries (entries are | 277 | /* |
252 | not aligned to 4 byte boundary) */ | 278 | * compose directory item containing "." and ".." entries (entries are |
253 | /* the last four params are LE */ | 279 | * not aligned to 4 byte boundary) |
280 | */ | ||
254 | void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, | 281 | void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, |
255 | __le32 par_dirid, __le32 par_objid) | 282 | __le32 par_dirid, __le32 par_objid) |
256 | { | 283 | { |
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 80b2b1b37169..399b2009b677 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c | |||
@@ -2,18 +2,13 @@ | |||
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /* Now we have all buffers that must be used in balancing of the tree */ | 5 | /* |
6 | /* Further calculations can not cause schedule(), and thus the buffer */ | 6 | * Now we have all buffers that must be used in balancing of the tree |
7 | /* tree will be stable until the balancing will be finished */ | 7 | * Further calculations can not cause schedule(), and thus the buffer |
8 | /* balance the tree according to the analysis made before, */ | 8 | * tree will be stable until the balancing will be finished |
9 | /* and using buffers obtained after all above. */ | 9 | * balance the tree according to the analysis made before, |
10 | 10 | * and using buffers obtained after all above. | |
11 | /** | 11 | */ |
12 | ** balance_leaf_when_delete | ||
13 | ** balance_leaf | ||
14 | ** do_balance | ||
15 | ** | ||
16 | **/ | ||
17 | 12 | ||
18 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
19 | #include <linux/time.h> | 14 | #include <linux/time.h> |
@@ -68,35 +63,39 @@ inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, | |||
68 | #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty | 63 | #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty |
69 | #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty | 64 | #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty |
70 | 65 | ||
71 | /* summary: | 66 | /* |
72 | if deleting something ( tb->insert_size[0] < 0 ) | 67 | * summary: |
73 | return(balance_leaf_when_delete()); (flag d handled here) | 68 | * if deleting something ( tb->insert_size[0] < 0 ) |
74 | else | 69 | * return(balance_leaf_when_delete()); (flag d handled here) |
75 | if lnum is larger than 0 we put items into the left node | 70 | * else |
76 | if rnum is larger than 0 we put items into the right node | 71 | * if lnum is larger than 0 we put items into the left node |
77 | if snum1 is larger than 0 we put items into the new node s1 | 72 | * if rnum is larger than 0 we put items into the right node |
78 | if snum2 is larger than 0 we put items into the new node s2 | 73 | * if snum1 is larger than 0 we put items into the new node s1 |
79 | Note that all *num* count new items being created. | 74 | * if snum2 is larger than 0 we put items into the new node s2 |
80 | 75 | * Note that all *num* count new items being created. | |
81 | It would be easier to read balance_leaf() if each of these summary | 76 | * |
82 | lines was a separate procedure rather than being inlined. I think | 77 | * It would be easier to read balance_leaf() if each of these summary |
83 | that there are many passages here and in balance_leaf_when_delete() in | 78 | * lines was a separate procedure rather than being inlined. I think |
84 | which two calls to one procedure can replace two passages, and it | 79 | * that there are many passages here and in balance_leaf_when_delete() in |
85 | might save cache space and improve software maintenance costs to do so. | 80 | * which two calls to one procedure can replace two passages, and it |
86 | 81 | * might save cache space and improve software maintenance costs to do so. | |
87 | Vladimir made the perceptive comment that we should offload most of | 82 | * |
88 | the decision making in this function into fix_nodes/check_balance, and | 83 | * Vladimir made the perceptive comment that we should offload most of |
89 | then create some sort of structure in tb that says what actions should | 84 | * the decision making in this function into fix_nodes/check_balance, and |
90 | be performed by do_balance. | 85 | * then create some sort of structure in tb that says what actions should |
91 | 86 | * be performed by do_balance. | |
92 | -Hans */ | 87 | * |
93 | 88 | * -Hans | |
94 | /* Balance leaf node in case of delete or cut: insert_size[0] < 0 | 89 | */ |
90 | |||
91 | /* | ||
92 | * Balance leaf node in case of delete or cut: insert_size[0] < 0 | ||
95 | * | 93 | * |
96 | * lnum, rnum can have values >= -1 | 94 | * lnum, rnum can have values >= -1 |
97 | * -1 means that the neighbor must be joined with S | 95 | * -1 means that the neighbor must be joined with S |
98 | * 0 means that nothing should be done with the neighbor | 96 | * 0 means that nothing should be done with the neighbor |
99 | * >0 means to shift entirely or partly the specified number of items to the neighbor | 97 | * >0 means to shift entirely or partly the specified number of items |
98 | * to the neighbor | ||
100 | */ | 99 | */ |
101 | static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | 100 | static int balance_leaf_when_delete(struct tree_balance *tb, int flag) |
102 | { | 101 | { |
@@ -149,8 +148,16 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | |||
149 | case M_CUT:{ /* cut item in S[0] */ | 148 | case M_CUT:{ /* cut item in S[0] */ |
150 | if (is_direntry_le_ih(ih)) { | 149 | if (is_direntry_le_ih(ih)) { |
151 | 150 | ||
152 | /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ | 151 | /* |
153 | /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ | 152 | * UFS unlink semantics are such that you |
153 | * can only delete one directory entry at | ||
154 | * a time. | ||
155 | */ | ||
156 | |||
157 | /* | ||
158 | * when we cut a directory tb->insert_size[0] | ||
159 | * means number of entries to be cut (always 1) | ||
160 | */ | ||
154 | tb->insert_size[0] = -1; | 161 | tb->insert_size[0] = -1; |
155 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, | 162 | leaf_cut_from_buffer(&bi, item_pos, pos_in_item, |
156 | -tb->insert_size[0]); | 163 | -tb->insert_size[0]); |
@@ -183,13 +190,22 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | |||
183 | "UNKNOWN"), flag); | 190 | "UNKNOWN"), flag); |
184 | } | 191 | } |
185 | 192 | ||
186 | /* the rule is that no shifting occurs unless by shifting a node can be freed */ | 193 | /* |
194 | * the rule is that no shifting occurs unless by shifting | ||
195 | * a node can be freed | ||
196 | */ | ||
187 | n = B_NR_ITEMS(tbS0); | 197 | n = B_NR_ITEMS(tbS0); |
188 | if (tb->lnum[0]) { /* L[0] takes part in balancing */ | 198 | /* L[0] takes part in balancing */ |
189 | if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ | 199 | if (tb->lnum[0]) { |
190 | if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ | 200 | /* L[0] must be joined with S[0] */ |
201 | if (tb->lnum[0] == -1) { | ||
202 | /* R[0] must be also joined with S[0] */ | ||
203 | if (tb->rnum[0] == -1) { | ||
191 | if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { | 204 | if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { |
192 | /* all contents of all the 3 buffers will be in L[0] */ | 205 | /* |
206 | * all contents of all the 3 buffers | ||
207 | * will be in L[0] | ||
208 | */ | ||
193 | if (PATH_H_POSITION(tb->tb_path, 1) == 0 | 209 | if (PATH_H_POSITION(tb->tb_path, 1) == 0 |
194 | && 1 < B_NR_ITEMS(tb->FR[0])) | 210 | && 1 < B_NR_ITEMS(tb->FR[0])) |
195 | replace_key(tb, tb->CFL[0], | 211 | replace_key(tb, tb->CFL[0], |
@@ -208,7 +224,10 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | |||
208 | 224 | ||
209 | return 0; | 225 | return 0; |
210 | } | 226 | } |
211 | /* all contents of all the 3 buffers will be in R[0] */ | 227 | /* |
228 | * all contents of all the 3 buffers will | ||
229 | * be in R[0] | ||
230 | */ | ||
212 | leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, | 231 | leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, |
213 | NULL); | 232 | NULL); |
214 | leaf_move_items(LEAF_FROM_L_TO_R, tb, | 233 | leaf_move_items(LEAF_FROM_L_TO_R, tb, |
@@ -233,7 +252,11 @@ static int balance_leaf_when_delete(struct tree_balance *tb, int flag) | |||
233 | 252 | ||
234 | return 0; | 253 | return 0; |
235 | } | 254 | } |
236 | /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ | 255 | |
256 | /* | ||
257 | * a part of contents of S[0] will be in L[0] and the | ||
258 | * rest part of S[0] will be in R[0] | ||
259 | */ | ||
237 | 260 | ||
238 | RFALSE((tb->lnum[0] + tb->rnum[0] < n) || | 261 | RFALSE((tb->lnum[0] + tb->rnum[0] < n) || |
239 | (tb->lnum[0] + tb->rnum[0] > n + 1), | 262 | (tb->lnum[0] + tb->rnum[0] > n + 1), |
@@ -1178,9 +1201,7 @@ struct buffer_head *get_FEB(struct tree_balance *tb) | |||
1178 | return tb->used[i]; | 1201 | return tb->used[i]; |
1179 | } | 1202 | } |
1180 | 1203 | ||
1181 | /* This is now used because reiserfs_free_block has to be able to | 1204 | /* This is now used because reiserfs_free_block has to be able to schedule. */ |
1182 | ** schedule. | ||
1183 | */ | ||
1184 | static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) | 1205 | static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) |
1185 | { | 1206 | { |
1186 | int i; | 1207 | int i; |
@@ -1335,8 +1356,10 @@ static int check_before_balancing(struct tree_balance *tb) | |||
1335 | "mount point."); | 1356 | "mount point."); |
1336 | } | 1357 | } |
1337 | 1358 | ||
1338 | /* double check that buffers that we will modify are unlocked. (fix_nodes should already have | 1359 | /* |
1339 | prepped all of these for us). */ | 1360 | * double check that buffers that we will modify are unlocked. |
1361 | * (fix_nodes should already have prepped all of these for us). | ||
1362 | */ | ||
1340 | if (tb->lnum[0]) { | 1363 | if (tb->lnum[0]) { |
1341 | retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); | 1364 | retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); |
1342 | retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); | 1365 | retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); |
@@ -1429,49 +1452,51 @@ static void check_internal_levels(struct tree_balance *tb) | |||
1429 | 1452 | ||
1430 | #endif | 1453 | #endif |
1431 | 1454 | ||
1432 | /* Now we have all of the buffers that must be used in balancing of | 1455 | /* |
1433 | the tree. We rely on the assumption that schedule() will not occur | 1456 | * Now we have all of the buffers that must be used in balancing of |
1434 | while do_balance works. ( Only interrupt handlers are acceptable.) | 1457 | * the tree. We rely on the assumption that schedule() will not occur |
1435 | We balance the tree according to the analysis made before this, | 1458 | * while do_balance works. ( Only interrupt handlers are acceptable.) |
1436 | using buffers already obtained. For SMP support it will someday be | 1459 | * We balance the tree according to the analysis made before this, |
1437 | necessary to add ordered locking of tb. */ | 1460 | * using buffers already obtained. For SMP support it will someday be |
1438 | 1461 | * necessary to add ordered locking of tb. | |
1439 | /* Some interesting rules of balancing: | 1462 | */ |
1440 | |||
1441 | we delete a maximum of two nodes per level per balancing: we never | ||
1442 | delete R, when we delete two of three nodes L, S, R then we move | ||
1443 | them into R. | ||
1444 | |||
1445 | we only delete L if we are deleting two nodes, if we delete only | ||
1446 | one node we delete S | ||
1447 | |||
1448 | if we shift leaves then we shift as much as we can: this is a | ||
1449 | deliberate policy of extremism in node packing which results in | ||
1450 | higher average utilization after repeated random balance operations | ||
1451 | at the cost of more memory copies and more balancing as a result of | ||
1452 | small insertions to full nodes. | ||
1453 | |||
1454 | if we shift internal nodes we try to evenly balance the node | ||
1455 | utilization, with consequent less balancing at the cost of lower | ||
1456 | utilization. | ||
1457 | |||
1458 | one could argue that the policy for directories in leaves should be | ||
1459 | that of internal nodes, but we will wait until another day to | ||
1460 | evaluate this.... It would be nice to someday measure and prove | ||
1461 | these assumptions as to what is optimal.... | ||
1462 | 1463 | ||
1463 | */ | 1464 | /* |
1465 | * Some interesting rules of balancing: | ||
1466 | * we delete a maximum of two nodes per level per balancing: we never | ||
1467 | * delete R, when we delete two of three nodes L, S, R then we move | ||
1468 | * them into R. | ||
1469 | * | ||
1470 | * we only delete L if we are deleting two nodes, if we delete only | ||
1471 | * one node we delete S | ||
1472 | * | ||
1473 | * if we shift leaves then we shift as much as we can: this is a | ||
1474 | * deliberate policy of extremism in node packing which results in | ||
1475 | * higher average utilization after repeated random balance operations | ||
1476 | * at the cost of more memory copies and more balancing as a result of | ||
1477 | * small insertions to full nodes. | ||
1478 | * | ||
1479 | * if we shift internal nodes we try to evenly balance the node | ||
1480 | * utilization, with consequent less balancing at the cost of lower | ||
1481 | * utilization. | ||
1482 | * | ||
1483 | * one could argue that the policy for directories in leaves should be | ||
1484 | * that of internal nodes, but we will wait until another day to | ||
1485 | * evaluate this.... It would be nice to someday measure and prove | ||
1486 | * these assumptions as to what is optimal.... | ||
1487 | */ | ||
1464 | 1488 | ||
1465 | static inline void do_balance_starts(struct tree_balance *tb) | 1489 | static inline void do_balance_starts(struct tree_balance *tb) |
1466 | { | 1490 | { |
1467 | /* use print_cur_tb() to see initial state of struct | 1491 | /* use print_cur_tb() to see initial state of struct tree_balance */ |
1468 | tree_balance */ | ||
1469 | 1492 | ||
1470 | /* store_print_tb (tb); */ | 1493 | /* store_print_tb (tb); */ |
1471 | 1494 | ||
1472 | /* do not delete, just comment it out */ | 1495 | /* do not delete, just comment it out */ |
1473 | /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, | 1496 | /* |
1474 | "check");*/ | 1497 | print_tb(flag, PATH_LAST_POSITION(tb->tb_path), |
1498 | tb->tb_path->pos_in_item, tb, "check"); | ||
1499 | */ | ||
1475 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); | 1500 | RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB"); |
1476 | #ifdef CONFIG_REISERFS_CHECK | 1501 | #ifdef CONFIG_REISERFS_CHECK |
1477 | REISERFS_SB(tb->tb_sb)->cur_tb = tb; | 1502 | REISERFS_SB(tb->tb_sb)->cur_tb = tb; |
@@ -1487,9 +1512,10 @@ static inline void do_balance_completed(struct tree_balance *tb) | |||
1487 | REISERFS_SB(tb->tb_sb)->cur_tb = NULL; | 1512 | REISERFS_SB(tb->tb_sb)->cur_tb = NULL; |
1488 | #endif | 1513 | #endif |
1489 | 1514 | ||
1490 | /* reiserfs_free_block is no longer schedule safe. So, we need to | 1515 | /* |
1491 | ** put the buffers we want freed on the thrown list during do_balance, | 1516 | * reiserfs_free_block is no longer schedule safe. So, we need to |
1492 | ** and then free them now | 1517 | * put the buffers we want freed on the thrown list during do_balance, |
1518 | * and then free them now | ||
1493 | */ | 1519 | */ |
1494 | 1520 | ||
1495 | REISERFS_SB(tb->tb_sb)->s_do_balance++; | 1521 | REISERFS_SB(tb->tb_sb)->s_do_balance++; |
@@ -1500,36 +1526,40 @@ static inline void do_balance_completed(struct tree_balance *tb) | |||
1500 | free_thrown(tb); | 1526 | free_thrown(tb); |
1501 | } | 1527 | } |
1502 | 1528 | ||
1503 | void do_balance(struct tree_balance *tb, /* tree_balance structure */ | 1529 | /* |
1504 | struct item_head *ih, /* item header of inserted item */ | 1530 | * do_balance - balance the tree |
1505 | const char *body, /* body of inserted item or bytes to paste */ | 1531 | * |
1506 | int flag) | 1532 | * @tb: tree_balance structure |
1507 | { /* i - insert, d - delete | 1533 | * @ih: item header of inserted item |
1508 | c - cut, p - paste | 1534 | * @body: body of inserted item or bytes to paste |
1509 | 1535 | * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste | |
1510 | Cut means delete part of an item | 1536 | * |
1511 | (includes removing an entry from a | 1537 | * Cut means delete part of an item (includes removing an entry from a |
1512 | directory). | 1538 | * directory). |
1513 | 1539 | * | |
1514 | Delete means delete whole item. | 1540 | * Delete means delete whole item. |
1515 | 1541 | * | |
1516 | Insert means add a new item into the | 1542 | * Insert means add a new item into the tree. |
1517 | tree. | 1543 | * |
1518 | 1544 | * Paste means to append to the end of an existing file or to | |
1519 | Paste means to append to the end of an | 1545 | * insert a directory entry. |
1520 | existing file or to insert a directory | 1546 | */ |
1521 | entry. */ | 1547 | void do_balance(struct tree_balance *tb, struct item_head *ih, |
1522 | int child_pos, /* position of a child node in its parent */ | 1548 | const char *body, int flag) |
1523 | h; /* level of the tree being processed */ | 1549 | { |
1524 | struct item_head insert_key[2]; /* in our processing of one level | 1550 | int child_pos; /* position of a child node in its parent */ |
1525 | we sometimes determine what | 1551 | int h; /* level of the tree being processed */ |
1526 | must be inserted into the next | 1552 | |
1527 | higher level. This insertion | 1553 | /* |
1528 | consists of a key or two keys | 1554 | * in our processing of one level we sometimes determine what |
1529 | and their corresponding | 1555 | * must be inserted into the next higher level. This insertion |
1530 | pointers */ | 1556 | * consists of a key or two keys and their corresponding |
1531 | struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next | 1557 | * pointers |
1532 | level */ | 1558 | */ |
1559 | struct item_head insert_key[2]; | ||
1560 | |||
1561 | /* inserted node-ptrs for the next level */ | ||
1562 | struct buffer_head *insert_ptr[2]; | ||
1533 | 1563 | ||
1534 | tb->tb_mode = flag; | 1564 | tb->tb_mode = flag; |
1535 | tb->need_balance_dirty = 0; | 1565 | tb->need_balance_dirty = 0; |
@@ -1549,9 +1579,11 @@ void do_balance(struct tree_balance *tb, /* tree_balance structure */ | |||
1549 | atomic_inc(&(fs_generation(tb->tb_sb))); | 1579 | atomic_inc(&(fs_generation(tb->tb_sb))); |
1550 | do_balance_starts(tb); | 1580 | do_balance_starts(tb); |
1551 | 1581 | ||
1552 | /* balance leaf returns 0 except if combining L R and S into | 1582 | /* |
1553 | one node. see balance_internal() for explanation of this | 1583 | * balance_leaf returns 0 except if combining L R and S into |
1554 | line of code. */ | 1584 | * one node. see balance_internal() for explanation of this |
1585 | * line of code. | ||
1586 | */ | ||
1555 | child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + | 1587 | child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) + |
1556 | balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); | 1588 | balance_leaf(tb, ih, body, flag, insert_key, insert_ptr); |
1557 | 1589 | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index ed58d843d578..27399430664e 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -15,20 +15,20 @@ | |||
15 | #include <linux/quotaops.h> | 15 | #include <linux/quotaops.h> |
16 | 16 | ||
17 | /* | 17 | /* |
18 | ** We pack the tails of files on file close, not at the time they are written. | 18 | * We pack the tails of files on file close, not at the time they are written. |
19 | ** This implies an unnecessary copy of the tail and an unnecessary indirect item | 19 | * This implies an unnecessary copy of the tail and an unnecessary indirect item |
20 | ** insertion/balancing, for files that are written in one write. | 20 | * insertion/balancing, for files that are written in one write. |
21 | ** It avoids unnecessary tail packings (balances) for files that are written in | 21 | * It avoids unnecessary tail packings (balances) for files that are written in |
22 | ** multiple writes and are small enough to have tails. | 22 | * multiple writes and are small enough to have tails. |
23 | ** | 23 | * |
24 | ** file_release is called by the VFS layer when the file is closed. If | 24 | * file_release is called by the VFS layer when the file is closed. If |
25 | ** this is the last open file descriptor, and the file | 25 | * this is the last open file descriptor, and the file |
26 | ** small enough to have a tail, and the tail is currently in an | 26 | * small enough to have a tail, and the tail is currently in an |
27 | ** unformatted node, the tail is converted back into a direct item. | 27 | * unformatted node, the tail is converted back into a direct item. |
28 | ** | 28 | * |
29 | ** We use reiserfs_truncate_file to pack the tail, since it already has | 29 | * We use reiserfs_truncate_file to pack the tail, since it already has |
30 | ** all the conditions coded. | 30 | * all the conditions coded. |
31 | */ | 31 | */ |
32 | static int reiserfs_file_release(struct inode *inode, struct file *filp) | 32 | static int reiserfs_file_release(struct inode *inode, struct file *filp) |
33 | { | 33 | { |
34 | 34 | ||
@@ -57,14 +57,16 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
57 | } | 57 | } |
58 | 58 | ||
59 | reiserfs_write_lock(inode->i_sb); | 59 | reiserfs_write_lock(inode->i_sb); |
60 | /* freeing preallocation only involves relogging blocks that | 60 | /* |
61 | * freeing preallocation only involves relogging blocks that | ||
61 | * are already in the current transaction. preallocation gets | 62 | * are already in the current transaction. preallocation gets |
62 | * freed at the end of each transaction, so it is impossible for | 63 | * freed at the end of each transaction, so it is impossible for |
63 | * us to log any additional blocks (including quota blocks) | 64 | * us to log any additional blocks (including quota blocks) |
64 | */ | 65 | */ |
65 | err = journal_begin(&th, inode->i_sb, 1); | 66 | err = journal_begin(&th, inode->i_sb, 1); |
66 | if (err) { | 67 | if (err) { |
67 | /* uh oh, we can't allow the inode to go away while there | 68 | /* |
69 | * uh oh, we can't allow the inode to go away while there | ||
68 | * is still preallocation blocks pending. Try to join the | 70 | * is still preallocation blocks pending. Try to join the |
69 | * aborted transaction | 71 | * aborted transaction |
70 | */ | 72 | */ |
@@ -72,11 +74,13 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
72 | err = journal_join_abort(&th, inode->i_sb, 1); | 74 | err = journal_join_abort(&th, inode->i_sb, 1); |
73 | 75 | ||
74 | if (err) { | 76 | if (err) { |
75 | /* hmpf, our choices here aren't good. We can pin the inode | 77 | /* |
76 | * which will disallow unmount from every happening, we can | 78 | * hmpf, our choices here aren't good. We can pin |
77 | * do nothing, which will corrupt random memory on unmount, | 79 | * the inode which will disallow unmount from ever |
78 | * or we can forcibly remove the file from the preallocation | 80 | * happening, we can do nothing, which will corrupt |
79 | * list, which will leak blocks on disk. Lets pin the inode | 81 | * random memory on unmount, or we can forcibly |
82 | * remove the file from the preallocation list, which | ||
83 | * will leak blocks on disk. Lets pin the inode | ||
80 | * and let the admin know what is going on. | 84 | * and let the admin know what is going on. |
81 | */ | 85 | */ |
82 | igrab(inode); | 86 | igrab(inode); |
@@ -102,10 +106,12 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
102 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && | 106 | (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && |
103 | tail_has_to_be_packed(inode)) { | 107 | tail_has_to_be_packed(inode)) { |
104 | 108 | ||
105 | /* if regular file is released by last holder and it has been | 109 | /* |
106 | appended (we append by unformatted node only) or its direct | 110 | * if regular file is released by last holder and it has been |
107 | item(s) had to be converted, then it may have to be | 111 | * appended (we append by unformatted node only) or its direct |
108 | indirect2direct converted */ | 112 | * item(s) had to be converted, then it may have to be |
113 | * indirect2direct converted | ||
114 | */ | ||
109 | err = reiserfs_truncate_file(inode, 0); | 115 | err = reiserfs_truncate_file(inode, 0); |
110 | } | 116 | } |
111 | out: | 117 | out: |
@@ -117,8 +123,9 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp) | |||
117 | static int reiserfs_file_open(struct inode *inode, struct file *file) | 123 | static int reiserfs_file_open(struct inode *inode, struct file *file) |
118 | { | 124 | { |
119 | int err = dquot_file_open(inode, file); | 125 | int err = dquot_file_open(inode, file); |
126 | |||
127 | /* somebody might be tailpacking on final close; wait for it */ | ||
120 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { | 128 | if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) { |
121 | /* somebody might be tailpacking on final close; wait for it */ | ||
122 | mutex_lock(&(REISERFS_I(inode)->tailpack)); | 129 | mutex_lock(&(REISERFS_I(inode)->tailpack)); |
123 | atomic_inc(&REISERFS_I(inode)->openers); | 130 | atomic_inc(&REISERFS_I(inode)->openers); |
124 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); | 131 | mutex_unlock(&(REISERFS_I(inode)->tailpack)); |
@@ -208,7 +215,8 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
208 | journal_mark_dirty(&th, s, bh); | 215 | journal_mark_dirty(&th, s, bh); |
209 | } else if (!buffer_dirty(bh)) { | 216 | } else if (!buffer_dirty(bh)) { |
210 | mark_buffer_dirty(bh); | 217 | mark_buffer_dirty(bh); |
211 | /* do data=ordered on any page past the end | 218 | /* |
219 | * do data=ordered on any page past the end | ||
212 | * of file and any buffer marked BH_New. | 220 | * of file and any buffer marked BH_New. |
213 | */ | 221 | */ |
214 | if (reiserfs_data_ordered(inode->i_sb) && | 222 | if (reiserfs_data_ordered(inode->i_sb) && |
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index b6a05a7f4658..144bd62c3e39 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c | |||
@@ -2,59 +2,32 @@ | |||
2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README | 2 | * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README |
3 | */ | 3 | */ |
4 | 4 | ||
5 | /** | ||
6 | ** old_item_num | ||
7 | ** old_entry_num | ||
8 | ** set_entry_sizes | ||
9 | ** create_virtual_node | ||
10 | ** check_left | ||
11 | ** check_right | ||
12 | ** directory_part_size | ||
13 | ** get_num_ver | ||
14 | ** set_parameters | ||
15 | ** is_leaf_removable | ||
16 | ** are_leaves_removable | ||
17 | ** get_empty_nodes | ||
18 | ** get_lfree | ||
19 | ** get_rfree | ||
20 | ** is_left_neighbor_in_cache | ||
21 | ** decrement_key | ||
22 | ** get_far_parent | ||
23 | ** get_parents | ||
24 | ** can_node_be_removed | ||
25 | ** ip_check_balance | ||
26 | ** dc_check_balance_internal | ||
27 | ** dc_check_balance_leaf | ||
28 | ** dc_check_balance | ||
29 | ** check_balance | ||
30 | ** get_direct_parent | ||
31 | ** get_neighbors | ||
32 | ** fix_nodes | ||
33 | ** | ||
34 | ** | ||
35 | **/ | ||
36 | |||
37 | #include <linux/time.h> | 5 | #include <linux/time.h> |
38 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
39 | #include <linux/string.h> | 7 | #include <linux/string.h> |
40 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
41 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
42 | 10 | ||
43 | /* To make any changes in the tree we find a node, that contains item | 11 | /* |
44 | to be changed/deleted or position in the node we insert a new item | 12 | * To make any changes in the tree we find a node that contains item |
45 | to. We call this node S. To do balancing we need to decide what we | 13 | * to be changed/deleted or position in the node we insert a new item |
46 | will shift to left/right neighbor, or to a new node, where new item | 14 | * to. We call this node S. To do balancing we need to decide what we |
47 | will be etc. To make this analysis simpler we build virtual | 15 | * will shift to left/right neighbor, or to a new node, where new item |
48 | node. Virtual node is an array of items, that will replace items of | 16 | * will be etc. To make this analysis simpler we build virtual |
49 | node S. (For instance if we are going to delete an item, virtual | 17 | * node. Virtual node is an array of items, that will replace items of |
50 | node does not contain it). Virtual node keeps information about | 18 | * node S. (For instance if we are going to delete an item, virtual |
51 | item sizes and types, mergeability of first and last items, sizes | 19 | * node does not contain it). Virtual node keeps information about |
52 | of all entries in directory item. We use this array of items when | 20 | * item sizes and types, mergeability of first and last items, sizes |
53 | calculating what we can shift to neighbors and how many nodes we | 21 | * of all entries in directory item. We use this array of items when |
54 | have to have if we do not any shiftings, if we shift to left/right | 22 | * calculating what we can shift to neighbors and how many nodes we |
55 | neighbor or to both. */ | 23 | * have to have if we do not any shiftings, if we shift to left/right |
56 | 24 | * neighbor or to both. | |
57 | /* taking item number in virtual node, returns number of item, that it has in source buffer */ | 25 | */ |
26 | |||
27 | /* | ||
28 | * Takes item number in virtual node, returns number of item | ||
29 | * that it has in source buffer | ||
30 | */ | ||
58 | static inline int old_item_num(int new_num, int affected_item_num, int mode) | 31 | static inline int old_item_num(int new_num, int affected_item_num, int mode) |
59 | { | 32 | { |
60 | if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) | 33 | if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) |
@@ -112,7 +85,10 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
112 | && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) | 85 | && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) |
113 | vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; | 86 | vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; |
114 | 87 | ||
115 | /* go through all items those remain in the virtual node (except for the new (inserted) one) */ | 88 | /* |
89 | * go through all items that remain in the virtual | ||
90 | * node (except for the new (inserted) one) | ||
91 | */ | ||
116 | for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { | 92 | for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { |
117 | int j; | 93 | int j; |
118 | struct virtual_item *vi = vn->vn_vi + new_num; | 94 | struct virtual_item *vi = vn->vn_vi + new_num; |
@@ -131,8 +107,10 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
131 | vi->vi_item = ih_item_body(Sh, ih + j); | 107 | vi->vi_item = ih_item_body(Sh, ih + j); |
132 | vi->vi_uarea = vn->vn_free_ptr; | 108 | vi->vi_uarea = vn->vn_free_ptr; |
133 | 109 | ||
134 | // FIXME: there is no check, that item operation did not | 110 | /* |
135 | // consume too much memory | 111 | * FIXME: there is no check that item operation did not |
112 | * consume too much memory | ||
113 | */ | ||
136 | vn->vn_free_ptr += | 114 | vn->vn_free_ptr += |
137 | op_create_vi(vn, vi, is_affected, tb->insert_size[0]); | 115 | op_create_vi(vn, vi, is_affected, tb->insert_size[0]); |
138 | if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) | 116 | if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) |
@@ -145,7 +123,8 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
145 | 123 | ||
146 | if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { | 124 | if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { |
147 | vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; | 125 | vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; |
148 | vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted | 126 | /* pointer to data which is going to be pasted */ |
127 | vi->vi_new_data = vn->vn_data; | ||
149 | } | 128 | } |
150 | } | 129 | } |
151 | 130 | ||
@@ -164,7 +143,10 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
164 | tb->insert_size[0]); | 143 | tb->insert_size[0]); |
165 | } | 144 | } |
166 | 145 | ||
167 | /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ | 146 | /* |
147 | * set right merge flag we take right delimiting key and | ||
148 | * check whether it is a mergeable item | ||
149 | */ | ||
168 | if (tb->CFR[0]) { | 150 | if (tb->CFR[0]) { |
169 | struct reiserfs_key *key; | 151 | struct reiserfs_key *key; |
170 | 152 | ||
@@ -179,12 +161,19 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
179 | if (op_is_left_mergeable(key, Sh->b_size) && | 161 | if (op_is_left_mergeable(key, Sh->b_size) && |
180 | !(vn->vn_mode != M_DELETE | 162 | !(vn->vn_mode != M_DELETE |
181 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { | 163 | || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { |
182 | /* we delete last item and it could be merged with right neighbor's first item */ | 164 | /* |
165 | * we delete last item and it could be merged | ||
166 | * with right neighbor's first item | ||
167 | */ | ||
183 | if (! | 168 | if (! |
184 | (B_NR_ITEMS(Sh) == 1 | 169 | (B_NR_ITEMS(Sh) == 1 |
185 | && is_direntry_le_ih(item_head(Sh, 0)) | 170 | && is_direntry_le_ih(item_head(Sh, 0)) |
186 | && ih_entry_count(item_head(Sh, 0)) == 1)) { | 171 | && ih_entry_count(item_head(Sh, 0)) == 1)) { |
187 | /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ | 172 | /* |
173 | * node contains more than 1 item, or item | ||
174 | * is not directory item, or this item | ||
175 | * contains more than 1 entry | ||
176 | */ | ||
188 | print_block(Sh, 0, -1, -1); | 177 | print_block(Sh, 0, -1, -1); |
189 | reiserfs_panic(tb->tb_sb, "vs-8045", | 178 | reiserfs_panic(tb->tb_sb, "vs-8045", |
190 | "rdkey %k, affected item==%d " | 179 | "rdkey %k, affected item==%d " |
@@ -198,8 +187,10 @@ static void create_virtual_node(struct tree_balance *tb, int h) | |||
198 | } | 187 | } |
199 | } | 188 | } |
200 | 189 | ||
201 | /* using virtual node check, how many items can be shifted to left | 190 | /* |
202 | neighbor */ | 191 | * Using virtual node check, how many items can be |
192 | * shifted to left neighbor | ||
193 | */ | ||
203 | static void check_left(struct tree_balance *tb, int h, int cur_free) | 194 | static void check_left(struct tree_balance *tb, int h, int cur_free) |
204 | { | 195 | { |
205 | int i; | 196 | int i; |
@@ -259,9 +250,13 @@ static void check_left(struct tree_balance *tb, int h, int cur_free) | |||
259 | } | 250 | } |
260 | 251 | ||
261 | /* the item cannot be shifted entirely, try to split it */ | 252 | /* the item cannot be shifted entirely, try to split it */ |
262 | /* check whether L[0] can hold ih and at least one byte of the item body */ | 253 | /* |
254 | * check whether L[0] can hold ih and at least one byte | ||
255 | * of the item body | ||
256 | */ | ||
257 | |||
258 | /* cannot shift even a part of the current item */ | ||
263 | if (cur_free <= ih_size) { | 259 | if (cur_free <= ih_size) { |
264 | /* cannot shift even a part of the current item */ | ||
265 | tb->lbytes = -1; | 260 | tb->lbytes = -1; |
266 | return; | 261 | return; |
267 | } | 262 | } |
@@ -278,8 +273,10 @@ static void check_left(struct tree_balance *tb, int h, int cur_free) | |||
278 | return; | 273 | return; |
279 | } | 274 | } |
280 | 275 | ||
281 | /* using virtual node check, how many items can be shifted to right | 276 | /* |
282 | neighbor */ | 277 | * Using virtual node check, how many items can be |
278 | * shifted to right neighbor | ||
279 | */ | ||
283 | static void check_right(struct tree_balance *tb, int h, int cur_free) | 280 | static void check_right(struct tree_balance *tb, int h, int cur_free) |
284 | { | 281 | { |
285 | int i; | 282 | int i; |
@@ -338,13 +335,21 @@ static void check_right(struct tree_balance *tb, int h, int cur_free) | |||
338 | continue; | 335 | continue; |
339 | } | 336 | } |
340 | 337 | ||
341 | /* check whether R[0] can hold ih and at least one byte of the item body */ | 338 | /* |
342 | if (cur_free <= ih_size) { /* cannot shift even a part of the current item */ | 339 | * check whether R[0] can hold ih and at least one |
340 | * byte of the item body | ||
341 | */ | ||
342 | |||
343 | /* cannot shift even a part of the current item */ | ||
344 | if (cur_free <= ih_size) { | ||
343 | tb->rbytes = -1; | 345 | tb->rbytes = -1; |
344 | return; | 346 | return; |
345 | } | 347 | } |
346 | 348 | ||
347 | /* R[0] can hold the header of the item and at least one byte of its body */ | 349 | /* |
350 | * R[0] can hold the header of the item and at least | ||
351 | * one byte of its body | ||
352 | */ | ||
348 | cur_free -= ih_size; /* cur_free is still > 0 */ | 353 | cur_free -= ih_size; /* cur_free is still > 0 */ |
349 | 354 | ||
350 | tb->rbytes = op_check_right(vi, cur_free); | 355 | tb->rbytes = op_check_right(vi, cur_free); |
@@ -361,45 +366,64 @@ static void check_right(struct tree_balance *tb, int h, int cur_free) | |||
361 | /* | 366 | /* |
362 | * from - number of items, which are shifted to left neighbor entirely | 367 | * from - number of items, which are shifted to left neighbor entirely |
363 | * to - number of item, which are shifted to right neighbor entirely | 368 | * to - number of item, which are shifted to right neighbor entirely |
364 | * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor | 369 | * from_bytes - number of bytes of boundary item (or directory entries) |
365 | * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ | 370 | * which are shifted to left neighbor |
371 | * to_bytes - number of bytes of boundary item (or directory entries) | ||
372 | * which are shifted to right neighbor | ||
373 | */ | ||
366 | static int get_num_ver(int mode, struct tree_balance *tb, int h, | 374 | static int get_num_ver(int mode, struct tree_balance *tb, int h, |
367 | int from, int from_bytes, | 375 | int from, int from_bytes, |
368 | int to, int to_bytes, short *snum012, int flow) | 376 | int to, int to_bytes, short *snum012, int flow) |
369 | { | 377 | { |
370 | int i; | 378 | int i; |
371 | int cur_free; | 379 | int cur_free; |
372 | // int bytes; | ||
373 | int units; | 380 | int units; |
374 | struct virtual_node *vn = tb->tb_vn; | 381 | struct virtual_node *vn = tb->tb_vn; |
375 | // struct virtual_item * vi; | ||
376 | |||
377 | int total_node_size, max_node_size, current_item_size; | 382 | int total_node_size, max_node_size, current_item_size; |
378 | int needed_nodes; | 383 | int needed_nodes; |
379 | int start_item, /* position of item we start filling node from */ | 384 | |
380 | end_item, /* position of item we finish filling node by */ | 385 | /* position of item we start filling node from */ |
381 | start_bytes, /* number of first bytes (entries for directory) of start_item-th item | 386 | int start_item; |
382 | we do not include into node that is being filled */ | 387 | |
383 | end_bytes; /* number of last bytes (entries for directory) of end_item-th item | 388 | /* position of item we finish filling node by */ |
384 | we do node include into node that is being filled */ | 389 | int end_item; |
385 | int split_item_positions[2]; /* these are positions in virtual item of | 390 | |
386 | items, that are split between S[0] and | 391 | /* |
387 | S1new and S1new and S2new */ | 392 | * number of first bytes (entries for directory) of start_item-th item |
393 | * we do not include into node that is being filled | ||
394 | */ | ||
395 | int start_bytes; | ||
396 | |||
397 | /* | ||
398 | * number of last bytes (entries for directory) of end_item-th item | ||
399 | * we do node include into node that is being filled | ||
400 | */ | ||
401 | int end_bytes; | ||
402 | |||
403 | /* | ||
404 | * these are positions in virtual item of items, that are split | ||
405 | * between S[0] and S1new and S1new and S2new | ||
406 | */ | ||
407 | int split_item_positions[2]; | ||
388 | 408 | ||
389 | split_item_positions[0] = -1; | 409 | split_item_positions[0] = -1; |
390 | split_item_positions[1] = -1; | 410 | split_item_positions[1] = -1; |
391 | 411 | ||
392 | /* We only create additional nodes if we are in insert or paste mode | 412 | /* |
393 | or we are in replace mode at the internal level. If h is 0 and | 413 | * We only create additional nodes if we are in insert or paste mode |
394 | the mode is M_REPLACE then in fix_nodes we change the mode to | 414 | * or we are in replace mode at the internal level. If h is 0 and |
395 | paste or insert before we get here in the code. */ | 415 | * the mode is M_REPLACE then in fix_nodes we change the mode to |
416 | * paste or insert before we get here in the code. | ||
417 | */ | ||
396 | RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), | 418 | RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), |
397 | "vs-8100: insert_size < 0 in overflow"); | 419 | "vs-8100: insert_size < 0 in overflow"); |
398 | 420 | ||
399 | max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); | 421 | max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); |
400 | 422 | ||
401 | /* snum012 [0-2] - number of items, that lay | 423 | /* |
402 | to S[0], first new node and second new node */ | 424 | * snum012 [0-2] - number of items, that lay |
425 | * to S[0], first new node and second new node | ||
426 | */ | ||
403 | snum012[3] = -1; /* s1bytes */ | 427 | snum012[3] = -1; /* s1bytes */ |
404 | snum012[4] = -1; /* s2bytes */ | 428 | snum012[4] = -1; /* s2bytes */ |
405 | 429 | ||
@@ -416,20 +440,22 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
416 | total_node_size = 0; | 440 | total_node_size = 0; |
417 | cur_free = max_node_size; | 441 | cur_free = max_node_size; |
418 | 442 | ||
419 | // start from 'from'-th item | 443 | /* start from 'from'-th item */ |
420 | start_item = from; | 444 | start_item = from; |
421 | // skip its first 'start_bytes' units | 445 | /* skip its first 'start_bytes' units */ |
422 | start_bytes = ((from_bytes != -1) ? from_bytes : 0); | 446 | start_bytes = ((from_bytes != -1) ? from_bytes : 0); |
423 | 447 | ||
424 | // last included item is the 'end_item'-th one | 448 | /* last included item is the 'end_item'-th one */ |
425 | end_item = vn->vn_nr_item - to - 1; | 449 | end_item = vn->vn_nr_item - to - 1; |
426 | // do not count last 'end_bytes' units of 'end_item'-th item | 450 | /* do not count last 'end_bytes' units of 'end_item'-th item */ |
427 | end_bytes = (to_bytes != -1) ? to_bytes : 0; | 451 | end_bytes = (to_bytes != -1) ? to_bytes : 0; |
428 | 452 | ||
429 | /* go through all item beginning from the start_item-th item and ending by | 453 | /* |
430 | the end_item-th item. Do not count first 'start_bytes' units of | 454 | * go through all item beginning from the start_item-th item |
431 | 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ | 455 | * and ending by the end_item-th item. Do not count first |
432 | 456 | * 'start_bytes' units of 'start_item'-th item and last | |
457 | * 'end_bytes' of 'end_item'-th item | ||
458 | */ | ||
433 | for (i = start_item; i <= end_item; i++) { | 459 | for (i = start_item; i <= end_item; i++) { |
434 | struct virtual_item *vi = vn->vn_vi + i; | 460 | struct virtual_item *vi = vn->vn_vi + i; |
435 | int skip_from_end = ((i == end_item) ? end_bytes : 0); | 461 | int skip_from_end = ((i == end_item) ? end_bytes : 0); |
@@ -439,7 +465,10 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
439 | /* get size of current item */ | 465 | /* get size of current item */ |
440 | current_item_size = vi->vi_item_len; | 466 | current_item_size = vi->vi_item_len; |
441 | 467 | ||
442 | /* do not take in calculation head part (from_bytes) of from-th item */ | 468 | /* |
469 | * do not take in calculation head part (from_bytes) | ||
470 | * of from-th item | ||
471 | */ | ||
443 | current_item_size -= | 472 | current_item_size -= |
444 | op_part_size(vi, 0 /*from start */ , start_bytes); | 473 | op_part_size(vi, 0 /*from start */ , start_bytes); |
445 | 474 | ||
@@ -455,9 +484,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
455 | continue; | 484 | continue; |
456 | } | 485 | } |
457 | 486 | ||
487 | /* | ||
488 | * virtual item length is longer, than max size of item in | ||
489 | * a node. It is impossible for direct item | ||
490 | */ | ||
458 | if (current_item_size > max_node_size) { | 491 | if (current_item_size > max_node_size) { |
459 | /* virtual item length is longer, than max size of item in | ||
460 | a node. It is impossible for direct item */ | ||
461 | RFALSE(is_direct_le_ih(vi->vi_ih), | 492 | RFALSE(is_direct_le_ih(vi->vi_ih), |
462 | "vs-8110: " | 493 | "vs-8110: " |
463 | "direct item length is %d. It can not be longer than %d", | 494 | "direct item length is %d. It can not be longer than %d", |
@@ -466,15 +497,18 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
466 | flow = 1; | 497 | flow = 1; |
467 | } | 498 | } |
468 | 499 | ||
500 | /* as we do not split items, take new node and continue */ | ||
469 | if (!flow) { | 501 | if (!flow) { |
470 | /* as we do not split items, take new node and continue */ | ||
471 | needed_nodes++; | 502 | needed_nodes++; |
472 | i--; | 503 | i--; |
473 | total_node_size = 0; | 504 | total_node_size = 0; |
474 | continue; | 505 | continue; |
475 | } | 506 | } |
476 | // calculate number of item units which fit into node being | 507 | |
477 | // filled | 508 | /* |
509 | * calculate number of item units which fit into node being | ||
510 | * filled | ||
511 | */ | ||
478 | { | 512 | { |
479 | int free_space; | 513 | int free_space; |
480 | 514 | ||
@@ -482,17 +516,17 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
482 | units = | 516 | units = |
483 | op_check_left(vi, free_space, start_bytes, | 517 | op_check_left(vi, free_space, start_bytes, |
484 | skip_from_end); | 518 | skip_from_end); |
519 | /* | ||
520 | * nothing fits into current node, take new | ||
521 | * node and continue | ||
522 | */ | ||
485 | if (units == -1) { | 523 | if (units == -1) { |
486 | /* nothing fits into current node, take new node and continue */ | ||
487 | needed_nodes++, i--, total_node_size = 0; | 524 | needed_nodes++, i--, total_node_size = 0; |
488 | continue; | 525 | continue; |
489 | } | 526 | } |
490 | } | 527 | } |
491 | 528 | ||
492 | /* something fits into the current node */ | 529 | /* something fits into the current node */ |
493 | //if (snum012[3] != -1 || needed_nodes != 1) | ||
494 | // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); | ||
495 | //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; | ||
496 | start_bytes += units; | 530 | start_bytes += units; |
497 | snum012[needed_nodes - 1 + 3] = units; | 531 | snum012[needed_nodes - 1 + 3] = units; |
498 | 532 | ||
@@ -508,9 +542,11 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
508 | total_node_size = 0; | 542 | total_node_size = 0; |
509 | } | 543 | } |
510 | 544 | ||
511 | // sum012[4] (if it is not -1) contains number of units of which | 545 | /* |
512 | // are to be in S1new, snum012[3] - to be in S0. They are supposed | 546 | * sum012[4] (if it is not -1) contains number of units of which |
513 | // to be S1bytes and S2bytes correspondingly, so recalculate | 547 | * are to be in S1new, snum012[3] - to be in S0. They are supposed |
548 | * to be S1bytes and S2bytes correspondingly, so recalculate | ||
549 | */ | ||
514 | if (snum012[4] > 0) { | 550 | if (snum012[4] > 0) { |
515 | int split_item_num; | 551 | int split_item_num; |
516 | int bytes_to_r, bytes_to_l; | 552 | int bytes_to_r, bytes_to_l; |
@@ -527,7 +563,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
527 | ((split_item_positions[0] == | 563 | ((split_item_positions[0] == |
528 | split_item_positions[1]) ? snum012[3] : 0); | 564 | split_item_positions[1]) ? snum012[3] : 0); |
529 | 565 | ||
530 | // s2bytes | 566 | /* s2bytes */ |
531 | snum012[4] = | 567 | snum012[4] = |
532 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - | 568 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - |
533 | bytes_to_r - bytes_to_l - bytes_to_S1new; | 569 | bytes_to_r - bytes_to_l - bytes_to_S1new; |
@@ -555,7 +591,7 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
555 | ((split_item_positions[0] == split_item_positions[1] | 591 | ((split_item_positions[0] == split_item_positions[1] |
556 | && snum012[4] != -1) ? snum012[4] : 0); | 592 | && snum012[4] != -1) ? snum012[4] : 0); |
557 | 593 | ||
558 | // s1bytes | 594 | /* s1bytes */ |
559 | snum012[3] = | 595 | snum012[3] = |
560 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - | 596 | op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - |
561 | bytes_to_r - bytes_to_l - bytes_to_S2new; | 597 | bytes_to_r - bytes_to_l - bytes_to_S2new; |
@@ -565,7 +601,8 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
565 | } | 601 | } |
566 | 602 | ||
567 | 603 | ||
568 | /* Set parameters for balancing. | 604 | /* |
605 | * Set parameters for balancing. | ||
569 | * Performs write of results of analysis of balancing into structure tb, | 606 | * Performs write of results of analysis of balancing into structure tb, |
570 | * where it will later be used by the functions that actually do the balancing. | 607 | * where it will later be used by the functions that actually do the balancing. |
571 | * Parameters: | 608 | * Parameters: |
@@ -575,11 +612,12 @@ static int get_num_ver(int mode, struct tree_balance *tb, int h, | |||
575 | * rnum number of items from S[h] that must be shifted to R[h]; | 612 | * rnum number of items from S[h] that must be shifted to R[h]; |
576 | * blk_num number of blocks that S[h] will be splitted into; | 613 | * blk_num number of blocks that S[h] will be splitted into; |
577 | * s012 number of items that fall into splitted nodes. | 614 | * s012 number of items that fall into splitted nodes. |
578 | * lbytes number of bytes which flow to the left neighbor from the item that is not | 615 | * lbytes number of bytes which flow to the left neighbor from the |
579 | * not shifted entirely | 616 | * item that is not not shifted entirely |
580 | * rbytes number of bytes which flow to the right neighbor from the item that is not | 617 | * rbytes number of bytes which flow to the right neighbor from the |
581 | * not shifted entirely | 618 | * item that is not not shifted entirely |
582 | * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) | 619 | * s1bytes number of bytes which flow to the first new node when |
620 | * S[0] splits (this number is contained in s012 array) | ||
583 | */ | 621 | */ |
584 | 622 | ||
585 | static void set_parameters(struct tree_balance *tb, int h, int lnum, | 623 | static void set_parameters(struct tree_balance *tb, int h, int lnum, |
@@ -590,7 +628,8 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum, | |||
590 | tb->rnum[h] = rnum; | 628 | tb->rnum[h] = rnum; |
591 | tb->blknum[h] = blk_num; | 629 | tb->blknum[h] = blk_num; |
592 | 630 | ||
593 | if (h == 0) { /* only for leaf level */ | 631 | /* only for leaf level */ |
632 | if (h == 0) { | ||
594 | if (s012 != NULL) { | 633 | if (s012 != NULL) { |
595 | tb->s0num = *s012++, | 634 | tb->s0num = *s012++, |
596 | tb->s1num = *s012++, tb->s2num = *s012++; | 635 | tb->s1num = *s012++, tb->s2num = *s012++; |
@@ -607,8 +646,10 @@ static void set_parameters(struct tree_balance *tb, int h, int lnum, | |||
607 | PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); | 646 | PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); |
608 | } | 647 | } |
609 | 648 | ||
610 | /* check, does node disappear if we shift tb->lnum[0] items to left | 649 | /* |
611 | neighbor and tb->rnum[0] to the right one. */ | 650 | * check if node disappears if we shift tb->lnum[0] items to left |
651 | * neighbor and tb->rnum[0] to the right one. | ||
652 | */ | ||
612 | static int is_leaf_removable(struct tree_balance *tb) | 653 | static int is_leaf_removable(struct tree_balance *tb) |
613 | { | 654 | { |
614 | struct virtual_node *vn = tb->tb_vn; | 655 | struct virtual_node *vn = tb->tb_vn; |
@@ -616,8 +657,10 @@ static int is_leaf_removable(struct tree_balance *tb) | |||
616 | int size; | 657 | int size; |
617 | int remain_items; | 658 | int remain_items; |
618 | 659 | ||
619 | /* number of items, that will be shifted to left (right) neighbor | 660 | /* |
620 | entirely */ | 661 | * number of items that will be shifted to left (right) neighbor |
662 | * entirely | ||
663 | */ | ||
621 | to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); | 664 | to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); |
622 | to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); | 665 | to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); |
623 | remain_items = vn->vn_nr_item; | 666 | remain_items = vn->vn_nr_item; |
@@ -625,18 +668,18 @@ static int is_leaf_removable(struct tree_balance *tb) | |||
625 | /* how many items remain in S[0] after shiftings to neighbors */ | 668 | /* how many items remain in S[0] after shiftings to neighbors */ |
626 | remain_items -= (to_left + to_right); | 669 | remain_items -= (to_left + to_right); |
627 | 670 | ||
671 | /* all content of node can be shifted to neighbors */ | ||
628 | if (remain_items < 1) { | 672 | if (remain_items < 1) { |
629 | /* all content of node can be shifted to neighbors */ | ||
630 | set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, | 673 | set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, |
631 | NULL, -1, -1); | 674 | NULL, -1, -1); |
632 | return 1; | 675 | return 1; |
633 | } | 676 | } |
634 | 677 | ||
678 | /* S[0] is not removable */ | ||
635 | if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) | 679 | if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) |
636 | /* S[0] is not removable */ | ||
637 | return 0; | 680 | return 0; |
638 | 681 | ||
639 | /* check, whether we can divide 1 remaining item between neighbors */ | 682 | /* check whether we can divide 1 remaining item between neighbors */ |
640 | 683 | ||
641 | /* get size of remaining item (in item units) */ | 684 | /* get size of remaining item (in item units) */ |
642 | size = op_unit_num(&(vn->vn_vi[to_left])); | 685 | size = op_unit_num(&(vn->vn_vi[to_left])); |
@@ -680,18 +723,23 @@ static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) | |||
680 | && !comp_short_le_keys(&(ih->ih_key), | 723 | && !comp_short_le_keys(&(ih->ih_key), |
681 | internal_key(tb->CFR[0], | 724 | internal_key(tb->CFR[0], |
682 | tb->rkey[0]))) | 725 | tb->rkey[0]))) |
726 | /* | ||
727 | * Directory must be in correct state here: that is | ||
728 | * somewhere at the left side should exist first | ||
729 | * directory item. But the item being deleted can | ||
730 | * not be that first one because its right neighbor | ||
731 | * is item of the same directory. (But first item | ||
732 | * always gets deleted in last turn). So, neighbors | ||
733 | * of deleted item can be merged, so we can save | ||
734 | * ih_size | ||
735 | */ | ||
683 | if (is_direntry_le_ih(ih)) { | 736 | if (is_direntry_le_ih(ih)) { |
684 | /* Directory must be in correct state here: that is | ||
685 | somewhere at the left side should exist first directory | ||
686 | item. But the item being deleted can not be that first | ||
687 | one because its right neighbor is item of the same | ||
688 | directory. (But first item always gets deleted in last | ||
689 | turn). So, neighbors of deleted item can be merged, so | ||
690 | we can save ih_size */ | ||
691 | ih_size = IH_SIZE; | 737 | ih_size = IH_SIZE; |
692 | 738 | ||
693 | /* we might check that left neighbor exists and is of the | 739 | /* |
694 | same directory */ | 740 | * we might check that left neighbor exists |
741 | * and is of the same directory | ||
742 | */ | ||
695 | RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, | 743 | RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, |
696 | "vs-8130: first directory item can not be removed until directory is not empty"); | 744 | "vs-8130: first directory item can not be removed until directory is not empty"); |
697 | } | 745 | } |
@@ -770,7 +818,8 @@ static void free_buffers_in_tb(struct tree_balance *tb) | |||
770 | } | 818 | } |
771 | } | 819 | } |
772 | 820 | ||
773 | /* Get new buffers for storing new nodes that are created while balancing. | 821 | /* |
822 | * Get new buffers for storing new nodes that are created while balancing. | ||
774 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 823 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; |
775 | * CARRY_ON - schedule didn't occur while the function worked; | 824 | * CARRY_ON - schedule didn't occur while the function worked; |
776 | * NO_DISK_SPACE - no disk space. | 825 | * NO_DISK_SPACE - no disk space. |
@@ -778,28 +827,33 @@ static void free_buffers_in_tb(struct tree_balance *tb) | |||
778 | /* The function is NOT SCHEDULE-SAFE! */ | 827 | /* The function is NOT SCHEDULE-SAFE! */ |
779 | static int get_empty_nodes(struct tree_balance *tb, int h) | 828 | static int get_empty_nodes(struct tree_balance *tb, int h) |
780 | { | 829 | { |
781 | struct buffer_head *new_bh, | 830 | struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h); |
782 | *Sh = PATH_H_PBUFFER(tb->tb_path, h); | ||
783 | b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; | 831 | b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; |
784 | int counter, number_of_freeblk, amount_needed, /* number of needed empty blocks */ | 832 | int counter, number_of_freeblk; |
785 | retval = CARRY_ON; | 833 | int amount_needed; /* number of needed empty blocks */ |
834 | int retval = CARRY_ON; | ||
786 | struct super_block *sb = tb->tb_sb; | 835 | struct super_block *sb = tb->tb_sb; |
787 | 836 | ||
788 | /* number_of_freeblk is the number of empty blocks which have been | 837 | /* |
789 | acquired for use by the balancing algorithm minus the number of | 838 | * number_of_freeblk is the number of empty blocks which have been |
790 | empty blocks used in the previous levels of the analysis, | 839 | * acquired for use by the balancing algorithm minus the number of |
791 | number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs | 840 | * empty blocks used in the previous levels of the analysis, |
792 | after empty blocks are acquired, and the balancing analysis is | 841 | * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule |
793 | then restarted, amount_needed is the number needed by this level | 842 | * occurs after empty blocks are acquired, and the balancing analysis |
794 | (h) of the balancing analysis. | 843 | * is then restarted, amount_needed is the number needed by this |
795 | 844 | * level (h) of the balancing analysis. | |
796 | Note that for systems with many processes writing, it would be | 845 | * |
797 | more layout optimal to calculate the total number needed by all | 846 | * Note that for systems with many processes writing, it would be |
798 | levels and then to run reiserfs_new_blocks to get all of them at once. */ | 847 | * more layout optimal to calculate the total number needed by all |
799 | 848 | * levels and then to run reiserfs_new_blocks to get all of them at | |
800 | /* Initiate number_of_freeblk to the amount acquired prior to the restart of | 849 | * once. |
801 | the analysis or 0 if not restarted, then subtract the amount needed | 850 | */ |
802 | by all of the levels of the tree below h. */ | 851 | |
852 | /* | ||
853 | * Initiate number_of_freeblk to the amount acquired prior to the | ||
854 | * restart of the analysis or 0 if not restarted, then subtract the | ||
855 | * amount needed by all of the levels of the tree below h. | ||
856 | */ | ||
803 | /* blknum includes S[h], so we subtract 1 in this calculation */ | 857 | /* blknum includes S[h], so we subtract 1 in this calculation */ |
804 | for (counter = 0, number_of_freeblk = tb->cur_blknum; | 858 | for (counter = 0, number_of_freeblk = tb->cur_blknum; |
805 | counter < h; counter++) | 859 | counter < h; counter++) |
@@ -810,13 +864,19 @@ static int get_empty_nodes(struct tree_balance *tb, int h) | |||
810 | /* Allocate missing empty blocks. */ | 864 | /* Allocate missing empty blocks. */ |
811 | /* if Sh == 0 then we are getting a new root */ | 865 | /* if Sh == 0 then we are getting a new root */ |
812 | amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; | 866 | amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; |
813 | /* Amount_needed = the amount that we need more than the amount that we have. */ | 867 | /* |
868 | * Amount_needed = the amount that we need more than the | ||
869 | * amount that we have. | ||
870 | */ | ||
814 | if (amount_needed > number_of_freeblk) | 871 | if (amount_needed > number_of_freeblk) |
815 | amount_needed -= number_of_freeblk; | 872 | amount_needed -= number_of_freeblk; |
816 | else /* If we have enough already then there is nothing to do. */ | 873 | else /* If we have enough already then there is nothing to do. */ |
817 | return CARRY_ON; | 874 | return CARRY_ON; |
818 | 875 | ||
819 | /* No need to check quota - is not allocated for blocks used for formatted nodes */ | 876 | /* |
877 | * No need to check quota - is not allocated for blocks used | ||
878 | * for formatted nodes | ||
879 | */ | ||
820 | if (reiserfs_new_form_blocknrs(tb, blocknrs, | 880 | if (reiserfs_new_form_blocknrs(tb, blocknrs, |
821 | amount_needed) == NO_DISK_SPACE) | 881 | amount_needed) == NO_DISK_SPACE) |
822 | return NO_DISK_SPACE; | 882 | return NO_DISK_SPACE; |
@@ -849,8 +909,10 @@ static int get_empty_nodes(struct tree_balance *tb, int h) | |||
849 | return retval; | 909 | return retval; |
850 | } | 910 | } |
851 | 911 | ||
852 | /* Get free space of the left neighbor, which is stored in the parent | 912 | /* |
853 | * node of the left neighbor. */ | 913 | * Get free space of the left neighbor, which is stored in the parent |
914 | * node of the left neighbor. | ||
915 | */ | ||
854 | static int get_lfree(struct tree_balance *tb, int h) | 916 | static int get_lfree(struct tree_balance *tb, int h) |
855 | { | 917 | { |
856 | struct buffer_head *l, *f; | 918 | struct buffer_head *l, *f; |
@@ -870,7 +932,8 @@ static int get_lfree(struct tree_balance *tb, int h) | |||
870 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); | 932 | return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); |
871 | } | 933 | } |
872 | 934 | ||
873 | /* Get free space of the right neighbor, | 935 | /* |
936 | * Get free space of the right neighbor, | ||
874 | * which is stored in the parent node of the right neighbor. | 937 | * which is stored in the parent node of the right neighbor. |
875 | */ | 938 | */ |
876 | static int get_rfree(struct tree_balance *tb, int h) | 939 | static int get_rfree(struct tree_balance *tb, int h) |
@@ -916,7 +979,10 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) | |||
916 | "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", | 979 | "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", |
917 | father, tb->FL[h]); | 980 | father, tb->FL[h]); |
918 | 981 | ||
919 | /* Get position of the pointer to the left neighbor into the left father. */ | 982 | /* |
983 | * Get position of the pointer to the left neighbor | ||
984 | * into the left father. | ||
985 | */ | ||
920 | left_neighbor_position = (father == tb->FL[h]) ? | 986 | left_neighbor_position = (father == tb->FL[h]) ? |
921 | tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); | 987 | tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); |
922 | /* Get left neighbor block number. */ | 988 | /* Get left neighbor block number. */ |
@@ -940,17 +1006,20 @@ static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) | |||
940 | 1006 | ||
941 | static void decrement_key(struct cpu_key *key) | 1007 | static void decrement_key(struct cpu_key *key) |
942 | { | 1008 | { |
943 | // call item specific function for this key | 1009 | /* call item specific function for this key */ |
944 | item_ops[cpu_key_k_type(key)]->decrement_key(key); | 1010 | item_ops[cpu_key_k_type(key)]->decrement_key(key); |
945 | } | 1011 | } |
946 | 1012 | ||
947 | /* Calculate far left/right parent of the left/right neighbor of the current node, that | 1013 | /* |
948 | * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. | 1014 | * Calculate far left/right parent of the left/right neighbor of the |
1015 | * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor | ||
1016 | * of the parent F[h]. | ||
949 | * Calculate left/right common parent of the current node and L[h]/R[h]. | 1017 | * Calculate left/right common parent of the current node and L[h]/R[h]. |
950 | * Calculate left/right delimiting key position. | 1018 | * Calculate left/right delimiting key position. |
951 | * Returns: PATH_INCORRECT - path in the tree is not correct; | 1019 | * Returns: PATH_INCORRECT - path in the tree is not correct |
952 | SCHEDULE_OCCURRED - schedule occurred while the function worked; | 1020 | * SCHEDULE_OCCURRED - schedule occurred while the function worked |
953 | * CARRY_ON - schedule didn't occur while the function worked; | 1021 | * CARRY_ON - schedule didn't occur while the function |
1022 | * worked | ||
954 | */ | 1023 | */ |
955 | static int get_far_parent(struct tree_balance *tb, | 1024 | static int get_far_parent(struct tree_balance *tb, |
956 | int h, | 1025 | int h, |
@@ -966,8 +1035,10 @@ static int get_far_parent(struct tree_balance *tb, | |||
966 | first_last_position = 0, | 1035 | first_last_position = 0, |
967 | path_offset = PATH_H_PATH_OFFSET(path, h); | 1036 | path_offset = PATH_H_PATH_OFFSET(path, h); |
968 | 1037 | ||
969 | /* Starting from F[h] go upwards in the tree, and look for the common | 1038 | /* |
970 | ancestor of F[h], and its neighbor l/r, that should be obtained. */ | 1039 | * Starting from F[h] go upwards in the tree, and look for the common |
1040 | * ancestor of F[h], and its neighbor l/r, that should be obtained. | ||
1041 | */ | ||
971 | 1042 | ||
972 | counter = path_offset; | 1043 | counter = path_offset; |
973 | 1044 | ||
@@ -975,21 +1046,33 @@ static int get_far_parent(struct tree_balance *tb, | |||
975 | "PAP-8180: invalid path length"); | 1046 | "PAP-8180: invalid path length"); |
976 | 1047 | ||
977 | for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { | 1048 | for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { |
978 | /* Check whether parent of the current buffer in the path is really parent in the tree. */ | 1049 | /* |
1050 | * Check whether parent of the current buffer in the path | ||
1051 | * is really parent in the tree. | ||
1052 | */ | ||
979 | if (!B_IS_IN_TREE | 1053 | if (!B_IS_IN_TREE |
980 | (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) | 1054 | (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) |
981 | return REPEAT_SEARCH; | 1055 | return REPEAT_SEARCH; |
1056 | |||
982 | /* Check whether position in the parent is correct. */ | 1057 | /* Check whether position in the parent is correct. */ |
983 | if ((position = | 1058 | if ((position = |
984 | PATH_OFFSET_POSITION(path, | 1059 | PATH_OFFSET_POSITION(path, |
985 | counter - 1)) > | 1060 | counter - 1)) > |
986 | B_NR_ITEMS(parent)) | 1061 | B_NR_ITEMS(parent)) |
987 | return REPEAT_SEARCH; | 1062 | return REPEAT_SEARCH; |
988 | /* Check whether parent at the path really points to the child. */ | 1063 | |
1064 | /* | ||
1065 | * Check whether parent at the path really points | ||
1066 | * to the child. | ||
1067 | */ | ||
989 | if (B_N_CHILD_NUM(parent, position) != | 1068 | if (B_N_CHILD_NUM(parent, position) != |
990 | PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) | 1069 | PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) |
991 | return REPEAT_SEARCH; | 1070 | return REPEAT_SEARCH; |
992 | /* Return delimiting key if position in the parent is not equal to first/last one. */ | 1071 | |
1072 | /* | ||
1073 | * Return delimiting key if position in the parent is not | ||
1074 | * equal to first/last one. | ||
1075 | */ | ||
993 | if (c_lr_par == RIGHT_PARENTS) | 1076 | if (c_lr_par == RIGHT_PARENTS) |
994 | first_last_position = B_NR_ITEMS(parent); | 1077 | first_last_position = B_NR_ITEMS(parent); |
995 | if (position != first_last_position) { | 1078 | if (position != first_last_position) { |
@@ -1002,7 +1085,10 @@ static int get_far_parent(struct tree_balance *tb, | |||
1002 | 1085 | ||
1003 | /* if we are in the root of the tree, then there is no common father */ | 1086 | /* if we are in the root of the tree, then there is no common father */ |
1004 | if (counter == FIRST_PATH_ELEMENT_OFFSET) { | 1087 | if (counter == FIRST_PATH_ELEMENT_OFFSET) { |
1005 | /* Check whether first buffer in the path is the root of the tree. */ | 1088 | /* |
1089 | * Check whether first buffer in the path is the | ||
1090 | * root of the tree. | ||
1091 | */ | ||
1006 | if (PATH_OFFSET_PBUFFER | 1092 | if (PATH_OFFSET_PBUFFER |
1007 | (tb->tb_path, | 1093 | (tb->tb_path, |
1008 | FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == | 1094 | FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == |
@@ -1031,8 +1117,11 @@ static int get_far_parent(struct tree_balance *tb, | |||
1031 | } | 1117 | } |
1032 | } | 1118 | } |
1033 | 1119 | ||
1034 | /* So, we got common parent of the current node and its left/right neighbor. | 1120 | /* |
1035 | Now we are geting the parent of the left/right neighbor. */ | 1121 | * So, we got common parent of the current node and its |
1122 | * left/right neighbor. Now we are getting the parent of the | ||
1123 | * left/right neighbor. | ||
1124 | */ | ||
1036 | 1125 | ||
1037 | /* Form key to get parent of the left/right neighbor. */ | 1126 | /* Form key to get parent of the left/right neighbor. */ |
1038 | le_key2cpu_key(&s_lr_father_key, | 1127 | le_key2cpu_key(&s_lr_father_key, |
@@ -1050,7 +1139,7 @@ static int get_far_parent(struct tree_balance *tb, | |||
1050 | if (search_by_key | 1139 | if (search_by_key |
1051 | (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, | 1140 | (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, |
1052 | h + 1) == IO_ERROR) | 1141 | h + 1) == IO_ERROR) |
1053 | // path is released | 1142 | /* path is released */ |
1054 | return IO_ERROR; | 1143 | return IO_ERROR; |
1055 | 1144 | ||
1056 | if (FILESYSTEM_CHANGED_TB(tb)) { | 1145 | if (FILESYSTEM_CHANGED_TB(tb)) { |
@@ -1071,12 +1160,15 @@ static int get_far_parent(struct tree_balance *tb, | |||
1071 | return CARRY_ON; | 1160 | return CARRY_ON; |
1072 | } | 1161 | } |
1073 | 1162 | ||
1074 | /* Get parents of neighbors of node in the path(S[path_offset]) and common parents of | 1163 | /* |
1075 | * S[path_offset] and L[path_offset]/R[path_offset]: F[path_offset], FL[path_offset], | 1164 | * Get parents of neighbors of node in the path(S[path_offset]) and |
1076 | * FR[path_offset], CFL[path_offset], CFR[path_offset]. | 1165 | * common parents of S[path_offset] and L[path_offset]/R[path_offset]: |
1077 | * Calculate numbers of left and right delimiting keys position: lkey[path_offset], rkey[path_offset]. | 1166 | * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], |
1078 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 1167 | * CFR[path_offset]. |
1079 | * CARRY_ON - schedule didn't occur while the function worked; | 1168 | * Calculate numbers of left and right delimiting keys position: |
1169 | * lkey[path_offset], rkey[path_offset]. | ||
1170 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked | ||
1171 | * CARRY_ON - schedule didn't occur while the function worked | ||
1080 | */ | 1172 | */ |
1081 | static int get_parents(struct tree_balance *tb, int h) | 1173 | static int get_parents(struct tree_balance *tb, int h) |
1082 | { | 1174 | { |
@@ -1088,8 +1180,11 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1088 | 1180 | ||
1089 | /* Current node is the root of the tree or will be root of the tree */ | 1181 | /* Current node is the root of the tree or will be root of the tree */ |
1090 | if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { | 1182 | if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { |
1091 | /* The root can not have parents. | 1183 | /* |
1092 | Release nodes which previously were obtained as parents of the current node neighbors. */ | 1184 | * The root can not have parents. |
1185 | * Release nodes which previously were obtained as | ||
1186 | * parents of the current node neighbors. | ||
1187 | */ | ||
1093 | brelse(tb->FL[h]); | 1188 | brelse(tb->FL[h]); |
1094 | brelse(tb->CFL[h]); | 1189 | brelse(tb->CFL[h]); |
1095 | brelse(tb->FR[h]); | 1190 | brelse(tb->FR[h]); |
@@ -1111,10 +1206,14 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1111 | get_bh(curf); | 1206 | get_bh(curf); |
1112 | tb->lkey[h] = position - 1; | 1207 | tb->lkey[h] = position - 1; |
1113 | } else { | 1208 | } else { |
1114 | /* Calculate current parent of L[path_offset], which is the left neighbor of the current node. | 1209 | /* |
1115 | Calculate current common parent of L[path_offset] and the current node. Note that | 1210 | * Calculate current parent of L[path_offset], which is the |
1116 | CFL[path_offset] not equal FL[path_offset] and CFL[path_offset] not equal F[path_offset]. | 1211 | * left neighbor of the current node. Calculate current |
1117 | Calculate lkey[path_offset]. */ | 1212 | * common parent of L[path_offset] and the current node. |
1213 | * Note that CFL[path_offset] not equal FL[path_offset] and | ||
1214 | * CFL[path_offset] not equal F[path_offset]. | ||
1215 | * Calculate lkey[path_offset]. | ||
1216 | */ | ||
1118 | if ((ret = get_far_parent(tb, h + 1, &curf, | 1217 | if ((ret = get_far_parent(tb, h + 1, &curf, |
1119 | &curcf, | 1218 | &curcf, |
1120 | LEFT_PARENTS)) != CARRY_ON) | 1219 | LEFT_PARENTS)) != CARRY_ON) |
@@ -1130,19 +1229,22 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1130 | (curcf && !B_IS_IN_TREE(curcf)), | 1229 | (curcf && !B_IS_IN_TREE(curcf)), |
1131 | "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); | 1230 | "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); |
1132 | 1231 | ||
1133 | /* Get parent FR[h] of R[h]. */ | 1232 | /* Get parent FR[h] of R[h]. */ |
1134 | 1233 | ||
1135 | /* Current node is the last child of F[h]. FR[h] != F[h]. */ | 1234 | /* Current node is the last child of F[h]. FR[h] != F[h]. */ |
1136 | if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { | 1235 | if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { |
1137 | /* Calculate current parent of R[h], which is the right neighbor of F[h]. | 1236 | /* |
1138 | Calculate current common parent of R[h] and current node. Note that CFR[h] | 1237 | * Calculate current parent of R[h], which is the right |
1139 | not equal FR[path_offset] and CFR[h] not equal F[h]. */ | 1238 | * neighbor of F[h]. Calculate current common parent of |
1239 | * R[h] and current node. Note that CFR[h] not equal | ||
1240 | * FR[path_offset] and CFR[h] not equal F[h]. | ||
1241 | */ | ||
1140 | if ((ret = | 1242 | if ((ret = |
1141 | get_far_parent(tb, h + 1, &curf, &curcf, | 1243 | get_far_parent(tb, h + 1, &curf, &curcf, |
1142 | RIGHT_PARENTS)) != CARRY_ON) | 1244 | RIGHT_PARENTS)) != CARRY_ON) |
1143 | return ret; | 1245 | return ret; |
1144 | } else { | 1246 | } else { |
1145 | /* Current node is not the last child of its parent F[h]. */ | 1247 | /* Current node is not the last child of its parent F[h]. */ |
1146 | curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); | 1248 | curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); |
1147 | curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); | 1249 | curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); |
1148 | get_bh(curf); | 1250 | get_bh(curf); |
@@ -1165,8 +1267,10 @@ static int get_parents(struct tree_balance *tb, int h) | |||
1165 | return CARRY_ON; | 1267 | return CARRY_ON; |
1166 | } | 1268 | } |
1167 | 1269 | ||
1168 | /* it is possible to remove node as result of shiftings to | 1270 | /* |
1169 | neighbors even when we insert or paste item. */ | 1271 | * it is possible to remove node as result of shiftings to |
1272 | * neighbors even when we insert or paste item. | ||
1273 | */ | ||
1170 | static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | 1274 | static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, |
1171 | struct tree_balance *tb, int h) | 1275 | struct tree_balance *tb, int h) |
1172 | { | 1276 | { |
@@ -1189,7 +1293,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1189 | && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) | 1293 | && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) |
1190 | + ((h) ? KEY_SIZE : 0)) { | 1294 | + ((h) ? KEY_SIZE : 0)) { |
1191 | /* node can not be removed */ | 1295 | /* node can not be removed */ |
1192 | if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ | 1296 | if (sfree >= levbytes) { |
1297 | /* new item fits into node S[h] without any shifting */ | ||
1193 | if (!h) | 1298 | if (!h) |
1194 | tb->s0num = | 1299 | tb->s0num = |
1195 | B_NR_ITEMS(Sh) + | 1300 | B_NR_ITEMS(Sh) + |
@@ -1202,7 +1307,8 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1202 | return !NO_BALANCING_NEEDED; | 1307 | return !NO_BALANCING_NEEDED; |
1203 | } | 1308 | } |
1204 | 1309 | ||
1205 | /* Check whether current node S[h] is balanced when increasing its size by | 1310 | /* |
1311 | * Check whether current node S[h] is balanced when increasing its size by | ||
1206 | * Inserting or Pasting. | 1312 | * Inserting or Pasting. |
1207 | * Calculate parameters for balancing for current level h. | 1313 | * Calculate parameters for balancing for current level h. |
1208 | * Parameters: | 1314 | * Parameters: |
@@ -1219,39 +1325,48 @@ static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, | |||
1219 | static int ip_check_balance(struct tree_balance *tb, int h) | 1325 | static int ip_check_balance(struct tree_balance *tb, int h) |
1220 | { | 1326 | { |
1221 | struct virtual_node *vn = tb->tb_vn; | 1327 | struct virtual_node *vn = tb->tb_vn; |
1222 | int levbytes, /* Number of bytes that must be inserted into (value | 1328 | /* |
1223 | is negative if bytes are deleted) buffer which | 1329 | * Number of bytes that must be inserted into (value is negative |
1224 | contains node being balanced. The mnemonic is | 1330 | * if bytes are deleted) buffer which contains node being balanced. |
1225 | that the attempted change in node space used level | 1331 | * The mnemonic is that the attempted change in node space used |
1226 | is levbytes bytes. */ | 1332 | * level is levbytes bytes. |
1227 | ret; | 1333 | */ |
1334 | int levbytes; | ||
1335 | int ret; | ||
1228 | 1336 | ||
1229 | int lfree, sfree, rfree /* free space in L, S and R */ ; | 1337 | int lfree, sfree, rfree /* free space in L, S and R */ ; |
1230 | 1338 | ||
1231 | /* nver is short for number of vertixes, and lnver is the number if | 1339 | /* |
1232 | we shift to the left, rnver is the number if we shift to the | 1340 | * nver is short for number of vertixes, and lnver is the number if |
1233 | right, and lrnver is the number if we shift in both directions. | 1341 | * we shift to the left, rnver is the number if we shift to the |
1234 | The goal is to minimize first the number of vertixes, and second, | 1342 | * right, and lrnver is the number if we shift in both directions. |
1235 | the number of vertixes whose contents are changed by shifting, | 1343 | * The goal is to minimize first the number of vertixes, and second, |
1236 | and third the number of uncached vertixes whose contents are | 1344 | * the number of vertixes whose contents are changed by shifting, |
1237 | changed by shifting and must be read from disk. */ | 1345 | * and third the number of uncached vertixes whose contents are |
1346 | * changed by shifting and must be read from disk. | ||
1347 | */ | ||
1238 | int nver, lnver, rnver, lrnver; | 1348 | int nver, lnver, rnver, lrnver; |
1239 | 1349 | ||
1240 | /* used at leaf level only, S0 = S[0] is the node being balanced, | 1350 | /* |
1241 | sInum [ I = 0,1,2 ] is the number of items that will | 1351 | * used at leaf level only, S0 = S[0] is the node being balanced, |
1242 | remain in node SI after balancing. S1 and S2 are new | 1352 | * sInum [ I = 0,1,2 ] is the number of items that will |
1243 | nodes that might be created. */ | 1353 | * remain in node SI after balancing. S1 and S2 are new |
1354 | * nodes that might be created. | ||
1355 | */ | ||
1244 | 1356 | ||
1245 | /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. | 1357 | /* |
1246 | where 4th parameter is s1bytes and 5th - s2bytes | 1358 | * we perform 8 calls to get_num_ver(). For each call we |
1359 | * calculate five parameters. where 4th parameter is s1bytes | ||
1360 | * and 5th - s2bytes | ||
1361 | * | ||
1362 | * s0num, s1num, s2num for 8 cases | ||
1363 | * 0,1 - do not shift and do not shift but bottle | ||
1364 | * 2 - shift only whole item to left | ||
1365 | * 3 - shift to left and bottle as much as possible | ||
1366 | * 4,5 - shift to right (whole items and as much as possible | ||
1367 | * 6,7 - shift to both directions (whole items and as much as possible) | ||
1247 | */ | 1368 | */ |
1248 | short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases | 1369 | short snum012[40] = { 0, }; |
1249 | 0,1 - do not shift and do not shift but bottle | ||
1250 | 2 - shift only whole item to left | ||
1251 | 3 - shift to left and bottle as much as possible | ||
1252 | 4,5 - shift to right (whole items and as much as possible | ||
1253 | 6,7 - shift to both directions (whole items and as much as possible) | ||
1254 | */ | ||
1255 | 1370 | ||
1256 | /* Sh is the node whose balance is currently being checked */ | 1371 | /* Sh is the node whose balance is currently being checked */ |
1257 | struct buffer_head *Sh; | 1372 | struct buffer_head *Sh; |
@@ -1265,9 +1380,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1265 | reiserfs_panic(tb->tb_sb, "vs-8210", | 1380 | reiserfs_panic(tb->tb_sb, "vs-8210", |
1266 | "S[0] can not be 0"); | 1381 | "S[0] can not be 0"); |
1267 | switch (ret = get_empty_nodes(tb, h)) { | 1382 | switch (ret = get_empty_nodes(tb, h)) { |
1383 | /* no balancing for higher levels needed */ | ||
1268 | case CARRY_ON: | 1384 | case CARRY_ON: |
1269 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1385 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
1270 | return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ | 1386 | return NO_BALANCING_NEEDED; |
1271 | 1387 | ||
1272 | case NO_DISK_SPACE: | 1388 | case NO_DISK_SPACE: |
1273 | case REPEAT_SEARCH: | 1389 | case REPEAT_SEARCH: |
@@ -1278,7 +1394,9 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1278 | } | 1394 | } |
1279 | } | 1395 | } |
1280 | 1396 | ||
1281 | if ((ret = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */ | 1397 | /* get parents of S[h] neighbors. */ |
1398 | ret = get_parents(tb, h); | ||
1399 | if (ret != CARRY_ON) | ||
1282 | return ret; | 1400 | return ret; |
1283 | 1401 | ||
1284 | sfree = B_FREE_SPACE(Sh); | 1402 | sfree = B_FREE_SPACE(Sh); |
@@ -1287,38 +1405,44 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1287 | rfree = get_rfree(tb, h); | 1405 | rfree = get_rfree(tb, h); |
1288 | lfree = get_lfree(tb, h); | 1406 | lfree = get_lfree(tb, h); |
1289 | 1407 | ||
1408 | /* and new item fits into node S[h] without any shifting */ | ||
1290 | if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == | 1409 | if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == |
1291 | NO_BALANCING_NEEDED) | 1410 | NO_BALANCING_NEEDED) |
1292 | /* and new item fits into node S[h] without any shifting */ | ||
1293 | return NO_BALANCING_NEEDED; | 1411 | return NO_BALANCING_NEEDED; |
1294 | 1412 | ||
1295 | create_virtual_node(tb, h); | 1413 | create_virtual_node(tb, h); |
1296 | 1414 | ||
1297 | /* | 1415 | /* |
1298 | determine maximal number of items we can shift to the left neighbor (in tb structure) | 1416 | * determine maximal number of items we can shift to the left |
1299 | and the maximal number of bytes that can flow to the left neighbor | 1417 | * neighbor (in tb structure) and the maximal number of bytes |
1300 | from the left most liquid item that cannot be shifted from S[0] entirely (returned value) | 1418 | * that can flow to the left neighbor from the left most liquid |
1419 | * item that cannot be shifted from S[0] entirely (returned value) | ||
1301 | */ | 1420 | */ |
1302 | check_left(tb, h, lfree); | 1421 | check_left(tb, h, lfree); |
1303 | 1422 | ||
1304 | /* | 1423 | /* |
1305 | determine maximal number of items we can shift to the right neighbor (in tb structure) | 1424 | * determine maximal number of items we can shift to the right |
1306 | and the maximal number of bytes that can flow to the right neighbor | 1425 | * neighbor (in tb structure) and the maximal number of bytes |
1307 | from the right most liquid item that cannot be shifted from S[0] entirely (returned value) | 1426 | * that can flow to the right neighbor from the right most liquid |
1427 | * item that cannot be shifted from S[0] entirely (returned value) | ||
1308 | */ | 1428 | */ |
1309 | check_right(tb, h, rfree); | 1429 | check_right(tb, h, rfree); |
1310 | 1430 | ||
1311 | /* all contents of internal node S[h] can be moved into its | 1431 | /* |
1312 | neighbors, S[h] will be removed after balancing */ | 1432 | * all contents of internal node S[h] can be moved into its |
1433 | * neighbors, S[h] will be removed after balancing | ||
1434 | */ | ||
1313 | if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { | 1435 | if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { |
1314 | int to_r; | 1436 | int to_r; |
1315 | 1437 | ||
1316 | /* Since we are working on internal nodes, and our internal | 1438 | /* |
1317 | nodes have fixed size entries, then we can balance by the | 1439 | * Since we are working on internal nodes, and our internal |
1318 | number of items rather than the space they consume. In this | 1440 | * nodes have fixed size entries, then we can balance by the |
1319 | routine we set the left node equal to the right node, | 1441 | * number of items rather than the space they consume. In this |
1320 | allowing a difference of less than or equal to 1 child | 1442 | * routine we set the left node equal to the right node, |
1321 | pointer. */ | 1443 | * allowing a difference of less than or equal to 1 child |
1444 | * pointer. | ||
1445 | */ | ||
1322 | to_r = | 1446 | to_r = |
1323 | ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + | 1447 | ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + |
1324 | vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - | 1448 | vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - |
@@ -1328,7 +1452,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1328 | return CARRY_ON; | 1452 | return CARRY_ON; |
1329 | } | 1453 | } |
1330 | 1454 | ||
1331 | /* this checks balance condition, that any two neighboring nodes can not fit in one node */ | 1455 | /* |
1456 | * this checks balance condition, that any two neighboring nodes | ||
1457 | * can not fit in one node | ||
1458 | */ | ||
1332 | RFALSE(h && | 1459 | RFALSE(h && |
1333 | (tb->lnum[h] >= vn->vn_nr_item + 1 || | 1460 | (tb->lnum[h] >= vn->vn_nr_item + 1 || |
1334 | tb->rnum[h] >= vn->vn_nr_item + 1), | 1461 | tb->rnum[h] >= vn->vn_nr_item + 1), |
@@ -1337,16 +1464,22 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1337 | (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), | 1464 | (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), |
1338 | "vs-8225: tree is not balanced on leaf level"); | 1465 | "vs-8225: tree is not balanced on leaf level"); |
1339 | 1466 | ||
1340 | /* all contents of S[0] can be moved into its neighbors | 1467 | /* |
1341 | S[0] will be removed after balancing. */ | 1468 | * all contents of S[0] can be moved into its neighbors |
1469 | * S[0] will be removed after balancing. | ||
1470 | */ | ||
1342 | if (!h && is_leaf_removable(tb)) | 1471 | if (!h && is_leaf_removable(tb)) |
1343 | return CARRY_ON; | 1472 | return CARRY_ON; |
1344 | 1473 | ||
1345 | /* why do we perform this check here rather than earlier?? | 1474 | /* |
1346 | Answer: we can win 1 node in some cases above. Moreover we | 1475 | * why do we perform this check here rather than earlier?? |
1347 | checked it above, when we checked, that S[0] is not removable | 1476 | * Answer: we can win 1 node in some cases above. Moreover we |
1348 | in principle */ | 1477 | * checked it above, when we checked, that S[0] is not removable |
1349 | if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ | 1478 | * in principle |
1479 | */ | ||
1480 | |||
1481 | /* new item fits into node S[h] without any shifting */ | ||
1482 | if (sfree >= levbytes) { | ||
1350 | if (!h) | 1483 | if (!h) |
1351 | tb->s0num = vn->vn_nr_item; | 1484 | tb->s0num = vn->vn_nr_item; |
1352 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1485 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
@@ -1355,18 +1488,19 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1355 | 1488 | ||
1356 | { | 1489 | { |
1357 | int lpar, rpar, nset, lset, rset, lrset; | 1490 | int lpar, rpar, nset, lset, rset, lrset; |
1358 | /* | 1491 | /* regular overflowing of the node */ |
1359 | * regular overflowing of the node | ||
1360 | */ | ||
1361 | 1492 | ||
1362 | /* get_num_ver works in 2 modes (FLOW & NO_FLOW) | 1493 | /* |
1363 | lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) | 1494 | * get_num_ver works in 2 modes (FLOW & NO_FLOW) |
1364 | nset, lset, rset, lrset - shows, whether flowing items give better packing | 1495 | * lpar, rpar - number of items we can shift to left/right |
1496 | * neighbor (including splitting item) | ||
1497 | * nset, lset, rset, lrset - shows, whether flowing items | ||
1498 | * give better packing | ||
1365 | */ | 1499 | */ |
1366 | #define FLOW 1 | 1500 | #define FLOW 1 |
1367 | #define NO_FLOW 0 /* do not any splitting */ | 1501 | #define NO_FLOW 0 /* do not any splitting */ |
1368 | 1502 | ||
1369 | /* we choose one the following */ | 1503 | /* we choose one of the following */ |
1370 | #define NOTHING_SHIFT_NO_FLOW 0 | 1504 | #define NOTHING_SHIFT_NO_FLOW 0 |
1371 | #define NOTHING_SHIFT_FLOW 5 | 1505 | #define NOTHING_SHIFT_FLOW 5 |
1372 | #define LEFT_SHIFT_NO_FLOW 10 | 1506 | #define LEFT_SHIFT_NO_FLOW 10 |
@@ -1379,10 +1513,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1379 | lpar = tb->lnum[h]; | 1513 | lpar = tb->lnum[h]; |
1380 | rpar = tb->rnum[h]; | 1514 | rpar = tb->rnum[h]; |
1381 | 1515 | ||
1382 | /* calculate number of blocks S[h] must be split into when | 1516 | /* |
1383 | nothing is shifted to the neighbors, | 1517 | * calculate number of blocks S[h] must be split into when |
1384 | as well as number of items in each part of the split node (s012 numbers), | 1518 | * nothing is shifted to the neighbors, as well as number of |
1385 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ | 1519 | * items in each part of the split node (s012 numbers), |
1520 | * and number of bytes (s1bytes) of the shared drop which | ||
1521 | * flow to S1 if any | ||
1522 | */ | ||
1386 | nset = NOTHING_SHIFT_NO_FLOW; | 1523 | nset = NOTHING_SHIFT_NO_FLOW; |
1387 | nver = get_num_ver(vn->vn_mode, tb, h, | 1524 | nver = get_num_ver(vn->vn_mode, tb, h, |
1388 | 0, -1, h ? vn->vn_nr_item : 0, -1, | 1525 | 0, -1, h ? vn->vn_nr_item : 0, -1, |
@@ -1391,7 +1528,10 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1391 | if (!h) { | 1528 | if (!h) { |
1392 | int nver1; | 1529 | int nver1; |
1393 | 1530 | ||
1394 | /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ | 1531 | /* |
1532 | * note, that in this case we try to bottle | ||
1533 | * between S[0] and S1 (S1 - the first new node) | ||
1534 | */ | ||
1395 | nver1 = get_num_ver(vn->vn_mode, tb, h, | 1535 | nver1 = get_num_ver(vn->vn_mode, tb, h, |
1396 | 0, -1, 0, -1, | 1536 | 0, -1, 0, -1, |
1397 | snum012 + NOTHING_SHIFT_FLOW, FLOW); | 1537 | snum012 + NOTHING_SHIFT_FLOW, FLOW); |
@@ -1399,11 +1539,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1399 | nset = NOTHING_SHIFT_FLOW, nver = nver1; | 1539 | nset = NOTHING_SHIFT_FLOW, nver = nver1; |
1400 | } | 1540 | } |
1401 | 1541 | ||
1402 | /* calculate number of blocks S[h] must be split into when | 1542 | /* |
1403 | l_shift_num first items and l_shift_bytes of the right most | 1543 | * calculate number of blocks S[h] must be split into when |
1404 | liquid item to be shifted are shifted to the left neighbor, | 1544 | * l_shift_num first items and l_shift_bytes of the right |
1405 | as well as number of items in each part of the splitted node (s012 numbers), | 1545 | * most liquid item to be shifted are shifted to the left |
1406 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1546 | * neighbor, as well as number of items in each part of the |
1547 | * splitted node (s012 numbers), and number of bytes | ||
1548 | * (s1bytes) of the shared drop which flow to S1 if any | ||
1407 | */ | 1549 | */ |
1408 | lset = LEFT_SHIFT_NO_FLOW; | 1550 | lset = LEFT_SHIFT_NO_FLOW; |
1409 | lnver = get_num_ver(vn->vn_mode, tb, h, | 1551 | lnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1422,11 +1564,13 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1422 | lset = LEFT_SHIFT_FLOW, lnver = lnver1; | 1564 | lset = LEFT_SHIFT_FLOW, lnver = lnver1; |
1423 | } | 1565 | } |
1424 | 1566 | ||
1425 | /* calculate number of blocks S[h] must be split into when | 1567 | /* |
1426 | r_shift_num first items and r_shift_bytes of the left most | 1568 | * calculate number of blocks S[h] must be split into when |
1427 | liquid item to be shifted are shifted to the right neighbor, | 1569 | * r_shift_num first items and r_shift_bytes of the left most |
1428 | as well as number of items in each part of the splitted node (s012 numbers), | 1570 | * liquid item to be shifted are shifted to the right neighbor, |
1429 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1571 | * as well as number of items in each part of the splitted |
1572 | * node (s012 numbers), and number of bytes (s1bytes) of the | ||
1573 | * shared drop which flow to S1 if any | ||
1430 | */ | 1574 | */ |
1431 | rset = RIGHT_SHIFT_NO_FLOW; | 1575 | rset = RIGHT_SHIFT_NO_FLOW; |
1432 | rnver = get_num_ver(vn->vn_mode, tb, h, | 1576 | rnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1451,10 +1595,12 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1451 | rset = RIGHT_SHIFT_FLOW, rnver = rnver1; | 1595 | rset = RIGHT_SHIFT_FLOW, rnver = rnver1; |
1452 | } | 1596 | } |
1453 | 1597 | ||
1454 | /* calculate number of blocks S[h] must be split into when | 1598 | /* |
1455 | items are shifted in both directions, | 1599 | * calculate number of blocks S[h] must be split into when |
1456 | as well as number of items in each part of the splitted node (s012 numbers), | 1600 | * items are shifted in both directions, as well as number |
1457 | and number of bytes (s1bytes) of the shared drop which flow to S1 if any | 1601 | * of items in each part of the splitted node (s012 numbers), |
1602 | * and number of bytes (s1bytes) of the shared drop which | ||
1603 | * flow to S1 if any | ||
1458 | */ | 1604 | */ |
1459 | lrset = LR_SHIFT_NO_FLOW; | 1605 | lrset = LR_SHIFT_NO_FLOW; |
1460 | lrnver = get_num_ver(vn->vn_mode, tb, h, | 1606 | lrnver = get_num_ver(vn->vn_mode, tb, h, |
@@ -1481,10 +1627,12 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1481 | lrset = LR_SHIFT_FLOW, lrnver = lrnver1; | 1627 | lrset = LR_SHIFT_FLOW, lrnver = lrnver1; |
1482 | } | 1628 | } |
1483 | 1629 | ||
1484 | /* Our general shifting strategy is: | 1630 | /* |
1485 | 1) to minimized number of new nodes; | 1631 | * Our general shifting strategy is: |
1486 | 2) to minimized number of neighbors involved in shifting; | 1632 | * 1) to minimized number of new nodes; |
1487 | 3) to minimized number of disk reads; */ | 1633 | * 2) to minimized number of neighbors involved in shifting; |
1634 | * 3) to minimized number of disk reads; | ||
1635 | */ | ||
1488 | 1636 | ||
1489 | /* we can win TWO or ONE nodes by shifting in both directions */ | 1637 | /* we can win TWO or ONE nodes by shifting in both directions */ |
1490 | if (lrnver < lnver && lrnver < rnver) { | 1638 | if (lrnver < lnver && lrnver < rnver) { |
@@ -1508,42 +1656,59 @@ static int ip_check_balance(struct tree_balance *tb, int h) | |||
1508 | return CARRY_ON; | 1656 | return CARRY_ON; |
1509 | } | 1657 | } |
1510 | 1658 | ||
1511 | /* if shifting doesn't lead to better packing then don't shift */ | 1659 | /* |
1660 | * if shifting doesn't lead to better packing | ||
1661 | * then don't shift | ||
1662 | */ | ||
1512 | if (nver == lrnver) { | 1663 | if (nver == lrnver) { |
1513 | set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, | 1664 | set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, |
1514 | -1); | 1665 | -1); |
1515 | return CARRY_ON; | 1666 | return CARRY_ON; |
1516 | } | 1667 | } |
1517 | 1668 | ||
1518 | /* now we know that for better packing shifting in only one | 1669 | /* |
1519 | direction either to the left or to the right is required */ | 1670 | * now we know that for better packing shifting in only one |
1671 | * direction either to the left or to the right is required | ||
1672 | */ | ||
1520 | 1673 | ||
1521 | /* if shifting to the left is better than shifting to the right */ | 1674 | /* |
1675 | * if shifting to the left is better than | ||
1676 | * shifting to the right | ||
1677 | */ | ||
1522 | if (lnver < rnver) { | 1678 | if (lnver < rnver) { |
1523 | SET_PAR_SHIFT_LEFT; | 1679 | SET_PAR_SHIFT_LEFT; |
1524 | return CARRY_ON; | 1680 | return CARRY_ON; |
1525 | } | 1681 | } |
1526 | 1682 | ||
1527 | /* if shifting to the right is better than shifting to the left */ | 1683 | /* |
1684 | * if shifting to the right is better than | ||
1685 | * shifting to the left | ||
1686 | */ | ||
1528 | if (lnver > rnver) { | 1687 | if (lnver > rnver) { |
1529 | SET_PAR_SHIFT_RIGHT; | 1688 | SET_PAR_SHIFT_RIGHT; |
1530 | return CARRY_ON; | 1689 | return CARRY_ON; |
1531 | } | 1690 | } |
1532 | 1691 | ||
1533 | /* now shifting in either direction gives the same number | 1692 | /* |
1534 | of nodes and we can make use of the cached neighbors */ | 1693 | * now shifting in either direction gives the same number |
1694 | * of nodes and we can make use of the cached neighbors | ||
1695 | */ | ||
1535 | if (is_left_neighbor_in_cache(tb, h)) { | 1696 | if (is_left_neighbor_in_cache(tb, h)) { |
1536 | SET_PAR_SHIFT_LEFT; | 1697 | SET_PAR_SHIFT_LEFT; |
1537 | return CARRY_ON; | 1698 | return CARRY_ON; |
1538 | } | 1699 | } |
1539 | 1700 | ||
1540 | /* shift to the right independently on whether the right neighbor in cache or not */ | 1701 | /* |
1702 | * shift to the right independently on whether the | ||
1703 | * right neighbor in cache or not | ||
1704 | */ | ||
1541 | SET_PAR_SHIFT_RIGHT; | 1705 | SET_PAR_SHIFT_RIGHT; |
1542 | return CARRY_ON; | 1706 | return CARRY_ON; |
1543 | } | 1707 | } |
1544 | } | 1708 | } |
1545 | 1709 | ||
1546 | /* Check whether current node S[h] is balanced when Decreasing its size by | 1710 | /* |
1711 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1547 | * Deleting or Cutting for INTERNAL node of S+tree. | 1712 | * Deleting or Cutting for INTERNAL node of S+tree. |
1548 | * Calculate parameters for balancing for current level h. | 1713 | * Calculate parameters for balancing for current level h. |
1549 | * Parameters: | 1714 | * Parameters: |
@@ -1563,8 +1728,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1563 | { | 1728 | { |
1564 | struct virtual_node *vn = tb->tb_vn; | 1729 | struct virtual_node *vn = tb->tb_vn; |
1565 | 1730 | ||
1566 | /* Sh is the node whose balance is currently being checked, | 1731 | /* |
1567 | and Fh is its father. */ | 1732 | * Sh is the node whose balance is currently being checked, |
1733 | * and Fh is its father. | ||
1734 | */ | ||
1568 | struct buffer_head *Sh, *Fh; | 1735 | struct buffer_head *Sh, *Fh; |
1569 | int maxsize, ret; | 1736 | int maxsize, ret; |
1570 | int lfree, rfree /* free space in L and R */ ; | 1737 | int lfree, rfree /* free space in L and R */ ; |
@@ -1574,19 +1741,25 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1574 | 1741 | ||
1575 | maxsize = MAX_CHILD_SIZE(Sh); | 1742 | maxsize = MAX_CHILD_SIZE(Sh); |
1576 | 1743 | ||
1577 | /* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ | 1744 | /* |
1578 | /* new_nr_item = number of items node would have if operation is */ | 1745 | * using tb->insert_size[h], which is negative in this case, |
1579 | /* performed without balancing (new_nr_item); */ | 1746 | * create_virtual_node calculates: |
1747 | * new_nr_item = number of items node would have if operation is | ||
1748 | * performed without balancing (new_nr_item); | ||
1749 | */ | ||
1580 | create_virtual_node(tb, h); | 1750 | create_virtual_node(tb, h); |
1581 | 1751 | ||
1582 | if (!Fh) { /* S[h] is the root. */ | 1752 | if (!Fh) { /* S[h] is the root. */ |
1753 | /* no balancing for higher levels needed */ | ||
1583 | if (vn->vn_nr_item > 0) { | 1754 | if (vn->vn_nr_item > 0) { |
1584 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); | 1755 | set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); |
1585 | return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ | 1756 | return NO_BALANCING_NEEDED; |
1586 | } | 1757 | } |
1587 | /* new_nr_item == 0. | 1758 | /* |
1759 | * new_nr_item == 0. | ||
1588 | * Current root will be deleted resulting in | 1760 | * Current root will be deleted resulting in |
1589 | * decrementing the tree height. */ | 1761 | * decrementing the tree height. |
1762 | */ | ||
1590 | set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); | 1763 | set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); |
1591 | return CARRY_ON; | 1764 | return CARRY_ON; |
1592 | } | 1765 | } |
@@ -1602,12 +1775,18 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1602 | check_left(tb, h, lfree); | 1775 | check_left(tb, h, lfree); |
1603 | check_right(tb, h, rfree); | 1776 | check_right(tb, h, rfree); |
1604 | 1777 | ||
1605 | if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid. | 1778 | /* |
1606 | * In this case we balance only if it leads to better packing. */ | 1779 | * Balance condition for the internal node is valid. |
1607 | if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors, | 1780 | * In this case we balance only if it leads to better packing. |
1608 | * which is impossible with greater values of new_nr_item. */ | 1781 | */ |
1782 | if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { | ||
1783 | /* | ||
1784 | * Here we join S[h] with one of its neighbors, | ||
1785 | * which is impossible with greater values of new_nr_item. | ||
1786 | */ | ||
1787 | if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { | ||
1788 | /* All contents of S[h] can be moved to L[h]. */ | ||
1609 | if (tb->lnum[h] >= vn->vn_nr_item + 1) { | 1789 | if (tb->lnum[h] >= vn->vn_nr_item + 1) { |
1610 | /* All contents of S[h] can be moved to L[h]. */ | ||
1611 | int n; | 1790 | int n; |
1612 | int order_L; | 1791 | int order_L; |
1613 | 1792 | ||
@@ -1623,8 +1802,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1623 | return CARRY_ON; | 1802 | return CARRY_ON; |
1624 | } | 1803 | } |
1625 | 1804 | ||
1805 | /* All contents of S[h] can be moved to R[h]. */ | ||
1626 | if (tb->rnum[h] >= vn->vn_nr_item + 1) { | 1806 | if (tb->rnum[h] >= vn->vn_nr_item + 1) { |
1627 | /* All contents of S[h] can be moved to R[h]. */ | ||
1628 | int n; | 1807 | int n; |
1629 | int order_R; | 1808 | int order_R; |
1630 | 1809 | ||
@@ -1641,8 +1820,11 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1641 | } | 1820 | } |
1642 | } | 1821 | } |
1643 | 1822 | ||
1823 | /* | ||
1824 | * All contents of S[h] can be moved to the neighbors | ||
1825 | * (L[h] & R[h]). | ||
1826 | */ | ||
1644 | if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { | 1827 | if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { |
1645 | /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ | ||
1646 | int to_r; | 1828 | int to_r; |
1647 | 1829 | ||
1648 | to_r = | 1830 | to_r = |
@@ -1659,7 +1841,10 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1659 | return NO_BALANCING_NEEDED; | 1841 | return NO_BALANCING_NEEDED; |
1660 | } | 1842 | } |
1661 | 1843 | ||
1662 | /* Current node contain insufficient number of items. Balancing is required. */ | 1844 | /* |
1845 | * Current node contain insufficient number of items. | ||
1846 | * Balancing is required. | ||
1847 | */ | ||
1663 | /* Check whether we can merge S[h] with left neighbor. */ | 1848 | /* Check whether we can merge S[h] with left neighbor. */ |
1664 | if (tb->lnum[h] >= vn->vn_nr_item + 1) | 1849 | if (tb->lnum[h] >= vn->vn_nr_item + 1) |
1665 | if (is_left_neighbor_in_cache(tb, h) | 1850 | if (is_left_neighbor_in_cache(tb, h) |
@@ -1726,7 +1911,8 @@ static int dc_check_balance_internal(struct tree_balance *tb, int h) | |||
1726 | return CARRY_ON; | 1911 | return CARRY_ON; |
1727 | } | 1912 | } |
1728 | 1913 | ||
1729 | /* Check whether current node S[h] is balanced when Decreasing its size by | 1914 | /* |
1915 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1730 | * Deleting or Truncating for LEAF node of S+tree. | 1916 | * Deleting or Truncating for LEAF node of S+tree. |
1731 | * Calculate parameters for balancing for current level h. | 1917 | * Calculate parameters for balancing for current level h. |
1732 | * Parameters: | 1918 | * Parameters: |
@@ -1743,15 +1929,21 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1743 | { | 1929 | { |
1744 | struct virtual_node *vn = tb->tb_vn; | 1930 | struct virtual_node *vn = tb->tb_vn; |
1745 | 1931 | ||
1746 | /* Number of bytes that must be deleted from | 1932 | /* |
1747 | (value is negative if bytes are deleted) buffer which | 1933 | * Number of bytes that must be deleted from |
1748 | contains node being balanced. The mnemonic is that the | 1934 | * (value is negative if bytes are deleted) buffer which |
1749 | attempted change in node space used level is levbytes bytes. */ | 1935 | * contains node being balanced. The mnemonic is that the |
1936 | * attempted change in node space used level is levbytes bytes. | ||
1937 | */ | ||
1750 | int levbytes; | 1938 | int levbytes; |
1939 | |||
1751 | /* the maximal item size */ | 1940 | /* the maximal item size */ |
1752 | int maxsize, ret; | 1941 | int maxsize, ret; |
1753 | /* S0 is the node whose balance is currently being checked, | 1942 | |
1754 | and F0 is its father. */ | 1943 | /* |
1944 | * S0 is the node whose balance is currently being checked, | ||
1945 | * and F0 is its father. | ||
1946 | */ | ||
1755 | struct buffer_head *S0, *F0; | 1947 | struct buffer_head *S0, *F0; |
1756 | int lfree, rfree /* free space in L and R */ ; | 1948 | int lfree, rfree /* free space in L and R */ ; |
1757 | 1949 | ||
@@ -1784,9 +1976,11 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1784 | if (are_leaves_removable(tb, lfree, rfree)) | 1976 | if (are_leaves_removable(tb, lfree, rfree)) |
1785 | return CARRY_ON; | 1977 | return CARRY_ON; |
1786 | 1978 | ||
1787 | /* determine maximal number of items we can shift to the left/right neighbor | 1979 | /* |
1788 | and the maximal number of bytes that can flow to the left/right neighbor | 1980 | * determine maximal number of items we can shift to the left/right |
1789 | from the left/right most liquid item that cannot be shifted from S[0] entirely | 1981 | * neighbor and the maximal number of bytes that can flow to the |
1982 | * left/right neighbor from the left/right most liquid item that | ||
1983 | * cannot be shifted from S[0] entirely | ||
1790 | */ | 1984 | */ |
1791 | check_left(tb, h, lfree); | 1985 | check_left(tb, h, lfree); |
1792 | check_right(tb, h, rfree); | 1986 | check_right(tb, h, rfree); |
@@ -1810,7 +2004,10 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1810 | return CARRY_ON; | 2004 | return CARRY_ON; |
1811 | } | 2005 | } |
1812 | 2006 | ||
1813 | /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ | 2007 | /* |
2008 | * All contents of S[0] can be moved to the neighbors (L[0] & R[0]). | ||
2009 | * Set parameters and return | ||
2010 | */ | ||
1814 | if (is_leaf_removable(tb)) | 2011 | if (is_leaf_removable(tb)) |
1815 | return CARRY_ON; | 2012 | return CARRY_ON; |
1816 | 2013 | ||
@@ -1820,7 +2017,8 @@ static int dc_check_balance_leaf(struct tree_balance *tb, int h) | |||
1820 | return NO_BALANCING_NEEDED; | 2017 | return NO_BALANCING_NEEDED; |
1821 | } | 2018 | } |
1822 | 2019 | ||
1823 | /* Check whether current node S[h] is balanced when Decreasing its size by | 2020 | /* |
2021 | * Check whether current node S[h] is balanced when Decreasing its size by | ||
1824 | * Deleting or Cutting. | 2022 | * Deleting or Cutting. |
1825 | * Calculate parameters for balancing for current level h. | 2023 | * Calculate parameters for balancing for current level h. |
1826 | * Parameters: | 2024 | * Parameters: |
@@ -1844,15 +2042,16 @@ static int dc_check_balance(struct tree_balance *tb, int h) | |||
1844 | return dc_check_balance_leaf(tb, h); | 2042 | return dc_check_balance_leaf(tb, h); |
1845 | } | 2043 | } |
1846 | 2044 | ||
1847 | /* Check whether current node S[h] is balanced. | 2045 | /* |
2046 | * Check whether current node S[h] is balanced. | ||
1848 | * Calculate parameters for balancing for current level h. | 2047 | * Calculate parameters for balancing for current level h. |
1849 | * Parameters: | 2048 | * Parameters: |
1850 | * | 2049 | * |
1851 | * tb tree_balance structure: | 2050 | * tb tree_balance structure: |
1852 | * | 2051 | * |
1853 | * tb is a large structure that must be read about in the header file | 2052 | * tb is a large structure that must be read about in the header |
1854 | * at the same time as this procedure if the reader is to successfully | 2053 | * file at the same time as this procedure if the reader is |
1855 | * understand this procedure | 2054 | * to successfully understand this procedure |
1856 | * | 2055 | * |
1857 | * h current level of the node; | 2056 | * h current level of the node; |
1858 | * inum item number in S[h]; | 2057 | * inum item number in S[h]; |
@@ -1882,8 +2081,8 @@ static int check_balance(int mode, | |||
1882 | RFALSE(mode == M_INSERT && !vn->vn_ins_ih, | 2081 | RFALSE(mode == M_INSERT && !vn->vn_ins_ih, |
1883 | "vs-8255: ins_ih can not be 0 in insert mode"); | 2082 | "vs-8255: ins_ih can not be 0 in insert mode"); |
1884 | 2083 | ||
2084 | /* Calculate balance parameters when size of node is increasing. */ | ||
1885 | if (tb->insert_size[h] > 0) | 2085 | if (tb->insert_size[h] > 0) |
1886 | /* Calculate balance parameters when size of node is increasing. */ | ||
1887 | return ip_check_balance(tb, h); | 2086 | return ip_check_balance(tb, h); |
1888 | 2087 | ||
1889 | /* Calculate balance parameters when size of node is decreasing. */ | 2088 | /* Calculate balance parameters when size of node is decreasing. */ |
@@ -1911,21 +2110,23 @@ static int get_direct_parent(struct tree_balance *tb, int h) | |||
1911 | PATH_OFFSET_POSITION(path, path_offset - 1) = 0; | 2110 | PATH_OFFSET_POSITION(path, path_offset - 1) = 0; |
1912 | return CARRY_ON; | 2111 | return CARRY_ON; |
1913 | } | 2112 | } |
1914 | return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ | 2113 | /* Root is changed and we must recalculate the path. */ |
2114 | return REPEAT_SEARCH; | ||
1915 | } | 2115 | } |
1916 | 2116 | ||
2117 | /* Parent in the path is not in the tree. */ | ||
1917 | if (!B_IS_IN_TREE | 2118 | if (!B_IS_IN_TREE |
1918 | (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) | 2119 | (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) |
1919 | return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ | 2120 | return REPEAT_SEARCH; |
1920 | 2121 | ||
1921 | if ((position = | 2122 | if ((position = |
1922 | PATH_OFFSET_POSITION(path, | 2123 | PATH_OFFSET_POSITION(path, |
1923 | path_offset - 1)) > B_NR_ITEMS(bh)) | 2124 | path_offset - 1)) > B_NR_ITEMS(bh)) |
1924 | return REPEAT_SEARCH; | 2125 | return REPEAT_SEARCH; |
1925 | 2126 | ||
2127 | /* Parent in the path is not parent of the current node in the tree. */ | ||
1926 | if (B_N_CHILD_NUM(bh, position) != | 2128 | if (B_N_CHILD_NUM(bh, position) != |
1927 | PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) | 2129 | PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) |
1928 | /* Parent in the path is not parent of the current node in the tree. */ | ||
1929 | return REPEAT_SEARCH; | 2130 | return REPEAT_SEARCH; |
1930 | 2131 | ||
1931 | if (buffer_locked(bh)) { | 2132 | if (buffer_locked(bh)) { |
@@ -1936,10 +2137,15 @@ static int get_direct_parent(struct tree_balance *tb, int h) | |||
1936 | return REPEAT_SEARCH; | 2137 | return REPEAT_SEARCH; |
1937 | } | 2138 | } |
1938 | 2139 | ||
1939 | return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ | 2140 | /* |
2141 | * Parent in the path is unlocked and really parent | ||
2142 | * of the current node. | ||
2143 | */ | ||
2144 | return CARRY_ON; | ||
1940 | } | 2145 | } |
1941 | 2146 | ||
1942 | /* Using lnum[h] and rnum[h] we should determine what neighbors | 2147 | /* |
2148 | * Using lnum[h] and rnum[h] we should determine what neighbors | ||
1943 | * of S[h] we | 2149 | * of S[h] we |
1944 | * need in order to balance S[h], and get them if necessary. | 2150 | * need in order to balance S[h], and get them if necessary. |
1945 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; | 2151 | * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; |
@@ -1997,7 +2203,7 @@ static int get_neighbors(struct tree_balance *tb, int h) | |||
1997 | } | 2203 | } |
1998 | 2204 | ||
1999 | /* We need right neighbor to balance S[path_offset]. */ | 2205 | /* We need right neighbor to balance S[path_offset]. */ |
2000 | if (tb->rnum[h]) { /* We need right neighbor to balance S[path_offset]. */ | 2206 | if (tb->rnum[h]) { |
2001 | PROC_INFO_INC(sb, need_r_neighbor[h]); | 2207 | PROC_INFO_INC(sb, need_r_neighbor[h]); |
2002 | bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); | 2208 | bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); |
2003 | 2209 | ||
@@ -2053,9 +2259,11 @@ static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) | |||
2053 | (max_num_of_entries - 1) * sizeof(__u16)); | 2259 | (max_num_of_entries - 1) * sizeof(__u16)); |
2054 | } | 2260 | } |
2055 | 2261 | ||
2056 | /* maybe we should fail balancing we are going to perform when kmalloc | 2262 | /* |
2057 | fails several times. But now it will loop until kmalloc gets | 2263 | * maybe we should fail balancing we are going to perform when kmalloc |
2058 | required memory */ | 2264 | * fails several times. But now it will loop until kmalloc gets |
2265 | * required memory | ||
2266 | */ | ||
2059 | static int get_mem_for_virtual_node(struct tree_balance *tb) | 2267 | static int get_mem_for_virtual_node(struct tree_balance *tb) |
2060 | { | 2268 | { |
2061 | int check_fs = 0; | 2269 | int check_fs = 0; |
@@ -2064,8 +2272,8 @@ static int get_mem_for_virtual_node(struct tree_balance *tb) | |||
2064 | 2272 | ||
2065 | size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); | 2273 | size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); |
2066 | 2274 | ||
2275 | /* we have to allocate more memory for virtual node */ | ||
2067 | if (size > tb->vn_buf_size) { | 2276 | if (size > tb->vn_buf_size) { |
2068 | /* we have to allocate more memory for virtual node */ | ||
2069 | if (tb->vn_buf) { | 2277 | if (tb->vn_buf) { |
2070 | /* free memory allocated before */ | 2278 | /* free memory allocated before */ |
2071 | kfree(tb->vn_buf); | 2279 | kfree(tb->vn_buf); |
@@ -2079,10 +2287,12 @@ static int get_mem_for_virtual_node(struct tree_balance *tb) | |||
2079 | /* get memory for virtual item */ | 2287 | /* get memory for virtual item */ |
2080 | buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); | 2288 | buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); |
2081 | if (!buf) { | 2289 | if (!buf) { |
2082 | /* getting memory with GFP_KERNEL priority may involve | 2290 | /* |
2083 | balancing now (due to indirect_to_direct conversion on | 2291 | * getting memory with GFP_KERNEL priority may involve |
2084 | dcache shrinking). So, release path and collected | 2292 | * balancing now (due to indirect_to_direct conversion |
2085 | resources here */ | 2293 | * on dcache shrinking). So, release path and collected |
2294 | * resources here | ||
2295 | */ | ||
2086 | free_buffers_in_tb(tb); | 2296 | free_buffers_in_tb(tb); |
2087 | buf = kmalloc(size, GFP_NOFS); | 2297 | buf = kmalloc(size, GFP_NOFS); |
2088 | if (!buf) { | 2298 | if (!buf) { |
@@ -2168,8 +2378,10 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2168 | for (i = tb->tb_path->path_length; | 2378 | for (i = tb->tb_path->path_length; |
2169 | !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { | 2379 | !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { |
2170 | if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { | 2380 | if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { |
2171 | /* if I understand correctly, we can only be sure the last buffer | 2381 | /* |
2172 | ** in the path is in the tree --clm | 2382 | * if I understand correctly, we can only |
2383 | * be sure the last buffer in the path is | ||
2384 | * in the tree --clm | ||
2173 | */ | 2385 | */ |
2174 | #ifdef CONFIG_REISERFS_CHECK | 2386 | #ifdef CONFIG_REISERFS_CHECK |
2175 | if (PATH_PLAST_BUFFER(tb->tb_path) == | 2387 | if (PATH_PLAST_BUFFER(tb->tb_path) == |
@@ -2256,13 +2468,15 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2256 | } | 2468 | } |
2257 | } | 2469 | } |
2258 | } | 2470 | } |
2259 | /* as far as I can tell, this is not required. The FEB list seems | 2471 | |
2260 | ** to be full of newly allocated nodes, which will never be locked, | 2472 | /* |
2261 | ** dirty, or anything else. | 2473 | * as far as I can tell, this is not required. The FEB list |
2262 | ** To be safe, I'm putting in the checks and waits in. For the moment, | 2474 | * seems to be full of newly allocated nodes, which will |
2263 | ** they are needed to keep the code in journal.c from complaining | 2475 | * never be locked, dirty, or anything else. |
2264 | ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. | 2476 | * To be safe, I'm putting in the checks and waits in. |
2265 | ** --clm | 2477 | * For the moment, they are needed to keep the code in |
2478 | * journal.c from complaining about the buffer. | ||
2479 | * That code is inside CONFIG_REISERFS_CHECK as well. --clm | ||
2266 | */ | 2480 | */ |
2267 | for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { | 2481 | for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { |
2268 | if (tb->FEB[i]) { | 2482 | if (tb->FEB[i]) { |
@@ -2300,7 +2514,8 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2300 | return CARRY_ON; | 2514 | return CARRY_ON; |
2301 | } | 2515 | } |
2302 | 2516 | ||
2303 | /* Prepare for balancing, that is | 2517 | /* |
2518 | * Prepare for balancing, that is | ||
2304 | * get all necessary parents, and neighbors; | 2519 | * get all necessary parents, and neighbors; |
2305 | * analyze what and where should be moved; | 2520 | * analyze what and where should be moved; |
2306 | * get sufficient number of new nodes; | 2521 | * get sufficient number of new nodes; |
@@ -2309,13 +2524,14 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) | |||
2309 | * When ported to SMP kernels, only at the last moment after all needed nodes | 2524 | * When ported to SMP kernels, only at the last moment after all needed nodes |
2310 | * are collected in cache, will the resources be locked using the usual | 2525 | * are collected in cache, will the resources be locked using the usual |
2311 | * textbook ordered lock acquisition algorithms. Note that ensuring that | 2526 | * textbook ordered lock acquisition algorithms. Note that ensuring that |
2312 | * this code neither write locks what it does not need to write lock nor locks out of order | 2527 | * this code neither write locks what it does not need to write lock nor locks |
2313 | * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans | 2528 | * out of order will be a pain in the butt that could have been avoided. |
2529 | * Grumble grumble. -Hans | ||
2314 | * | 2530 | * |
2315 | * fix is meant in the sense of render unchanging | 2531 | * fix is meant in the sense of render unchanging |
2316 | * | 2532 | * |
2317 | * Latency might be improved by first gathering a list of what buffers are needed | 2533 | * Latency might be improved by first gathering a list of what buffers |
2318 | * and then getting as many of them in parallel as possible? -Hans | 2534 | * are needed and then getting as many of them in parallel as possible? -Hans |
2319 | * | 2535 | * |
2320 | * Parameters: | 2536 | * Parameters: |
2321 | * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) | 2537 | * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) |
@@ -2335,8 +2551,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2335 | int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); | 2551 | int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); |
2336 | int pos_in_item; | 2552 | int pos_in_item; |
2337 | 2553 | ||
2338 | /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared | 2554 | /* |
2339 | ** during wait_tb_buffers_run | 2555 | * we set wait_tb_buffers_run when we have to restore any dirty |
2556 | * bits cleared during wait_tb_buffers_run | ||
2340 | */ | 2557 | */ |
2341 | int wait_tb_buffers_run = 0; | 2558 | int wait_tb_buffers_run = 0; |
2342 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); | 2559 | struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); |
@@ -2347,10 +2564,11 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2347 | 2564 | ||
2348 | tb->fs_gen = get_generation(tb->tb_sb); | 2565 | tb->fs_gen = get_generation(tb->tb_sb); |
2349 | 2566 | ||
2350 | /* we prepare and log the super here so it will already be in the | 2567 | /* |
2351 | ** transaction when do_balance needs to change it. | 2568 | * we prepare and log the super here so it will already be in the |
2352 | ** This way do_balance won't have to schedule when trying to prepare | 2569 | * transaction when do_balance needs to change it. |
2353 | ** the super for logging | 2570 | * This way do_balance won't have to schedule when trying to prepare |
2571 | * the super for logging | ||
2354 | */ | 2572 | */ |
2355 | reiserfs_prepare_for_journal(tb->tb_sb, | 2573 | reiserfs_prepare_for_journal(tb->tb_sb, |
2356 | SB_BUFFER_WITH_SB(tb->tb_sb), 1); | 2574 | SB_BUFFER_WITH_SB(tb->tb_sb), 1); |
@@ -2408,7 +2626,7 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2408 | #endif | 2626 | #endif |
2409 | 2627 | ||
2410 | if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) | 2628 | if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) |
2411 | // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat | 2629 | /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */ |
2412 | return REPEAT_SEARCH; | 2630 | return REPEAT_SEARCH; |
2413 | 2631 | ||
2414 | /* Starting from the leaf level; for all levels h of the tree. */ | 2632 | /* Starting from the leaf level; for all levels h of the tree. */ |
@@ -2427,7 +2645,10 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2427 | goto repeat; | 2645 | goto repeat; |
2428 | if (h != MAX_HEIGHT - 1) | 2646 | if (h != MAX_HEIGHT - 1) |
2429 | tb->insert_size[h + 1] = 0; | 2647 | tb->insert_size[h + 1] = 0; |
2430 | /* ok, analysis and resource gathering are complete */ | 2648 | /* |
2649 | * ok, analysis and resource gathering | ||
2650 | * are complete | ||
2651 | */ | ||
2431 | break; | 2652 | break; |
2432 | } | 2653 | } |
2433 | goto repeat; | 2654 | goto repeat; |
@@ -2437,15 +2658,19 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2437 | if (ret != CARRY_ON) | 2658 | if (ret != CARRY_ON) |
2438 | goto repeat; | 2659 | goto repeat; |
2439 | 2660 | ||
2440 | /* No disk space, or schedule occurred and analysis may be | 2661 | /* |
2441 | * invalid and needs to be redone. */ | 2662 | * No disk space, or schedule occurred and analysis may be |
2663 | * invalid and needs to be redone. | ||
2664 | */ | ||
2442 | ret = get_empty_nodes(tb, h); | 2665 | ret = get_empty_nodes(tb, h); |
2443 | if (ret != CARRY_ON) | 2666 | if (ret != CARRY_ON) |
2444 | goto repeat; | 2667 | goto repeat; |
2445 | 2668 | ||
2669 | /* | ||
2670 | * We have a positive insert size but no nodes exist on this | ||
2671 | * level, this means that we are creating a new root. | ||
2672 | */ | ||
2446 | if (!PATH_H_PBUFFER(tb->tb_path, h)) { | 2673 | if (!PATH_H_PBUFFER(tb->tb_path, h)) { |
2447 | /* We have a positive insert size but no nodes exist on this | ||
2448 | level, this means that we are creating a new root. */ | ||
2449 | 2674 | ||
2450 | RFALSE(tb->blknum[h] != 1, | 2675 | RFALSE(tb->blknum[h] != 1, |
2451 | "PAP-8350: creating new empty root"); | 2676 | "PAP-8350: creating new empty root"); |
@@ -2453,11 +2678,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2453 | if (h < MAX_HEIGHT - 1) | 2678 | if (h < MAX_HEIGHT - 1) |
2454 | tb->insert_size[h + 1] = 0; | 2679 | tb->insert_size[h + 1] = 0; |
2455 | } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { | 2680 | } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { |
2681 | /* | ||
2682 | * The tree needs to be grown, so this node S[h] | ||
2683 | * which is the root node is split into two nodes, | ||
2684 | * and a new node (S[h+1]) will be created to | ||
2685 | * become the root node. | ||
2686 | */ | ||
2456 | if (tb->blknum[h] > 1) { | 2687 | if (tb->blknum[h] > 1) { |
2457 | /* The tree needs to be grown, so this node S[h] | ||
2458 | which is the root node is split into two nodes, | ||
2459 | and a new node (S[h+1]) will be created to | ||
2460 | become the root node. */ | ||
2461 | 2688 | ||
2462 | RFALSE(h == MAX_HEIGHT - 1, | 2689 | RFALSE(h == MAX_HEIGHT - 1, |
2463 | "PAP-8355: attempt to create too high of a tree"); | 2690 | "PAP-8355: attempt to create too high of a tree"); |
@@ -2488,11 +2715,13 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2488 | } | 2715 | } |
2489 | 2716 | ||
2490 | repeat: | 2717 | repeat: |
2491 | // fix_nodes was unable to perform its calculation due to | 2718 | /* |
2492 | // filesystem got changed under us, lack of free disk space or i/o | 2719 | * fix_nodes was unable to perform its calculation due to |
2493 | // failure. If the first is the case - the search will be | 2720 | * filesystem got changed under us, lack of free disk space or i/o |
2494 | // repeated. For now - free all resources acquired so far except | 2721 | * failure. If the first is the case - the search will be |
2495 | // for the new allocated nodes | 2722 | * repeated. For now - free all resources acquired so far except |
2723 | * for the new allocated nodes | ||
2724 | */ | ||
2496 | { | 2725 | { |
2497 | int i; | 2726 | int i; |
2498 | 2727 | ||
@@ -2548,8 +2777,6 @@ int fix_nodes(int op_mode, struct tree_balance *tb, | |||
2548 | 2777 | ||
2549 | } | 2778 | } |
2550 | 2779 | ||
2551 | /* Anatoly will probably forgive me renaming tb to tb. I just | ||
2552 | wanted to make lines shorter */ | ||
2553 | void unfix_nodes(struct tree_balance *tb) | 2780 | void unfix_nodes(struct tree_balance *tb) |
2554 | { | 2781 | { |
2555 | int i; | 2782 | int i; |
@@ -2578,8 +2805,10 @@ void unfix_nodes(struct tree_balance *tb) | |||
2578 | for (i = 0; i < MAX_FEB_SIZE; i++) { | 2805 | for (i = 0; i < MAX_FEB_SIZE; i++) { |
2579 | if (tb->FEB[i]) { | 2806 | if (tb->FEB[i]) { |
2580 | b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; | 2807 | b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; |
2581 | /* de-allocated block which was not used by balancing and | 2808 | /* |
2582 | bforget about buffer for it */ | 2809 | * de-allocated block which was not used by |
2810 | * balancing and bforget about buffer for it | ||
2811 | */ | ||
2583 | brelse(tb->FEB[i]); | 2812 | brelse(tb->FEB[i]); |
2584 | reiserfs_free_block(tb->transaction_handle, NULL, | 2813 | reiserfs_free_block(tb->transaction_handle, NULL, |
2585 | blocknr, 0); | 2814 | blocknr, 0); |
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c index 91b0cc1242a2..7a26c4fe6c46 100644 --- a/fs/reiserfs/hashes.c +++ b/fs/reiserfs/hashes.c | |||
@@ -12,12 +12,6 @@ | |||
12 | * Yura's function is added (04/07/2000) | 12 | * Yura's function is added (04/07/2000) |
13 | */ | 13 | */ |
14 | 14 | ||
15 | // | ||
16 | // keyed_hash | ||
17 | // yura_hash | ||
18 | // r5_hash | ||
19 | // | ||
20 | |||
21 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
22 | #include "reiserfs.h" | 16 | #include "reiserfs.h" |
23 | #include <asm/types.h> | 17 | #include <asm/types.h> |
@@ -56,7 +50,7 @@ u32 keyed_hash(const signed char *msg, int len) | |||
56 | u32 pad; | 50 | u32 pad; |
57 | int i; | 51 | int i; |
58 | 52 | ||
59 | // assert(len >= 0 && len < 256); | 53 | /* assert(len >= 0 && len < 256); */ |
60 | 54 | ||
61 | pad = (u32) len | ((u32) len << 8); | 55 | pad = (u32) len | ((u32) len << 8); |
62 | pad |= pad << 16; | 56 | pad |= pad << 16; |
@@ -127,9 +121,10 @@ u32 keyed_hash(const signed char *msg, int len) | |||
127 | return h0 ^ h1; | 121 | return h0 ^ h1; |
128 | } | 122 | } |
129 | 123 | ||
130 | /* What follows in this file is copyright 2000 by Hans Reiser, and the | 124 | /* |
131 | * licensing of what follows is governed by reiserfs/README */ | 125 | * What follows in this file is copyright 2000 by Hans Reiser, and the |
132 | 126 | * licensing of what follows is governed by reiserfs/README | |
127 | */ | ||
133 | u32 yura_hash(const signed char *msg, int len) | 128 | u32 yura_hash(const signed char *msg, int len) |
134 | { | 129 | { |
135 | int j, pow; | 130 | int j, pow; |
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c index ae26a271da35..c4a696714148 100644 --- a/fs/reiserfs/ibalance.c +++ b/fs/reiserfs/ibalance.c | |||
@@ -12,7 +12,10 @@ | |||
12 | int balance_internal(struct tree_balance *, | 12 | int balance_internal(struct tree_balance *, |
13 | int, int, struct item_head *, struct buffer_head **); | 13 | int, int, struct item_head *, struct buffer_head **); |
14 | 14 | ||
15 | /* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ | 15 | /* |
16 | * modes of internal_shift_left, internal_shift_right and | ||
17 | * internal_insert_childs | ||
18 | */ | ||
16 | #define INTERNAL_SHIFT_FROM_S_TO_L 0 | 19 | #define INTERNAL_SHIFT_FROM_S_TO_L 0 |
17 | #define INTERNAL_SHIFT_FROM_R_TO_S 1 | 20 | #define INTERNAL_SHIFT_FROM_R_TO_S 1 |
18 | #define INTERNAL_SHIFT_FROM_L_TO_S 2 | 21 | #define INTERNAL_SHIFT_FROM_L_TO_S 2 |
@@ -32,7 +35,9 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
32 | memset(src_bi, 0, sizeof(struct buffer_info)); | 35 | memset(src_bi, 0, sizeof(struct buffer_info)); |
33 | /* define dest, src, dest parent, dest position */ | 36 | /* define dest, src, dest parent, dest position */ |
34 | switch (shift_mode) { | 37 | switch (shift_mode) { |
35 | case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ | 38 | |
39 | /* used in internal_shift_left */ | ||
40 | case INTERNAL_SHIFT_FROM_S_TO_L: | ||
36 | src_bi->tb = tb; | 41 | src_bi->tb = tb; |
37 | src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); | 42 | src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); |
38 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); | 43 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); |
@@ -52,12 +57,14 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
52 | dest_bi->tb = tb; | 57 | dest_bi->tb = tb; |
53 | dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); | 58 | dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h); |
54 | dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); | 59 | dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h); |
55 | dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ | 60 | /* dest position is analog of dest->b_item_order */ |
61 | dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); | ||
56 | *d_key = tb->lkey[h]; | 62 | *d_key = tb->lkey[h]; |
57 | *cf = tb->CFL[h]; | 63 | *cf = tb->CFL[h]; |
58 | break; | 64 | break; |
59 | 65 | ||
60 | case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ | 66 | /* used in internal_shift_left */ |
67 | case INTERNAL_SHIFT_FROM_R_TO_S: | ||
61 | src_bi->tb = tb; | 68 | src_bi->tb = tb; |
62 | src_bi->bi_bh = tb->R[h]; | 69 | src_bi->bi_bh = tb->R[h]; |
63 | src_bi->bi_parent = tb->FR[h]; | 70 | src_bi->bi_parent = tb->FR[h]; |
@@ -111,7 +118,8 @@ static void internal_define_dest_src_infos(int shift_mode, | |||
111 | } | 118 | } |
112 | } | 119 | } |
113 | 120 | ||
114 | /* Insert count node pointers into buffer cur before position to + 1. | 121 | /* |
122 | * Insert count node pointers into buffer cur before position to + 1. | ||
115 | * Insert count items into buffer cur before position to. | 123 | * Insert count items into buffer cur before position to. |
116 | * Items and node pointers are specified by inserted and bh respectively. | 124 | * Items and node pointers are specified by inserted and bh respectively. |
117 | */ | 125 | */ |
@@ -190,8 +198,10 @@ static void internal_insert_childs(struct buffer_info *cur_bi, | |||
190 | 198 | ||
191 | } | 199 | } |
192 | 200 | ||
193 | /* Delete del_num items and node pointers from buffer cur starting from * | 201 | /* |
194 | * the first_i'th item and first_p'th pointers respectively. */ | 202 | * Delete del_num items and node pointers from buffer cur starting from |
203 | * the first_i'th item and first_p'th pointers respectively. | ||
204 | */ | ||
195 | static void internal_delete_pointers_items(struct buffer_info *cur_bi, | 205 | static void internal_delete_pointers_items(struct buffer_info *cur_bi, |
196 | int first_p, | 206 | int first_p, |
197 | int first_i, int del_num) | 207 | int first_i, int del_num) |
@@ -270,22 +280,30 @@ static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n) | |||
270 | 280 | ||
271 | i_from = (from == 0) ? from : from - 1; | 281 | i_from = (from == 0) ? from : from - 1; |
272 | 282 | ||
273 | /* delete n pointers starting from `from' position in CUR; | 283 | /* |
274 | delete n keys starting from 'i_from' position in CUR; | 284 | * delete n pointers starting from `from' position in CUR; |
285 | * delete n keys starting from 'i_from' position in CUR; | ||
275 | */ | 286 | */ |
276 | internal_delete_pointers_items(cur_bi, from, i_from, n); | 287 | internal_delete_pointers_items(cur_bi, from, i_from, n); |
277 | } | 288 | } |
278 | 289 | ||
279 | /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest | 290 | /* |
280 | * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest | 291 | * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer |
281 | * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest | 292 | * dest |
293 | * last_first == FIRST_TO_LAST means that we copy first items | ||
294 | * from src to tail of dest | ||
295 | * last_first == LAST_TO_FIRST means that we copy last items | ||
296 | * from src to head of dest | ||
282 | */ | 297 | */ |
283 | static void internal_copy_pointers_items(struct buffer_info *dest_bi, | 298 | static void internal_copy_pointers_items(struct buffer_info *dest_bi, |
284 | struct buffer_head *src, | 299 | struct buffer_head *src, |
285 | int last_first, int cpy_num) | 300 | int last_first, int cpy_num) |
286 | { | 301 | { |
287 | /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * | 302 | /* |
288 | * as delimiting key have already inserted to buffer dest.*/ | 303 | * ATTENTION! Number of node pointers in DEST is equal to number |
304 | * of items in DEST as delimiting key have already inserted to | ||
305 | * buffer dest. | ||
306 | */ | ||
289 | struct buffer_head *dest = dest_bi->bi_bh; | 307 | struct buffer_head *dest = dest_bi->bi_bh; |
290 | int nr_dest, nr_src; | 308 | int nr_dest, nr_src; |
291 | int dest_order, src_order; | 309 | int dest_order, src_order; |
@@ -366,7 +384,9 @@ static void internal_copy_pointers_items(struct buffer_info *dest_bi, | |||
366 | 384 | ||
367 | } | 385 | } |
368 | 386 | ||
369 | /* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. | 387 | /* |
388 | * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to | ||
389 | * buffer dest. | ||
370 | * Delete cpy_num - del_par items and node pointers from buffer src. | 390 | * Delete cpy_num - del_par items and node pointers from buffer src. |
371 | * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. | 391 | * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. |
372 | * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. | 392 | * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. |
@@ -385,8 +405,10 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi, | |||
385 | if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ | 405 | if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ |
386 | first_pointer = 0; | 406 | first_pointer = 0; |
387 | first_item = 0; | 407 | first_item = 0; |
388 | /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, | 408 | /* |
389 | for key - with first_item */ | 409 | * delete cpy_num - del_par pointers and keys starting for |
410 | * pointers with first_pointer, for key - with first_item | ||
411 | */ | ||
390 | internal_delete_pointers_items(src_bi, first_pointer, | 412 | internal_delete_pointers_items(src_bi, first_pointer, |
391 | first_item, cpy_num - del_par); | 413 | first_item, cpy_num - del_par); |
392 | } else { /* shift_right occurs */ | 414 | } else { /* shift_right occurs */ |
@@ -404,7 +426,9 @@ static void internal_move_pointers_items(struct buffer_info *dest_bi, | |||
404 | } | 426 | } |
405 | 427 | ||
406 | /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ | 428 | /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ |
407 | static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */ | 429 | static void internal_insert_key(struct buffer_info *dest_bi, |
430 | /* insert key before key with n_dest number */ | ||
431 | int dest_position_before, | ||
408 | struct buffer_head *src, int src_position) | 432 | struct buffer_head *src, int src_position) |
409 | { | 433 | { |
410 | struct buffer_head *dest = dest_bi->bi_bh; | 434 | struct buffer_head *dest = dest_bi->bi_bh; |
@@ -453,13 +477,19 @@ static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_b | |||
453 | } | 477 | } |
454 | } | 478 | } |
455 | 479 | ||
456 | /* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. | 480 | /* |
457 | * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. | 481 | * Insert d_key'th (delimiting) key from buffer cfl to tail of dest. |
482 | * Copy pointer_amount node pointers and pointer_amount - 1 items from | ||
483 | * buffer src to buffer dest. | ||
458 | * Replace d_key'th key in buffer cfl. | 484 | * Replace d_key'th key in buffer cfl. |
459 | * Delete pointer_amount items and node pointers from buffer src. | 485 | * Delete pointer_amount items and node pointers from buffer src. |
460 | */ | 486 | */ |
461 | /* this can be invoked both to shift from S to L and from R to S */ | 487 | /* this can be invoked both to shift from S to L and from R to S */ |
462 | static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ | 488 | static void internal_shift_left( |
489 | /* | ||
490 | * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S | ||
491 | */ | ||
492 | int mode, | ||
463 | struct tree_balance *tb, | 493 | struct tree_balance *tb, |
464 | int h, int pointer_amount) | 494 | int h, int pointer_amount) |
465 | { | 495 | { |
@@ -473,7 +503,10 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO | |||
473 | /*printk("pointer_amount = %d\n",pointer_amount); */ | 503 | /*printk("pointer_amount = %d\n",pointer_amount); */ |
474 | 504 | ||
475 | if (pointer_amount) { | 505 | if (pointer_amount) { |
476 | /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ | 506 | /* |
507 | * insert delimiting key from common father of dest and | ||
508 | * src to node dest into position B_NR_ITEM(dest) | ||
509 | */ | ||
477 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, | 510 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, |
478 | d_key_position); | 511 | d_key_position); |
479 | 512 | ||
@@ -492,7 +525,8 @@ static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FRO | |||
492 | 525 | ||
493 | } | 526 | } |
494 | 527 | ||
495 | /* Insert delimiting key to L[h]. | 528 | /* |
529 | * Insert delimiting key to L[h]. | ||
496 | * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. | 530 | * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. |
497 | * Delete n - 1 items and node pointers from buffer S[h]. | 531 | * Delete n - 1 items and node pointers from buffer S[h]. |
498 | */ | 532 | */ |
@@ -507,23 +541,27 @@ static void internal_shift1_left(struct tree_balance *tb, | |||
507 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 541 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
508 | &dest_bi, &src_bi, &d_key_position, &cf); | 542 | &dest_bi, &src_bi, &d_key_position, &cf); |
509 | 543 | ||
510 | if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ | 544 | /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ |
545 | if (pointer_amount > 0) | ||
511 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, | 546 | internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, |
512 | d_key_position); | 547 | d_key_position); |
513 | /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */ | ||
514 | 548 | ||
515 | /* last parameter is del_parameter */ | 549 | /* last parameter is del_parameter */ |
516 | internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, | 550 | internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST, |
517 | pointer_amount, 1); | 551 | pointer_amount, 1); |
518 | /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */ | ||
519 | } | 552 | } |
520 | 553 | ||
521 | /* Insert d_key'th (delimiting) key from buffer cfr to head of dest. | 554 | /* |
555 | * Insert d_key'th (delimiting) key from buffer cfr to head of dest. | ||
522 | * Copy n node pointers and n - 1 items from buffer src to buffer dest. | 556 | * Copy n node pointers and n - 1 items from buffer src to buffer dest. |
523 | * Replace d_key'th key in buffer cfr. | 557 | * Replace d_key'th key in buffer cfr. |
524 | * Delete n items and node pointers from buffer src. | 558 | * Delete n items and node pointers from buffer src. |
525 | */ | 559 | */ |
526 | static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ | 560 | static void internal_shift_right( |
561 | /* | ||
562 | * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S | ||
563 | */ | ||
564 | int mode, | ||
527 | struct tree_balance *tb, | 565 | struct tree_balance *tb, |
528 | int h, int pointer_amount) | 566 | int h, int pointer_amount) |
529 | { | 567 | { |
@@ -538,7 +576,10 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR | |||
538 | nr = B_NR_ITEMS(src_bi.bi_bh); | 576 | nr = B_NR_ITEMS(src_bi.bi_bh); |
539 | 577 | ||
540 | if (pointer_amount > 0) { | 578 | if (pointer_amount > 0) { |
541 | /* insert delimiting key from common father of dest and src to dest node into position 0 */ | 579 | /* |
580 | * insert delimiting key from common father of dest | ||
581 | * and src to dest node into position 0 | ||
582 | */ | ||
542 | internal_insert_key(&dest_bi, 0, cf, d_key_position); | 583 | internal_insert_key(&dest_bi, 0, cf, d_key_position); |
543 | if (nr == pointer_amount - 1) { | 584 | if (nr == pointer_amount - 1) { |
544 | RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || | 585 | RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ || |
@@ -559,7 +600,8 @@ static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FR | |||
559 | pointer_amount, 0); | 600 | pointer_amount, 0); |
560 | } | 601 | } |
561 | 602 | ||
562 | /* Insert delimiting key to R[h]. | 603 | /* |
604 | * Insert delimiting key to R[h]. | ||
563 | * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. | 605 | * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. |
564 | * Delete n - 1 items and node pointers from buffer S[h]. | 606 | * Delete n - 1 items and node pointers from buffer S[h]. |
565 | */ | 607 | */ |
@@ -574,18 +616,19 @@ static void internal_shift1_right(struct tree_balance *tb, | |||
574 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 616 | internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
575 | &dest_bi, &src_bi, &d_key_position, &cf); | 617 | &dest_bi, &src_bi, &d_key_position, &cf); |
576 | 618 | ||
577 | if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ | 619 | /* insert rkey from CFR[h] to right neighbor R[h] */ |
620 | if (pointer_amount > 0) | ||
578 | internal_insert_key(&dest_bi, 0, cf, d_key_position); | 621 | internal_insert_key(&dest_bi, 0, cf, d_key_position); |
579 | /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */ | ||
580 | 622 | ||
581 | /* last parameter is del_parameter */ | 623 | /* last parameter is del_parameter */ |
582 | internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, | 624 | internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST, |
583 | pointer_amount, 1); | 625 | pointer_amount, 1); |
584 | /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */ | ||
585 | } | 626 | } |
586 | 627 | ||
587 | /* Delete insert_num node pointers together with their left items | 628 | /* |
588 | * and balance current node.*/ | 629 | * Delete insert_num node pointers together with their left items |
630 | * and balance current node. | ||
631 | */ | ||
589 | static void balance_internal_when_delete(struct tree_balance *tb, | 632 | static void balance_internal_when_delete(struct tree_balance *tb, |
590 | int h, int child_pos) | 633 | int h, int child_pos) |
591 | { | 634 | { |
@@ -626,9 +669,11 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
626 | new_root = tb->R[h - 1]; | 669 | new_root = tb->R[h - 1]; |
627 | else | 670 | else |
628 | new_root = tb->L[h - 1]; | 671 | new_root = tb->L[h - 1]; |
629 | /* switch super block's tree root block number to the new value */ | 672 | /* |
673 | * switch super block's tree root block | ||
674 | * number to the new value */ | ||
630 | PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); | 675 | PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr); |
631 | //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; | 676 | /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */ |
632 | PUT_SB_TREE_HEIGHT(tb->tb_sb, | 677 | PUT_SB_TREE_HEIGHT(tb->tb_sb, |
633 | SB_TREE_HEIGHT(tb->tb_sb) - 1); | 678 | SB_TREE_HEIGHT(tb->tb_sb) - 1); |
634 | 679 | ||
@@ -636,8 +681,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
636 | REISERFS_SB(tb->tb_sb)->s_sbh, | 681 | REISERFS_SB(tb->tb_sb)->s_sbh, |
637 | 1); | 682 | 1); |
638 | /*&&&&&&&&&&&&&&&&&&&&&& */ | 683 | /*&&&&&&&&&&&&&&&&&&&&&& */ |
684 | /* use check_internal if new root is an internal node */ | ||
639 | if (h > 1) | 685 | if (h > 1) |
640 | /* use check_internal if new root is an internal node */ | ||
641 | check_internal(new_root); | 686 | check_internal(new_root); |
642 | /*&&&&&&&&&&&&&&&&&&&&&& */ | 687 | /*&&&&&&&&&&&&&&&&&&&&&& */ |
643 | 688 | ||
@@ -648,7 +693,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
648 | return; | 693 | return; |
649 | } | 694 | } |
650 | 695 | ||
651 | if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */ | 696 | /* join S[h] with L[h] */ |
697 | if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { | ||
652 | 698 | ||
653 | RFALSE(tb->rnum[h] != 0, | 699 | RFALSE(tb->rnum[h] != 0, |
654 | "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", | 700 | "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", |
@@ -660,7 +706,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
660 | return; | 706 | return; |
661 | } | 707 | } |
662 | 708 | ||
663 | if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */ | 709 | /* join S[h] with R[h] */ |
710 | if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { | ||
664 | RFALSE(tb->lnum[h] != 0, | 711 | RFALSE(tb->lnum[h] != 0, |
665 | "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", | 712 | "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", |
666 | h, tb->lnum[h]); | 713 | h, tb->lnum[h]); |
@@ -671,17 +718,18 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
671 | return; | 718 | return; |
672 | } | 719 | } |
673 | 720 | ||
674 | if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */ | 721 | /* borrow from left neighbor L[h] */ |
722 | if (tb->lnum[h] < 0) { | ||
675 | RFALSE(tb->rnum[h] != 0, | 723 | RFALSE(tb->rnum[h] != 0, |
676 | "wrong tb->rnum[%d]==%d when borrow from L[h]", h, | 724 | "wrong tb->rnum[%d]==%d when borrow from L[h]", h, |
677 | tb->rnum[h]); | 725 | tb->rnum[h]); |
678 | /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */ | ||
679 | internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, | 726 | internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h, |
680 | -tb->lnum[h]); | 727 | -tb->lnum[h]); |
681 | return; | 728 | return; |
682 | } | 729 | } |
683 | 730 | ||
684 | if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */ | 731 | /* borrow from right neighbor R[h] */ |
732 | if (tb->rnum[h] < 0) { | ||
685 | RFALSE(tb->lnum[h] != 0, | 733 | RFALSE(tb->lnum[h] != 0, |
686 | "invalid tb->lnum[%d]==%d when borrow from R[h]", | 734 | "invalid tb->lnum[%d]==%d when borrow from R[h]", |
687 | h, tb->lnum[h]); | 735 | h, tb->lnum[h]); |
@@ -689,7 +737,8 @@ static void balance_internal_when_delete(struct tree_balance *tb, | |||
689 | return; | 737 | return; |
690 | } | 738 | } |
691 | 739 | ||
692 | if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */ | 740 | /* split S[h] into two parts and put them into neighbors */ |
741 | if (tb->lnum[h] > 0) { | ||
693 | RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, | 742 | RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1, |
694 | "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", | 743 | "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", |
695 | h, tb->lnum[h], h, tb->rnum[h], n); | 744 | h, tb->lnum[h], h, tb->rnum[h], n); |
@@ -737,29 +786,36 @@ static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key) | |||
737 | do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); | 786 | do_balance_mark_internal_dirty(tb, tb->CFR[h], 0); |
738 | } | 787 | } |
739 | 788 | ||
740 | int balance_internal(struct tree_balance *tb, /* tree_balance structure */ | 789 | |
741 | int h, /* level of the tree */ | 790 | /* |
742 | int child_pos, struct item_head *insert_key, /* key for insertion on higher level */ | 791 | * if inserting/pasting { |
743 | struct buffer_head **insert_ptr /* node for insertion on higher level */ | 792 | * child_pos is the position of the node-pointer in S[h] that |
744 | ) | 793 | * pointed to S[h-1] before balancing of the h-1 level; |
745 | /* if inserting/pasting | 794 | * this means that new pointers and items must be inserted AFTER |
746 | { | 795 | * child_pos |
747 | child_pos is the position of the node-pointer in S[h] that * | 796 | * } else { |
748 | pointed to S[h-1] before balancing of the h-1 level; * | 797 | * it is the position of the leftmost pointer that must be deleted |
749 | this means that new pointers and items must be inserted AFTER * | 798 | * (together with its corresponding key to the left of the pointer) |
750 | child_pos | 799 | * as a result of the previous level's balancing. |
751 | } | 800 | * } |
752 | else | 801 | */ |
753 | { | 802 | |
754 | it is the position of the leftmost pointer that must be deleted (together with | 803 | int balance_internal(struct tree_balance *tb, |
755 | its corresponding key to the left of the pointer) | 804 | int h, /* level of the tree */ |
756 | as a result of the previous level's balancing. | 805 | int child_pos, |
757 | } | 806 | /* key for insertion on higher level */ |
758 | */ | 807 | struct item_head *insert_key, |
808 | /* node for insertion on higher level */ | ||
809 | struct buffer_head **insert_ptr) | ||
759 | { | 810 | { |
760 | struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); | 811 | struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h); |
761 | struct buffer_info bi; | 812 | struct buffer_info bi; |
762 | int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ | 813 | |
814 | /* | ||
815 | * we return this: it is 0 if there is no S[h], | ||
816 | * else it is tb->S[h]->b_item_order | ||
817 | */ | ||
818 | int order; | ||
763 | int insert_num, n, k; | 819 | int insert_num, n, k; |
764 | struct buffer_head *S_new; | 820 | struct buffer_head *S_new; |
765 | struct item_head new_insert_key; | 821 | struct item_head new_insert_key; |
@@ -774,8 +830,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
774 | (tbSh) ? PATH_H_POSITION(tb->tb_path, | 830 | (tbSh) ? PATH_H_POSITION(tb->tb_path, |
775 | h + 1) /*tb->S[h]->b_item_order */ : 0; | 831 | h + 1) /*tb->S[h]->b_item_order */ : 0; |
776 | 832 | ||
777 | /* Using insert_size[h] calculate the number insert_num of items | 833 | /* |
778 | that must be inserted to or deleted from S[h]. */ | 834 | * Using insert_size[h] calculate the number insert_num of items |
835 | * that must be inserted to or deleted from S[h]. | ||
836 | */ | ||
779 | insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); | 837 | insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE)); |
780 | 838 | ||
781 | /* Check whether insert_num is proper * */ | 839 | /* Check whether insert_num is proper * */ |
@@ -794,23 +852,21 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
794 | 852 | ||
795 | k = 0; | 853 | k = 0; |
796 | if (tb->lnum[h] > 0) { | 854 | if (tb->lnum[h] > 0) { |
797 | /* shift lnum[h] items from S[h] to the left neighbor L[h]. | 855 | /* |
798 | check how many of new items fall into L[h] or CFL[h] after | 856 | * shift lnum[h] items from S[h] to the left neighbor L[h]. |
799 | shifting */ | 857 | * check how many of new items fall into L[h] or CFL[h] after |
858 | * shifting | ||
859 | */ | ||
800 | n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ | 860 | n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */ |
801 | if (tb->lnum[h] <= child_pos) { | 861 | if (tb->lnum[h] <= child_pos) { |
802 | /* new items don't fall into L[h] or CFL[h] */ | 862 | /* new items don't fall into L[h] or CFL[h] */ |
803 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 863 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
804 | tb->lnum[h]); | 864 | tb->lnum[h]); |
805 | /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */ | ||
806 | child_pos -= tb->lnum[h]; | 865 | child_pos -= tb->lnum[h]; |
807 | } else if (tb->lnum[h] > child_pos + insert_num) { | 866 | } else if (tb->lnum[h] > child_pos + insert_num) { |
808 | /* all new items fall into L[h] */ | 867 | /* all new items fall into L[h] */ |
809 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, | 868 | internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, |
810 | tb->lnum[h] - insert_num); | 869 | tb->lnum[h] - insert_num); |
811 | /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, | ||
812 | tb->lnum[h]-insert_num); | ||
813 | */ | ||
814 | /* insert insert_num keys and node-pointers into L[h] */ | 870 | /* insert insert_num keys and node-pointers into L[h] */ |
815 | bi.tb = tb; | 871 | bi.tb = tb; |
816 | bi.bi_bh = tb->L[h]; | 872 | bi.bi_bh = tb->L[h]; |
@@ -826,7 +882,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
826 | } else { | 882 | } else { |
827 | struct disk_child *dc; | 883 | struct disk_child *dc; |
828 | 884 | ||
829 | /* some items fall into L[h] or CFL[h], but some don't fall */ | 885 | /* |
886 | * some items fall into L[h] or CFL[h], | ||
887 | * but some don't fall | ||
888 | */ | ||
830 | internal_shift1_left(tb, h, child_pos + 1); | 889 | internal_shift1_left(tb, h, child_pos + 1); |
831 | /* calculate number of new items that fall into L[h] */ | 890 | /* calculate number of new items that fall into L[h] */ |
832 | k = tb->lnum[h] - child_pos - 1; | 891 | k = tb->lnum[h] - child_pos - 1; |
@@ -841,7 +900,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
841 | 900 | ||
842 | replace_lkey(tb, h, insert_key + k); | 901 | replace_lkey(tb, h, insert_key + k); |
843 | 902 | ||
844 | /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ | 903 | /* |
904 | * replace the first node-ptr in S[h] by | ||
905 | * node-ptr to insert_ptr[k] | ||
906 | */ | ||
845 | dc = B_N_CHILD(tbSh, 0); | 907 | dc = B_N_CHILD(tbSh, 0); |
846 | put_dc_size(dc, | 908 | put_dc_size(dc, |
847 | MAX_CHILD_SIZE(insert_ptr[k]) - | 909 | MAX_CHILD_SIZE(insert_ptr[k]) - |
@@ -860,17 +922,17 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
860 | /* tb->lnum[h] > 0 */ | 922 | /* tb->lnum[h] > 0 */ |
861 | if (tb->rnum[h] > 0) { | 923 | if (tb->rnum[h] > 0) { |
862 | /*shift rnum[h] items from S[h] to the right neighbor R[h] */ | 924 | /*shift rnum[h] items from S[h] to the right neighbor R[h] */ |
863 | /* check how many of new items fall into R or CFR after shifting */ | 925 | /* |
926 | * check how many of new items fall into R or CFR | ||
927 | * after shifting | ||
928 | */ | ||
864 | n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ | 929 | n = B_NR_ITEMS(tbSh); /* number of items in S[h] */ |
865 | if (n - tb->rnum[h] >= child_pos) | 930 | if (n - tb->rnum[h] >= child_pos) |
866 | /* new items fall into S[h] */ | 931 | /* new items fall into S[h] */ |
867 | /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */ | ||
868 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 932 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
869 | tb->rnum[h]); | 933 | tb->rnum[h]); |
870 | else if (n + insert_num - tb->rnum[h] < child_pos) { | 934 | else if (n + insert_num - tb->rnum[h] < child_pos) { |
871 | /* all new items fall into R[h] */ | 935 | /* all new items fall into R[h] */ |
872 | /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], | ||
873 | tb->rnum[h] - insert_num); */ | ||
874 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, | 936 | internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, |
875 | tb->rnum[h] - insert_num); | 937 | tb->rnum[h] - insert_num); |
876 | 938 | ||
@@ -904,7 +966,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
904 | 966 | ||
905 | replace_rkey(tb, h, insert_key + insert_num - k - 1); | 967 | replace_rkey(tb, h, insert_key + insert_num - k - 1); |
906 | 968 | ||
907 | /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */ | 969 | /* |
970 | * replace the first node-ptr in R[h] by | ||
971 | * node-ptr insert_ptr[insert_num-k-1] | ||
972 | */ | ||
908 | dc = B_N_CHILD(tb->R[h], 0); | 973 | dc = B_N_CHILD(tb->R[h], 0); |
909 | put_dc_size(dc, | 974 | put_dc_size(dc, |
910 | MAX_CHILD_SIZE(insert_ptr | 975 | MAX_CHILD_SIZE(insert_ptr |
@@ -921,7 +986,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
921 | } | 986 | } |
922 | } | 987 | } |
923 | 988 | ||
924 | /** Fill new node that appears instead of S[h] **/ | 989 | /** Fill new node that appears instead of S[h] **/ |
925 | RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); | 990 | RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); |
926 | RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); | 991 | RFALSE(tb->blknum[h] < 0, "blknum can not be < 0"); |
927 | 992 | ||
@@ -1002,11 +1067,13 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1002 | /* last parameter is del_par */ | 1067 | /* last parameter is del_par */ |
1003 | internal_move_pointers_items(&dest_bi, &src_bi, | 1068 | internal_move_pointers_items(&dest_bi, &src_bi, |
1004 | LAST_TO_FIRST, snum, 0); | 1069 | LAST_TO_FIRST, snum, 0); |
1005 | /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */ | ||
1006 | } else if (n + insert_num - snum < child_pos) { | 1070 | } else if (n + insert_num - snum < child_pos) { |
1007 | /* all new items fall into S_new */ | 1071 | /* all new items fall into S_new */ |
1008 | /* store the delimiting key for the next level */ | 1072 | /* store the delimiting key for the next level */ |
1009 | /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ | 1073 | /* |
1074 | * new_insert_key = (n + insert_item - snum)'th | ||
1075 | * key in S[h] | ||
1076 | */ | ||
1010 | memcpy(&new_insert_key, | 1077 | memcpy(&new_insert_key, |
1011 | internal_key(tbSh, n + insert_num - snum), | 1078 | internal_key(tbSh, n + insert_num - snum), |
1012 | KEY_SIZE); | 1079 | KEY_SIZE); |
@@ -1014,9 +1081,11 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1014 | internal_move_pointers_items(&dest_bi, &src_bi, | 1081 | internal_move_pointers_items(&dest_bi, &src_bi, |
1015 | LAST_TO_FIRST, | 1082 | LAST_TO_FIRST, |
1016 | snum - insert_num, 0); | 1083 | snum - insert_num, 0); |
1017 | /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */ | ||
1018 | 1084 | ||
1019 | /* insert insert_num keys and node-pointers into S_new */ | 1085 | /* |
1086 | * insert insert_num keys and node-pointers | ||
1087 | * into S_new | ||
1088 | */ | ||
1020 | internal_insert_childs(&dest_bi, | 1089 | internal_insert_childs(&dest_bi, |
1021 | /*S_new,tb->S[h-1]->b_next, */ | 1090 | /*S_new,tb->S[h-1]->b_next, */ |
1022 | child_pos - n - insert_num + | 1091 | child_pos - n - insert_num + |
@@ -1033,7 +1102,6 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1033 | internal_move_pointers_items(&dest_bi, &src_bi, | 1102 | internal_move_pointers_items(&dest_bi, &src_bi, |
1034 | LAST_TO_FIRST, | 1103 | LAST_TO_FIRST, |
1035 | n - child_pos + 1, 1); | 1104 | n - child_pos + 1, 1); |
1036 | /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */ | ||
1037 | /* calculate number of new items that fall into S_new */ | 1105 | /* calculate number of new items that fall into S_new */ |
1038 | k = snum - n + child_pos - 1; | 1106 | k = snum - n + child_pos - 1; |
1039 | 1107 | ||
@@ -1043,7 +1111,10 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1043 | /* new_insert_key = insert_key[insert_num - k - 1] */ | 1111 | /* new_insert_key = insert_key[insert_num - k - 1] */ |
1044 | memcpy(&new_insert_key, insert_key + insert_num - k - 1, | 1112 | memcpy(&new_insert_key, insert_key + insert_num - k - 1, |
1045 | KEY_SIZE); | 1113 | KEY_SIZE); |
1046 | /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ | 1114 | /* |
1115 | * replace first node-ptr in S_new by node-ptr | ||
1116 | * to insert_ptr[insert_num-k-1] | ||
1117 | */ | ||
1047 | 1118 | ||
1048 | dc = B_N_CHILD(S_new, 0); | 1119 | dc = B_N_CHILD(S_new, 0); |
1049 | put_dc_size(dc, | 1120 | put_dc_size(dc, |
@@ -1066,7 +1137,7 @@ int balance_internal(struct tree_balance *tb, /* tree_balance structure | |||
1066 | || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", | 1137 | || buffer_dirty(S_new), "cm-00001: bad S_new (%b)", |
1067 | S_new); | 1138 | S_new); |
1068 | 1139 | ||
1069 | // S_new is released in unfix_nodes | 1140 | /* S_new is released in unfix_nodes */ |
1070 | } | 1141 | } |
1071 | 1142 | ||
1072 | n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ | 1143 | n = B_NR_ITEMS(tbSh); /*number of items in S[h] */ |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index b8d3ffb1f722..cc2095943ec6 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -25,7 +25,10 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
25 | 25 | ||
26 | void reiserfs_evict_inode(struct inode *inode) | 26 | void reiserfs_evict_inode(struct inode *inode) |
27 | { | 27 | { |
28 | /* We need blocks for transaction + (user+group) quota update (possibly delete) */ | 28 | /* |
29 | * We need blocks for transaction + (user+group) quota | ||
30 | * update (possibly delete) | ||
31 | */ | ||
29 | int jbegin_count = | 32 | int jbegin_count = |
30 | JOURNAL_PER_BALANCE_CNT * 2 + | 33 | JOURNAL_PER_BALANCE_CNT * 2 + |
31 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); | 34 | 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); |
@@ -39,8 +42,12 @@ void reiserfs_evict_inode(struct inode *inode) | |||
39 | if (inode->i_nlink) | 42 | if (inode->i_nlink) |
40 | goto no_delete; | 43 | goto no_delete; |
41 | 44 | ||
42 | /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ | 45 | /* |
43 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ | 46 | * The = 0 happens when we abort creating a new inode |
47 | * for some reason like lack of space.. | ||
48 | * also handles bad_inode case | ||
49 | */ | ||
50 | if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { | ||
44 | 51 | ||
45 | reiserfs_delete_xattrs(inode); | 52 | reiserfs_delete_xattrs(inode); |
46 | 53 | ||
@@ -54,9 +61,11 @@ void reiserfs_evict_inode(struct inode *inode) | |||
54 | 61 | ||
55 | err = reiserfs_delete_object(&th, inode); | 62 | err = reiserfs_delete_object(&th, inode); |
56 | 63 | ||
57 | /* Do quota update inside a transaction for journaled quotas. We must do that | 64 | /* |
58 | * after delete_object so that quota updates go into the same transaction as | 65 | * Do quota update inside a transaction for journaled quotas. |
59 | * stat data deletion */ | 66 | * We must do that after delete_object so that quota updates |
67 | * go into the same transaction as stat data deletion | ||
68 | */ | ||
60 | if (!err) { | 69 | if (!err) { |
61 | int depth = reiserfs_write_unlock_nested(inode->i_sb); | 70 | int depth = reiserfs_write_unlock_nested(inode->i_sb); |
62 | dquot_free_inode(inode); | 71 | dquot_free_inode(inode); |
@@ -66,22 +75,29 @@ void reiserfs_evict_inode(struct inode *inode) | |||
66 | if (journal_end(&th, inode->i_sb, jbegin_count)) | 75 | if (journal_end(&th, inode->i_sb, jbegin_count)) |
67 | goto out; | 76 | goto out; |
68 | 77 | ||
69 | /* check return value from reiserfs_delete_object after | 78 | /* |
79 | * check return value from reiserfs_delete_object after | ||
70 | * ending the transaction | 80 | * ending the transaction |
71 | */ | 81 | */ |
72 | if (err) | 82 | if (err) |
73 | goto out; | 83 | goto out; |
74 | 84 | ||
75 | /* all items of file are deleted, so we can remove "save" link */ | 85 | /* |
76 | remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything | 86 | * all items of file are deleted, so we can remove |
77 | * about an error here */ | 87 | * "save" link |
88 | * we can't do anything about an error here | ||
89 | */ | ||
90 | remove_save_link(inode, 0 /* not truncate */); | ||
78 | out: | 91 | out: |
79 | reiserfs_write_unlock(inode->i_sb); | 92 | reiserfs_write_unlock(inode->i_sb); |
80 | } else { | 93 | } else { |
81 | /* no object items are in the tree */ | 94 | /* no object items are in the tree */ |
82 | ; | 95 | ; |
83 | } | 96 | } |
84 | clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ | 97 | |
98 | /* note this must go after the journal_end to prevent deadlock */ | ||
99 | clear_inode(inode); | ||
100 | |||
85 | dquot_drop(inode); | 101 | dquot_drop(inode); |
86 | inode->i_blocks = 0; | 102 | inode->i_blocks = 0; |
87 | return; | 103 | return; |
@@ -103,8 +119,10 @@ static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, | |||
103 | key->key_length = length; | 119 | key->key_length = length; |
104 | } | 120 | } |
105 | 121 | ||
106 | /* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set | 122 | /* |
107 | offset and type of key */ | 123 | * take base of inode_key (it comes from inode always) (dirid, objectid) |
124 | * and version from an inode, set offset and type of key | ||
125 | */ | ||
108 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | 126 | void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, |
109 | int type, int length) | 127 | int type, int length) |
110 | { | 128 | { |
@@ -114,9 +132,7 @@ void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, | |||
114 | length); | 132 | length); |
115 | } | 133 | } |
116 | 134 | ||
117 | // | 135 | /* when key is 0, do not set version and short key */ |
118 | // when key is 0, do not set version and short key | ||
119 | // | ||
120 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | 136 | inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, |
121 | int version, | 137 | int version, |
122 | loff_t offset, int type, int length, | 138 | loff_t offset, int type, int length, |
@@ -132,43 +148,47 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | |||
132 | set_le_ih_k_type(ih, type); | 148 | set_le_ih_k_type(ih, type); |
133 | put_ih_item_len(ih, length); | 149 | put_ih_item_len(ih, length); |
134 | /* set_ih_free_space (ih, 0); */ | 150 | /* set_ih_free_space (ih, 0); */ |
135 | // for directory items it is entry count, for directs and stat | 151 | /* |
136 | // datas - 0xffff, for indirects - 0 | 152 | * for directory items it is entry count, for directs and stat |
153 | * datas - 0xffff, for indirects - 0 | ||
154 | */ | ||
137 | put_ih_entry_count(ih, entry_count); | 155 | put_ih_entry_count(ih, entry_count); |
138 | } | 156 | } |
139 | 157 | ||
140 | // | 158 | /* |
141 | // FIXME: we might cache recently accessed indirect item | 159 | * FIXME: we might cache recently accessed indirect item |
142 | 160 | * Ugh. Not too eager for that.... | |
143 | // Ugh. Not too eager for that.... | 161 | * I cut the code until such time as I see a convincing argument (benchmark). |
144 | // I cut the code until such time as I see a convincing argument (benchmark). | 162 | * I don't want a bloated inode struct..., and I don't like code complexity.... |
145 | // I don't want a bloated inode struct..., and I don't like code complexity.... | 163 | */ |
146 | |||
147 | /* cutting the code is fine, since it really isn't in use yet and is easy | ||
148 | ** to add back in. But, Vladimir has a really good idea here. Think | ||
149 | ** about what happens for reading a file. For each page, | ||
150 | ** The VFS layer calls reiserfs_readpage, who searches the tree to find | ||
151 | ** an indirect item. This indirect item has X number of pointers, where | ||
152 | ** X is a big number if we've done the block allocation right. But, | ||
153 | ** we only use one or two of these pointers during each call to readpage, | ||
154 | ** needlessly researching again later on. | ||
155 | ** | ||
156 | ** The size of the cache could be dynamic based on the size of the file. | ||
157 | ** | ||
158 | ** I'd also like to see us cache the location the stat data item, since | ||
159 | ** we are needlessly researching for that frequently. | ||
160 | ** | ||
161 | ** --chris | ||
162 | */ | ||
163 | 164 | ||
164 | /* If this page has a file tail in it, and | 165 | /* |
165 | ** it was read in by get_block_create_0, the page data is valid, | 166 | * cutting the code is fine, since it really isn't in use yet and is easy |
166 | ** but tail is still sitting in a direct item, and we can't write to | 167 | * to add back in. But, Vladimir has a really good idea here. Think |
167 | ** it. So, look through this page, and check all the mapped buffers | 168 | * about what happens for reading a file. For each page, |
168 | ** to make sure they have valid block numbers. Any that don't need | 169 | * The VFS layer calls reiserfs_readpage, who searches the tree to find |
169 | ** to be unmapped, so that __block_write_begin will correctly call | 170 | * an indirect item. This indirect item has X number of pointers, where |
170 | ** reiserfs_get_block to convert the tail into an unformatted node | 171 | * X is a big number if we've done the block allocation right. But, |
171 | */ | 172 | * we only use one or two of these pointers during each call to readpage, |
173 | * needlessly researching again later on. | ||
174 | * | ||
175 | * The size of the cache could be dynamic based on the size of the file. | ||
176 | * | ||
177 | * I'd also like to see us cache the location the stat data item, since | ||
178 | * we are needlessly researching for that frequently. | ||
179 | * | ||
180 | * --chris | ||
181 | */ | ||
182 | |||
183 | /* | ||
184 | * If this page has a file tail in it, and | ||
185 | * it was read in by get_block_create_0, the page data is valid, | ||
186 | * but tail is still sitting in a direct item, and we can't write to | ||
187 | * it. So, look through this page, and check all the mapped buffers | ||
188 | * to make sure they have valid block numbers. Any that don't need | ||
189 | * to be unmapped, so that __block_write_begin will correctly call | ||
190 | * reiserfs_get_block to convert the tail into an unformatted node | ||
191 | */ | ||
172 | static inline void fix_tail_page_for_writing(struct page *page) | 192 | static inline void fix_tail_page_for_writing(struct page *page) |
173 | { | 193 | { |
174 | struct buffer_head *head, *next, *bh; | 194 | struct buffer_head *head, *next, *bh; |
@@ -186,8 +206,10 @@ static inline void fix_tail_page_for_writing(struct page *page) | |||
186 | } | 206 | } |
187 | } | 207 | } |
188 | 208 | ||
189 | /* reiserfs_get_block does not need to allocate a block only if it has been | 209 | /* |
190 | done already or non-hole position has been found in the indirect item */ | 210 | * reiserfs_get_block does not need to allocate a block only if it has been |
211 | * done already or non-hole position has been found in the indirect item | ||
212 | */ | ||
191 | static inline int allocation_needed(int retval, b_blocknr_t allocated, | 213 | static inline int allocation_needed(int retval, b_blocknr_t allocated, |
192 | struct item_head *ih, | 214 | struct item_head *ih, |
193 | __le32 * item, int pos_in_item) | 215 | __le32 * item, int pos_in_item) |
@@ -211,14 +233,16 @@ static inline void set_block_dev_mapped(struct buffer_head *bh, | |||
211 | map_bh(bh, inode->i_sb, block); | 233 | map_bh(bh, inode->i_sb, block); |
212 | } | 234 | } |
213 | 235 | ||
214 | // | 236 | /* |
215 | // files which were created in the earlier version can not be longer, | 237 | * files which were created in the earlier version can not be longer, |
216 | // than 2 gb | 238 | * than 2 gb |
217 | // | 239 | */ |
218 | static int file_capable(struct inode *inode, sector_t block) | 240 | static int file_capable(struct inode *inode, sector_t block) |
219 | { | 241 | { |
220 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. | 242 | /* it is new file. */ |
221 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb | 243 | if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || |
244 | /* old file, but 'block' is inside of 2gb */ | ||
245 | block < (1 << (31 - inode->i_sb->s_blocksize_bits))) | ||
222 | return 1; | 246 | return 1; |
223 | 247 | ||
224 | return 0; | 248 | return 0; |
@@ -250,14 +274,14 @@ static int restart_transaction(struct reiserfs_transaction_handle *th, | |||
250 | return err; | 274 | return err; |
251 | } | 275 | } |
252 | 276 | ||
253 | // it is called by get_block when create == 0. Returns block number | 277 | /* |
254 | // for 'block'-th logical block of file. When it hits direct item it | 278 | * it is called by get_block when create == 0. Returns block number |
255 | // returns 0 (being called from bmap) or read direct item into piece | 279 | * for 'block'-th logical block of file. When it hits direct item it |
256 | // of page (bh_result) | 280 | * returns 0 (being called from bmap) or read direct item into piece |
257 | 281 | * of page (bh_result) | |
258 | // Please improve the english/clarity in the comment above, as it is | 282 | * Please improve the english/clarity in the comment above, as it is |
259 | // hard to understand. | 283 | * hard to understand. |
260 | 284 | */ | |
261 | static int _get_block_create_0(struct inode *inode, sector_t block, | 285 | static int _get_block_create_0(struct inode *inode, sector_t block, |
262 | struct buffer_head *bh_result, int args) | 286 | struct buffer_head *bh_result, int args) |
263 | { | 287 | { |
@@ -273,7 +297,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
273 | int done = 0; | 297 | int done = 0; |
274 | unsigned long offset; | 298 | unsigned long offset; |
275 | 299 | ||
276 | // prepare the key to look for the 'block'-th block of file | 300 | /* prepare the key to look for the 'block'-th block of file */ |
277 | make_cpu_key(&key, inode, | 301 | make_cpu_key(&key, inode, |
278 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, | 302 | (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, |
279 | 3); | 303 | 3); |
@@ -285,23 +309,28 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
285 | kunmap(bh_result->b_page); | 309 | kunmap(bh_result->b_page); |
286 | if (result == IO_ERROR) | 310 | if (result == IO_ERROR) |
287 | return -EIO; | 311 | return -EIO; |
288 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 312 | /* |
289 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 313 | * We do not return -ENOENT if there is a hole but page is |
314 | * uptodate, because it means that there is some MMAPED data | ||
315 | * associated with it that is yet to be written to disk. | ||
316 | */ | ||
290 | if ((args & GET_BLOCK_NO_HOLE) | 317 | if ((args & GET_BLOCK_NO_HOLE) |
291 | && !PageUptodate(bh_result->b_page)) { | 318 | && !PageUptodate(bh_result->b_page)) { |
292 | return -ENOENT; | 319 | return -ENOENT; |
293 | } | 320 | } |
294 | return 0; | 321 | return 0; |
295 | } | 322 | } |
296 | // | 323 | |
297 | bh = get_last_bh(&path); | 324 | bh = get_last_bh(&path); |
298 | ih = tp_item_head(&path); | 325 | ih = tp_item_head(&path); |
299 | if (is_indirect_le_ih(ih)) { | 326 | if (is_indirect_le_ih(ih)) { |
300 | __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); | 327 | __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); |
301 | 328 | ||
302 | /* FIXME: here we could cache indirect item or part of it in | 329 | /* |
303 | the inode to avoid search_by_key in case of subsequent | 330 | * FIXME: here we could cache indirect item or part of it in |
304 | access to file */ | 331 | * the inode to avoid search_by_key in case of subsequent |
332 | * access to file | ||
333 | */ | ||
305 | blocknr = get_block_num(ind_item, path.pos_in_item); | 334 | blocknr = get_block_num(ind_item, path.pos_in_item); |
306 | ret = 0; | 335 | ret = 0; |
307 | if (blocknr) { | 336 | if (blocknr) { |
@@ -311,8 +340,12 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
311 | set_buffer_boundary(bh_result); | 340 | set_buffer_boundary(bh_result); |
312 | } | 341 | } |
313 | } else | 342 | } else |
314 | // We do not return -ENOENT if there is a hole but page is uptodate, because it means | 343 | /* |
315 | // That there is some MMAPED data associated with it that is yet to be written to disk. | 344 | * We do not return -ENOENT if there is a hole but |
345 | * page is uptodate, because it means that there is | ||
346 | * some MMAPED data associated with it that is | ||
347 | * yet to be written to disk. | ||
348 | */ | ||
316 | if ((args & GET_BLOCK_NO_HOLE) | 349 | if ((args & GET_BLOCK_NO_HOLE) |
317 | && !PageUptodate(bh_result->b_page)) { | 350 | && !PageUptodate(bh_result->b_page)) { |
318 | ret = -ENOENT; | 351 | ret = -ENOENT; |
@@ -323,41 +356,45 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
323 | kunmap(bh_result->b_page); | 356 | kunmap(bh_result->b_page); |
324 | return ret; | 357 | return ret; |
325 | } | 358 | } |
326 | // requested data are in direct item(s) | 359 | /* requested data are in direct item(s) */ |
327 | if (!(args & GET_BLOCK_READ_DIRECT)) { | 360 | if (!(args & GET_BLOCK_READ_DIRECT)) { |
328 | // we are called by bmap. FIXME: we can not map block of file | 361 | /* |
329 | // when it is stored in direct item(s) | 362 | * we are called by bmap. FIXME: we can not map block of file |
363 | * when it is stored in direct item(s) | ||
364 | */ | ||
330 | pathrelse(&path); | 365 | pathrelse(&path); |
331 | if (p) | 366 | if (p) |
332 | kunmap(bh_result->b_page); | 367 | kunmap(bh_result->b_page); |
333 | return -ENOENT; | 368 | return -ENOENT; |
334 | } | 369 | } |
335 | 370 | ||
336 | /* if we've got a direct item, and the buffer or page was uptodate, | 371 | /* |
337 | ** we don't want to pull data off disk again. skip to the | 372 | * if we've got a direct item, and the buffer or page was uptodate, |
338 | ** end, where we map the buffer and return | 373 | * we don't want to pull data off disk again. skip to the |
374 | * end, where we map the buffer and return | ||
339 | */ | 375 | */ |
340 | if (buffer_uptodate(bh_result)) { | 376 | if (buffer_uptodate(bh_result)) { |
341 | goto finished; | 377 | goto finished; |
342 | } else | 378 | } else |
343 | /* | 379 | /* |
344 | ** grab_tail_page can trigger calls to reiserfs_get_block on up to date | 380 | * grab_tail_page can trigger calls to reiserfs_get_block on |
345 | ** pages without any buffers. If the page is up to date, we don't want | 381 | * up to date pages without any buffers. If the page is up |
346 | ** read old data off disk. Set the up to date bit on the buffer instead | 382 | * to date, we don't want read old data off disk. Set the up |
347 | ** and jump to the end | 383 | * to date bit on the buffer instead and jump to the end |
348 | */ | 384 | */ |
349 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { | 385 | if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { |
350 | set_buffer_uptodate(bh_result); | 386 | set_buffer_uptodate(bh_result); |
351 | goto finished; | 387 | goto finished; |
352 | } | 388 | } |
353 | // read file tail into part of page | 389 | /* read file tail into part of page */ |
354 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); | 390 | offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); |
355 | copy_item_head(&tmp_ih, ih); | 391 | copy_item_head(&tmp_ih, ih); |
356 | 392 | ||
357 | /* we only want to kmap if we are reading the tail into the page. | 393 | /* |
358 | ** this is not the common case, so we don't kmap until we are | 394 | * we only want to kmap if we are reading the tail into the page. |
359 | ** sure we need to. But, this means the item might move if | 395 | * this is not the common case, so we don't kmap until we are |
360 | ** kmap schedules | 396 | * sure we need to. But, this means the item might move if |
397 | * kmap schedules | ||
361 | */ | 398 | */ |
362 | if (!p) | 399 | if (!p) |
363 | p = (char *)kmap(bh_result->b_page); | 400 | p = (char *)kmap(bh_result->b_page); |
@@ -368,10 +405,11 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
368 | if (!is_direct_le_ih(ih)) { | 405 | if (!is_direct_le_ih(ih)) { |
369 | BUG(); | 406 | BUG(); |
370 | } | 407 | } |
371 | /* make sure we don't read more bytes than actually exist in | 408 | /* |
372 | ** the file. This can happen in odd cases where i_size isn't | 409 | * make sure we don't read more bytes than actually exist in |
373 | ** correct, and when direct item padding results in a few | 410 | * the file. This can happen in odd cases where i_size isn't |
374 | ** extra bytes at the end of the direct item | 411 | * correct, and when direct item padding results in a few |
412 | * extra bytes at the end of the direct item | ||
375 | */ | 413 | */ |
376 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) | 414 | if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) |
377 | break; | 415 | break; |
@@ -390,18 +428,20 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
390 | 428 | ||
391 | p += chars; | 429 | p += chars; |
392 | 430 | ||
431 | /* | ||
432 | * we done, if read direct item is not the last item of | ||
433 | * node FIXME: we could try to check right delimiting key | ||
434 | * to see whether direct item continues in the right | ||
435 | * neighbor or rely on i_size | ||
436 | */ | ||
393 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) | 437 | if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) |
394 | // we done, if read direct item is not the last item of | ||
395 | // node FIXME: we could try to check right delimiting key | ||
396 | // to see whether direct item continues in the right | ||
397 | // neighbor or rely on i_size | ||
398 | break; | 438 | break; |
399 | 439 | ||
400 | // update key to look for the next piece | 440 | /* update key to look for the next piece */ |
401 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); | 441 | set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); |
402 | result = search_for_position_by_key(inode->i_sb, &key, &path); | 442 | result = search_for_position_by_key(inode->i_sb, &key, &path); |
403 | if (result != POSITION_FOUND) | 443 | if (result != POSITION_FOUND) |
404 | // i/o error most likely | 444 | /* i/o error most likely */ |
405 | break; | 445 | break; |
406 | bh = get_last_bh(&path); | 446 | bh = get_last_bh(&path); |
407 | ih = tp_item_head(&path); | 447 | ih = tp_item_head(&path); |
@@ -416,7 +456,8 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
416 | if (result == IO_ERROR) | 456 | if (result == IO_ERROR) |
417 | return -EIO; | 457 | return -EIO; |
418 | 458 | ||
419 | /* this buffer has valid data, but isn't valid for io. mapping it to | 459 | /* |
460 | * this buffer has valid data, but isn't valid for io. mapping it to | ||
420 | * block #0 tells the rest of reiserfs it just has a tail in it | 461 | * block #0 tells the rest of reiserfs it just has a tail in it |
421 | */ | 462 | */ |
422 | map_bh(bh_result, inode->i_sb, 0); | 463 | map_bh(bh_result, inode->i_sb, 0); |
@@ -424,8 +465,10 @@ static int _get_block_create_0(struct inode *inode, sector_t block, | |||
424 | return 0; | 465 | return 0; |
425 | } | 466 | } |
426 | 467 | ||
427 | // this is called to create file map. So, _get_block_create_0 will not | 468 | /* |
428 | // read direct item | 469 | * this is called to create file map. So, _get_block_create_0 will not |
470 | * read direct item | ||
471 | */ | ||
429 | static int reiserfs_bmap(struct inode *inode, sector_t block, | 472 | static int reiserfs_bmap(struct inode *inode, sector_t block, |
430 | struct buffer_head *bh_result, int create) | 473 | struct buffer_head *bh_result, int create) |
431 | { | 474 | { |
@@ -439,22 +482,23 @@ static int reiserfs_bmap(struct inode *inode, sector_t block, | |||
439 | return 0; | 482 | return 0; |
440 | } | 483 | } |
441 | 484 | ||
442 | /* special version of get_block that is only used by grab_tail_page right | 485 | /* |
443 | ** now. It is sent to __block_write_begin, and when you try to get a | 486 | * special version of get_block that is only used by grab_tail_page right |
444 | ** block past the end of the file (or a block from a hole) it returns | 487 | * now. It is sent to __block_write_begin, and when you try to get a |
445 | ** -ENOENT instead of a valid buffer. __block_write_begin expects to | 488 | * block past the end of the file (or a block from a hole) it returns |
446 | ** be able to do i/o on the buffers returned, unless an error value | 489 | * -ENOENT instead of a valid buffer. __block_write_begin expects to |
447 | ** is also returned. | 490 | * be able to do i/o on the buffers returned, unless an error value |
448 | ** | 491 | * is also returned. |
449 | ** So, this allows __block_write_begin to be used for reading a single block | 492 | * |
450 | ** in a page. Where it does not produce a valid page for holes, or past the | 493 | * So, this allows __block_write_begin to be used for reading a single block |
451 | ** end of the file. This turns out to be exactly what we need for reading | 494 | * in a page. Where it does not produce a valid page for holes, or past the |
452 | ** tails for conversion. | 495 | * end of the file. This turns out to be exactly what we need for reading |
453 | ** | 496 | * tails for conversion. |
454 | ** The point of the wrapper is forcing a certain value for create, even | 497 | * |
455 | ** though the VFS layer is calling this function with create==1. If you | 498 | * The point of the wrapper is forcing a certain value for create, even |
456 | ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, | 499 | * though the VFS layer is calling this function with create==1. If you |
457 | ** don't use this function. | 500 | * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, |
501 | * don't use this function. | ||
458 | */ | 502 | */ |
459 | static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, | 503 | static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, |
460 | struct buffer_head *bh_result, | 504 | struct buffer_head *bh_result, |
@@ -463,8 +507,10 @@ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, | |||
463 | return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); | 507 | return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); |
464 | } | 508 | } |
465 | 509 | ||
466 | /* This is special helper for reiserfs_get_block in case we are executing | 510 | /* |
467 | direct_IO request. */ | 511 | * This is special helper for reiserfs_get_block in case we are executing |
512 | * direct_IO request. | ||
513 | */ | ||
468 | static int reiserfs_get_blocks_direct_io(struct inode *inode, | 514 | static int reiserfs_get_blocks_direct_io(struct inode *inode, |
469 | sector_t iblock, | 515 | sector_t iblock, |
470 | struct buffer_head *bh_result, | 516 | struct buffer_head *bh_result, |
@@ -474,9 +520,11 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
474 | 520 | ||
475 | bh_result->b_page = NULL; | 521 | bh_result->b_page = NULL; |
476 | 522 | ||
477 | /* We set the b_size before reiserfs_get_block call since it is | 523 | /* |
478 | referenced in convert_tail_for_hole() that may be called from | 524 | * We set the b_size before reiserfs_get_block call since it is |
479 | reiserfs_get_block() */ | 525 | * referenced in convert_tail_for_hole() that may be called from |
526 | * reiserfs_get_block() | ||
527 | */ | ||
480 | bh_result->b_size = (1 << inode->i_blkbits); | 528 | bh_result->b_size = (1 << inode->i_blkbits); |
481 | 529 | ||
482 | ret = reiserfs_get_block(inode, iblock, bh_result, | 530 | ret = reiserfs_get_block(inode, iblock, bh_result, |
@@ -486,14 +534,18 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
486 | 534 | ||
487 | /* don't allow direct io onto tail pages */ | 535 | /* don't allow direct io onto tail pages */ |
488 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 536 | if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
489 | /* make sure future calls to the direct io funcs for this offset | 537 | /* |
490 | ** in the file fail by unmapping the buffer | 538 | * make sure future calls to the direct io funcs for this |
539 | * offset in the file fail by unmapping the buffer | ||
491 | */ | 540 | */ |
492 | clear_buffer_mapped(bh_result); | 541 | clear_buffer_mapped(bh_result); |
493 | ret = -EINVAL; | 542 | ret = -EINVAL; |
494 | } | 543 | } |
495 | /* Possible unpacked tail. Flush the data before pages have | 544 | |
496 | disappeared */ | 545 | /* |
546 | * Possible unpacked tail. Flush the data before pages have | ||
547 | * disappeared | ||
548 | */ | ||
497 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { | 549 | if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { |
498 | int err; | 550 | int err; |
499 | 551 | ||
@@ -512,15 +564,15 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, | |||
512 | } | 564 | } |
513 | 565 | ||
514 | /* | 566 | /* |
515 | ** helper function for when reiserfs_get_block is called for a hole | 567 | * helper function for when reiserfs_get_block is called for a hole |
516 | ** but the file tail is still in a direct item | 568 | * but the file tail is still in a direct item |
517 | ** bh_result is the buffer head for the hole | 569 | * bh_result is the buffer head for the hole |
518 | ** tail_offset is the offset of the start of the tail in the file | 570 | * tail_offset is the offset of the start of the tail in the file |
519 | ** | 571 | * |
520 | ** This calls prepare_write, which will start a new transaction | 572 | * This calls prepare_write, which will start a new transaction |
521 | ** you should not be in a transaction, or have any paths held when you | 573 | * you should not be in a transaction, or have any paths held when you |
522 | ** call this. | 574 | * call this. |
523 | */ | 575 | */ |
524 | static int convert_tail_for_hole(struct inode *inode, | 576 | static int convert_tail_for_hole(struct inode *inode, |
525 | struct buffer_head *bh_result, | 577 | struct buffer_head *bh_result, |
526 | loff_t tail_offset) | 578 | loff_t tail_offset) |
@@ -540,9 +592,10 @@ static int convert_tail_for_hole(struct inode *inode, | |||
540 | tail_end = (tail_start | (bh_result->b_size - 1)) + 1; | 592 | tail_end = (tail_start | (bh_result->b_size - 1)) + 1; |
541 | 593 | ||
542 | index = tail_offset >> PAGE_CACHE_SHIFT; | 594 | index = tail_offset >> PAGE_CACHE_SHIFT; |
543 | /* hole_page can be zero in case of direct_io, we are sure | 595 | /* |
544 | that we cannot get here if we write with O_DIRECT into | 596 | * hole_page can be zero in case of direct_io, we are sure |
545 | tail page */ | 597 | * that we cannot get here if we write with O_DIRECT into tail page |
598 | */ | ||
546 | if (!hole_page || index != hole_page->index) { | 599 | if (!hole_page || index != hole_page->index) { |
547 | tail_page = grab_cache_page(inode->i_mapping, index); | 600 | tail_page = grab_cache_page(inode->i_mapping, index); |
548 | retval = -ENOMEM; | 601 | retval = -ENOMEM; |
@@ -553,14 +606,15 @@ static int convert_tail_for_hole(struct inode *inode, | |||
553 | tail_page = hole_page; | 606 | tail_page = hole_page; |
554 | } | 607 | } |
555 | 608 | ||
556 | /* we don't have to make sure the conversion did not happen while | 609 | /* |
557 | ** we were locking the page because anyone that could convert | 610 | * we don't have to make sure the conversion did not happen while |
558 | ** must first take i_mutex. | 611 | * we were locking the page because anyone that could convert |
559 | ** | 612 | * must first take i_mutex. |
560 | ** We must fix the tail page for writing because it might have buffers | 613 | * |
561 | ** that are mapped, but have a block number of 0. This indicates tail | 614 | * We must fix the tail page for writing because it might have buffers |
562 | ** data that has been read directly into the page, and | 615 | * that are mapped, but have a block number of 0. This indicates tail |
563 | ** __block_write_begin won't trigger a get_block in this case. | 616 | * data that has been read directly into the page, and |
617 | * __block_write_begin won't trigger a get_block in this case. | ||
564 | */ | 618 | */ |
565 | fix_tail_page_for_writing(tail_page); | 619 | fix_tail_page_for_writing(tail_page); |
566 | retval = __reiserfs_write_begin(tail_page, tail_start, | 620 | retval = __reiserfs_write_begin(tail_page, tail_start, |
@@ -604,7 +658,8 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
604 | struct buffer_head *bh_result, int create) | 658 | struct buffer_head *bh_result, int create) |
605 | { | 659 | { |
606 | int repeat, retval = 0; | 660 | int repeat, retval = 0; |
607 | b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int | 661 | /* b_blocknr_t is (unsigned) 32 bit int*/ |
662 | b_blocknr_t allocated_block_nr = 0; | ||
608 | INITIALIZE_PATH(path); | 663 | INITIALIZE_PATH(path); |
609 | int pos_in_item; | 664 | int pos_in_item; |
610 | struct cpu_key key; | 665 | struct cpu_key key; |
@@ -614,12 +669,14 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
614 | int done; | 669 | int done; |
615 | int fs_gen; | 670 | int fs_gen; |
616 | struct reiserfs_transaction_handle *th = NULL; | 671 | struct reiserfs_transaction_handle *th = NULL; |
617 | /* space reserved in transaction batch: | 672 | /* |
618 | . 3 balancings in direct->indirect conversion | 673 | * space reserved in transaction batch: |
619 | . 1 block involved into reiserfs_update_sd() | 674 | * . 3 balancings in direct->indirect conversion |
620 | XXX in practically impossible worst case direct2indirect() | 675 | * . 1 block involved into reiserfs_update_sd() |
621 | can incur (much) more than 3 balancings. | 676 | * XXX in practically impossible worst case direct2indirect() |
622 | quota update for user, group */ | 677 | * can incur (much) more than 3 balancings. |
678 | * quota update for user, group | ||
679 | */ | ||
623 | int jbegin_count = | 680 | int jbegin_count = |
624 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + | 681 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + |
625 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 682 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
@@ -636,8 +693,9 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
636 | return -EFBIG; | 693 | return -EFBIG; |
637 | } | 694 | } |
638 | 695 | ||
639 | /* if !create, we aren't changing the FS, so we don't need to | 696 | /* |
640 | ** log anything, so we don't need to start a transaction | 697 | * if !create, we aren't changing the FS, so we don't need to |
698 | * log anything, so we don't need to start a transaction | ||
641 | */ | 699 | */ |
642 | if (!(create & GET_BLOCK_CREATE)) { | 700 | if (!(create & GET_BLOCK_CREATE)) { |
643 | int ret; | 701 | int ret; |
@@ -647,6 +705,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
647 | reiserfs_write_unlock(inode->i_sb); | 705 | reiserfs_write_unlock(inode->i_sb); |
648 | return ret; | 706 | return ret; |
649 | } | 707 | } |
708 | |||
650 | /* | 709 | /* |
651 | * if we're already in a transaction, make sure to close | 710 | * if we're already in a transaction, make sure to close |
652 | * any new transactions we start in this func | 711 | * any new transactions we start in this func |
@@ -655,8 +714,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
655 | reiserfs_transaction_running(inode->i_sb)) | 714 | reiserfs_transaction_running(inode->i_sb)) |
656 | dangle = 0; | 715 | dangle = 0; |
657 | 716 | ||
658 | /* If file is of such a size, that it might have a tail and tails are enabled | 717 | /* |
659 | ** we should mark it as possibly needing tail packing on close | 718 | * If file is of such a size, that it might have a tail and |
719 | * tails are enabled we should mark it as possibly needing | ||
720 | * tail packing on close | ||
660 | */ | 721 | */ |
661 | if ((have_large_tails(inode->i_sb) | 722 | if ((have_large_tails(inode->i_sb) |
662 | && inode->i_size < i_block_size(inode) * 4) | 723 | && inode->i_size < i_block_size(inode) * 4) |
@@ -703,11 +764,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
703 | _allocate_block(th, block, inode, &allocated_block_nr, | 764 | _allocate_block(th, block, inode, &allocated_block_nr, |
704 | &path, create); | 765 | &path, create); |
705 | 766 | ||
767 | /* | ||
768 | * restart the transaction to give the journal a chance to free | ||
769 | * some blocks. releases the path, so we have to go back to | ||
770 | * research if we succeed on the second try | ||
771 | */ | ||
706 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { | 772 | if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { |
707 | /* restart the transaction to give the journal a chance to free | ||
708 | ** some blocks. releases the path, so we have to go back to | ||
709 | ** research if we succeed on the second try | ||
710 | */ | ||
711 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; | 773 | SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; |
712 | retval = restart_transaction(th, inode, &path); | 774 | retval = restart_transaction(th, inode, &path); |
713 | if (retval) | 775 | if (retval) |
@@ -734,9 +796,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
734 | 796 | ||
735 | if (indirect_item_found(retval, ih)) { | 797 | if (indirect_item_found(retval, ih)) { |
736 | b_blocknr_t unfm_ptr; | 798 | b_blocknr_t unfm_ptr; |
737 | /* 'block'-th block is in the file already (there is | 799 | /* |
738 | corresponding cell in some indirect item). But it may be | 800 | * 'block'-th block is in the file already (there is |
739 | zero unformatted node pointer (hole) */ | 801 | * corresponding cell in some indirect item). But it may be |
802 | * zero unformatted node pointer (hole) | ||
803 | */ | ||
740 | unfm_ptr = get_block_num(item, pos_in_item); | 804 | unfm_ptr = get_block_num(item, pos_in_item); |
741 | if (unfm_ptr == 0) { | 805 | if (unfm_ptr == 0) { |
742 | /* use allocated block to plug the hole */ | 806 | /* use allocated block to plug the hole */ |
@@ -764,9 +828,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
764 | 828 | ||
765 | reiserfs_write_unlock(inode->i_sb); | 829 | reiserfs_write_unlock(inode->i_sb); |
766 | 830 | ||
767 | /* the item was found, so new blocks were not added to the file | 831 | /* |
768 | ** there is no need to make sure the inode is updated with this | 832 | * the item was found, so new blocks were not added to the file |
769 | ** transaction | 833 | * there is no need to make sure the inode is updated with this |
834 | * transaction | ||
770 | */ | 835 | */ |
771 | return retval; | 836 | return retval; |
772 | } | 837 | } |
@@ -776,9 +841,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
776 | goto start_trans; | 841 | goto start_trans; |
777 | } | 842 | } |
778 | 843 | ||
779 | /* desired position is not found or is in the direct item. We have | 844 | /* |
780 | to append file with holes up to 'block'-th block converting | 845 | * desired position is not found or is in the direct item. We have |
781 | direct items to indirect one if necessary */ | 846 | * to append file with holes up to 'block'-th block converting |
847 | * direct items to indirect one if necessary | ||
848 | */ | ||
782 | done = 0; | 849 | done = 0; |
783 | do { | 850 | do { |
784 | if (is_statdata_le_ih(ih)) { | 851 | if (is_statdata_le_ih(ih)) { |
@@ -790,16 +857,18 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
790 | TYPE_INDIRECT, UNFM_P_SIZE, | 857 | TYPE_INDIRECT, UNFM_P_SIZE, |
791 | 0 /* free_space */ ); | 858 | 0 /* free_space */ ); |
792 | 859 | ||
860 | /* | ||
861 | * we are going to add 'block'-th block to the file. | ||
862 | * Use allocated block for that | ||
863 | */ | ||
793 | if (cpu_key_k_offset(&key) == 1) { | 864 | if (cpu_key_k_offset(&key) == 1) { |
794 | /* we are going to add 'block'-th block to the file. Use | ||
795 | allocated block for that */ | ||
796 | unp = cpu_to_le32(allocated_block_nr); | 865 | unp = cpu_to_le32(allocated_block_nr); |
797 | set_block_dev_mapped(bh_result, | 866 | set_block_dev_mapped(bh_result, |
798 | allocated_block_nr, inode); | 867 | allocated_block_nr, inode); |
799 | set_buffer_new(bh_result); | 868 | set_buffer_new(bh_result); |
800 | done = 1; | 869 | done = 1; |
801 | } | 870 | } |
802 | tmp_key = key; // ;) | 871 | tmp_key = key; /* ;) */ |
803 | set_cpu_key_k_offset(&tmp_key, 1); | 872 | set_cpu_key_k_offset(&tmp_key, 1); |
804 | PATH_LAST_POSITION(&path)++; | 873 | PATH_LAST_POSITION(&path)++; |
805 | 874 | ||
@@ -809,9 +878,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
809 | if (retval) { | 878 | if (retval) { |
810 | reiserfs_free_block(th, inode, | 879 | reiserfs_free_block(th, inode, |
811 | allocated_block_nr, 1); | 880 | allocated_block_nr, 1); |
812 | goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST | 881 | /* |
882 | * retval == -ENOSPC, -EDQUOT or -EIO | ||
883 | * or -EEXIST | ||
884 | */ | ||
885 | goto failure; | ||
813 | } | 886 | } |
814 | //mark_tail_converted (inode); | ||
815 | } else if (is_direct_le_ih(ih)) { | 887 | } else if (is_direct_le_ih(ih)) { |
816 | /* direct item has to be converted */ | 888 | /* direct item has to be converted */ |
817 | loff_t tail_offset; | 889 | loff_t tail_offset; |
@@ -819,18 +891,24 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
819 | tail_offset = | 891 | tail_offset = |
820 | ((le_ih_k_offset(ih) - | 892 | ((le_ih_k_offset(ih) - |
821 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; | 893 | 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; |
894 | |||
895 | /* | ||
896 | * direct item we just found fits into block we have | ||
897 | * to map. Convert it into unformatted node: use | ||
898 | * bh_result for the conversion | ||
899 | */ | ||
822 | if (tail_offset == cpu_key_k_offset(&key)) { | 900 | if (tail_offset == cpu_key_k_offset(&key)) { |
823 | /* direct item we just found fits into block we have | ||
824 | to map. Convert it into unformatted node: use | ||
825 | bh_result for the conversion */ | ||
826 | set_block_dev_mapped(bh_result, | 901 | set_block_dev_mapped(bh_result, |
827 | allocated_block_nr, inode); | 902 | allocated_block_nr, inode); |
828 | unbh = bh_result; | 903 | unbh = bh_result; |
829 | done = 1; | 904 | done = 1; |
830 | } else { | 905 | } else { |
831 | /* we have to padd file tail stored in direct item(s) | 906 | /* |
832 | up to block size and convert it to unformatted | 907 | * we have to pad file tail stored in direct |
833 | node. FIXME: this should also get into page cache */ | 908 | * item(s) up to block size and convert it |
909 | * to unformatted node. FIXME: this should | ||
910 | * also get into page cache | ||
911 | */ | ||
834 | 912 | ||
835 | pathrelse(&path); | 913 | pathrelse(&path); |
836 | /* | 914 | /* |
@@ -859,7 +937,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
859 | inode->i_ino, | 937 | inode->i_ino, |
860 | retval); | 938 | retval); |
861 | if (allocated_block_nr) { | 939 | if (allocated_block_nr) { |
862 | /* the bitmap, the super, and the stat data == 3 */ | 940 | /* |
941 | * the bitmap, the super, | ||
942 | * and the stat data == 3 | ||
943 | */ | ||
863 | if (!th) | 944 | if (!th) |
864 | th = reiserfs_persistent_transaction(inode->i_sb, 3); | 945 | th = reiserfs_persistent_transaction(inode->i_sb, 3); |
865 | if (th) | 946 | if (th) |
@@ -881,43 +962,57 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
881 | allocated_block_nr, 1); | 962 | allocated_block_nr, 1); |
882 | goto failure; | 963 | goto failure; |
883 | } | 964 | } |
884 | /* it is important the set_buffer_uptodate is done after | 965 | /* |
885 | ** the direct2indirect. The buffer might contain valid | 966 | * it is important the set_buffer_uptodate is done |
886 | ** data newer than the data on disk (read by readpage, changed, | 967 | * after the direct2indirect. The buffer might |
887 | ** and then sent here by writepage). direct2indirect needs | 968 | * contain valid data newer than the data on disk |
888 | ** to know if unbh was already up to date, so it can decide | 969 | * (read by readpage, changed, and then sent here by |
889 | ** if the data in unbh needs to be replaced with data from | 970 | * writepage). direct2indirect needs to know if unbh |
890 | ** the disk | 971 | * was already up to date, so it can decide if the |
972 | * data in unbh needs to be replaced with data from | ||
973 | * the disk | ||
891 | */ | 974 | */ |
892 | set_buffer_uptodate(unbh); | 975 | set_buffer_uptodate(unbh); |
893 | 976 | ||
894 | /* unbh->b_page == NULL in case of DIRECT_IO request, this means | 977 | /* |
895 | buffer will disappear shortly, so it should not be added to | 978 | * unbh->b_page == NULL in case of DIRECT_IO request, |
979 | * this means buffer will disappear shortly, so it | ||
980 | * should not be added to | ||
896 | */ | 981 | */ |
897 | if (unbh->b_page) { | 982 | if (unbh->b_page) { |
898 | /* we've converted the tail, so we must | 983 | /* |
899 | ** flush unbh before the transaction commits | 984 | * we've converted the tail, so we must |
985 | * flush unbh before the transaction commits | ||
900 | */ | 986 | */ |
901 | reiserfs_add_tail_list(inode, unbh); | 987 | reiserfs_add_tail_list(inode, unbh); |
902 | 988 | ||
903 | /* mark it dirty now to prevent commit_write from adding | 989 | /* |
904 | ** this buffer to the inode's dirty buffer list | 990 | * mark it dirty now to prevent commit_write |
991 | * from adding this buffer to the inode's | ||
992 | * dirty buffer list | ||
905 | */ | 993 | */ |
906 | /* | 994 | /* |
907 | * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). | 995 | * AKPM: changed __mark_buffer_dirty to |
908 | * It's still atomic, but it sets the page dirty too, | 996 | * mark_buffer_dirty(). It's still atomic, |
909 | * which makes it eligible for writeback at any time by the | 997 | * but it sets the page dirty too, which makes |
910 | * VM (which was also the case with __mark_buffer_dirty()) | 998 | * it eligible for writeback at any time by the |
999 | * VM (which was also the case with | ||
1000 | * __mark_buffer_dirty()) | ||
911 | */ | 1001 | */ |
912 | mark_buffer_dirty(unbh); | 1002 | mark_buffer_dirty(unbh); |
913 | } | 1003 | } |
914 | } else { | 1004 | } else { |
915 | /* append indirect item with holes if needed, when appending | 1005 | /* |
916 | pointer to 'block'-th block use block, which is already | 1006 | * append indirect item with holes if needed, when |
917 | allocated */ | 1007 | * appending pointer to 'block'-th block use block, |
1008 | * which is already allocated | ||
1009 | */ | ||
918 | struct cpu_key tmp_key; | 1010 | struct cpu_key tmp_key; |
919 | unp_t unf_single = 0; // We use this in case we need to allocate only | 1011 | /* |
920 | // one block which is a fastpath | 1012 | * We use this in case we need to allocate |
1013 | * only one block which is a fastpath | ||
1014 | */ | ||
1015 | unp_t unf_single = 0; | ||
921 | unp_t *un; | 1016 | unp_t *un; |
922 | __u64 max_to_insert = | 1017 | __u64 max_to_insert = |
923 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / | 1018 | MAX_ITEM_LEN(inode->i_sb->s_blocksize) / |
@@ -926,14 +1021,17 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
926 | 1021 | ||
927 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, | 1022 | RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, |
928 | "vs-804: invalid position for append"); | 1023 | "vs-804: invalid position for append"); |
929 | /* indirect item has to be appended, set up key of that position */ | 1024 | /* |
1025 | * indirect item has to be appended, | ||
1026 | * set up key of that position | ||
1027 | * (key type is unimportant) | ||
1028 | */ | ||
930 | make_cpu_key(&tmp_key, inode, | 1029 | make_cpu_key(&tmp_key, inode, |
931 | le_key_k_offset(version, | 1030 | le_key_k_offset(version, |
932 | &(ih->ih_key)) + | 1031 | &(ih->ih_key)) + |
933 | op_bytes_number(ih, | 1032 | op_bytes_number(ih, |
934 | inode->i_sb->s_blocksize), | 1033 | inode->i_sb->s_blocksize), |
935 | //pos_in_item * inode->i_sb->s_blocksize, | 1034 | TYPE_INDIRECT, 3); |
936 | TYPE_INDIRECT, 3); // key type is unimportant | ||
937 | 1035 | ||
938 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), | 1036 | RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), |
939 | "green-805: invalid offset"); | 1037 | "green-805: invalid offset"); |
@@ -954,8 +1052,10 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
954 | } | 1052 | } |
955 | } | 1053 | } |
956 | if (blocks_needed <= max_to_insert) { | 1054 | if (blocks_needed <= max_to_insert) { |
957 | /* we are going to add target block to the file. Use allocated | 1055 | /* |
958 | block for that */ | 1056 | * we are going to add target block to |
1057 | * the file. Use allocated block for that | ||
1058 | */ | ||
959 | un[blocks_needed - 1] = | 1059 | un[blocks_needed - 1] = |
960 | cpu_to_le32(allocated_block_nr); | 1060 | cpu_to_le32(allocated_block_nr); |
961 | set_block_dev_mapped(bh_result, | 1061 | set_block_dev_mapped(bh_result, |
@@ -964,8 +1064,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
964 | done = 1; | 1064 | done = 1; |
965 | } else { | 1065 | } else { |
966 | /* paste hole to the indirect item */ | 1066 | /* paste hole to the indirect item */ |
967 | /* If kmalloc failed, max_to_insert becomes zero and it means we | 1067 | /* |
968 | only have space for one block */ | 1068 | * If kmalloc failed, max_to_insert becomes |
1069 | * zero and it means we only have space for | ||
1070 | * one block | ||
1071 | */ | ||
969 | blocks_needed = | 1072 | blocks_needed = |
970 | max_to_insert ? max_to_insert : 1; | 1073 | max_to_insert ? max_to_insert : 1; |
971 | } | 1074 | } |
@@ -984,9 +1087,12 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
984 | goto failure; | 1087 | goto failure; |
985 | } | 1088 | } |
986 | if (!done) { | 1089 | if (!done) { |
987 | /* We need to mark new file size in case this function will be | 1090 | /* |
988 | interrupted/aborted later on. And we may do this only for | 1091 | * We need to mark new file size in case |
989 | holes. */ | 1092 | * this function will be interrupted/aborted |
1093 | * later on. And we may do this only for | ||
1094 | * holes. | ||
1095 | */ | ||
990 | inode->i_size += | 1096 | inode->i_size += |
991 | inode->i_sb->s_blocksize * blocks_needed; | 1097 | inode->i_sb->s_blocksize * blocks_needed; |
992 | } | 1098 | } |
@@ -995,13 +1101,13 @@ int reiserfs_get_block(struct inode *inode, sector_t block, | |||
995 | if (done == 1) | 1101 | if (done == 1) |
996 | break; | 1102 | break; |
997 | 1103 | ||
998 | /* this loop could log more blocks than we had originally asked | 1104 | /* |
999 | ** for. So, we have to allow the transaction to end if it is | 1105 | * this loop could log more blocks than we had originally |
1000 | ** too big or too full. Update the inode so things are | 1106 | * asked for. So, we have to allow the transaction to end |
1001 | ** consistent if we crash before the function returns | 1107 | * if it is too big or too full. Update the inode so things |
1002 | ** | 1108 | * are consistent if we crash before the function returns |
1003 | ** release the path so that anybody waiting on the path before | 1109 | * release the path so that anybody waiting on the path before |
1004 | ** ending their transaction will be able to continue. | 1110 | * ending their transaction will be able to continue. |
1005 | */ | 1111 | */ |
1006 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { | 1112 | if (journal_transaction_should_end(th, th->t_blocks_allocated)) { |
1007 | retval = restart_transaction(th, inode, &path); | 1113 | retval = restart_transaction(th, inode, &path); |
@@ -1060,8 +1166,10 @@ reiserfs_readpages(struct file *file, struct address_space *mapping, | |||
1060 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); | 1166 | return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); |
1061 | } | 1167 | } |
1062 | 1168 | ||
1063 | /* Compute real number of used bytes by file | 1169 | /* |
1064 | * Following three functions can go away when we'll have enough space in stat item | 1170 | * Compute real number of used bytes by file |
1171 | * Following three functions can go away when we'll have enough space in | ||
1172 | * stat item | ||
1065 | */ | 1173 | */ |
1066 | static int real_space_diff(struct inode *inode, int sd_size) | 1174 | static int real_space_diff(struct inode *inode, int sd_size) |
1067 | { | 1175 | { |
@@ -1071,13 +1179,14 @@ static int real_space_diff(struct inode *inode, int sd_size) | |||
1071 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) | 1179 | if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) |
1072 | return sd_size; | 1180 | return sd_size; |
1073 | 1181 | ||
1074 | /* End of file is also in full block with indirect reference, so round | 1182 | /* |
1075 | ** up to the next block. | 1183 | * End of file is also in full block with indirect reference, so round |
1076 | ** | 1184 | * up to the next block. |
1077 | ** there is just no way to know if the tail is actually packed | 1185 | * |
1078 | ** on the file, so we have to assume it isn't. When we pack the | 1186 | * there is just no way to know if the tail is actually packed |
1079 | ** tail, we add 4 bytes to pretend there really is an unformatted | 1187 | * on the file, so we have to assume it isn't. When we pack the |
1080 | ** node pointer | 1188 | * tail, we add 4 bytes to pretend there really is an unformatted |
1189 | * node pointer | ||
1081 | */ | 1190 | */ |
1082 | bytes = | 1191 | bytes = |
1083 | ((inode->i_size + | 1192 | ((inode->i_size + |
@@ -1108,29 +1217,29 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) | |||
1108 | bytes += (loff_t) 511; | 1217 | bytes += (loff_t) 511; |
1109 | } | 1218 | } |
1110 | 1219 | ||
1111 | /* files from before the quota patch might i_blocks such that | 1220 | /* |
1112 | ** bytes < real_space. Deal with that here to prevent it from | 1221 | * files from before the quota patch might i_blocks such that |
1113 | ** going negative. | 1222 | * bytes < real_space. Deal with that here to prevent it from |
1223 | * going negative. | ||
1114 | */ | 1224 | */ |
1115 | if (bytes < real_space) | 1225 | if (bytes < real_space) |
1116 | return 0; | 1226 | return 0; |
1117 | return (bytes - real_space) >> 9; | 1227 | return (bytes - real_space) >> 9; |
1118 | } | 1228 | } |
1119 | 1229 | ||
1120 | // | 1230 | /* |
1121 | // BAD: new directories have stat data of new type and all other items | 1231 | * BAD: new directories have stat data of new type and all other items |
1122 | // of old type. Version stored in the inode says about body items, so | 1232 | * of old type. Version stored in the inode says about body items, so |
1123 | // in update_stat_data we can not rely on inode, but have to check | 1233 | * in update_stat_data we can not rely on inode, but have to check |
1124 | // item version directly | 1234 | * item version directly |
1125 | // | 1235 | */ |
1126 | 1236 | ||
1127 | // called by read_locked_inode | 1237 | /* called by read_locked_inode */ |
1128 | static void init_inode(struct inode *inode, struct treepath *path) | 1238 | static void init_inode(struct inode *inode, struct treepath *path) |
1129 | { | 1239 | { |
1130 | struct buffer_head *bh; | 1240 | struct buffer_head *bh; |
1131 | struct item_head *ih; | 1241 | struct item_head *ih; |
1132 | __u32 rdev; | 1242 | __u32 rdev; |
1133 | //int version = ITEM_VERSION_1; | ||
1134 | 1243 | ||
1135 | bh = PATH_PLAST_BUFFER(path); | 1244 | bh = PATH_PLAST_BUFFER(path); |
1136 | ih = tp_item_head(path); | 1245 | ih = tp_item_head(path); |
@@ -1168,20 +1277,26 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1168 | inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1277 | inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
1169 | blocks = (inode->i_size + 511) >> 9; | 1278 | blocks = (inode->i_size + 511) >> 9; |
1170 | blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); | 1279 | blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); |
1280 | |||
1281 | /* | ||
1282 | * there was a bug in <=3.5.23 when i_blocks could take | ||
1283 | * negative values. Starting from 3.5.17 this value could | ||
1284 | * even be stored in stat data. For such files we set | ||
1285 | * i_blocks based on file size. Just 2 notes: this can be | ||
1286 | * wrong for sparse files. On-disk value will be only | ||
1287 | * updated if file's inode will ever change | ||
1288 | */ | ||
1171 | if (inode->i_blocks > blocks) { | 1289 | if (inode->i_blocks > blocks) { |
1172 | // there was a bug in <=3.5.23 when i_blocks could take negative | ||
1173 | // values. Starting from 3.5.17 this value could even be stored in | ||
1174 | // stat data. For such files we set i_blocks based on file | ||
1175 | // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be | ||
1176 | // only updated if file's inode will ever change | ||
1177 | inode->i_blocks = blocks; | 1290 | inode->i_blocks = blocks; |
1178 | } | 1291 | } |
1179 | 1292 | ||
1180 | rdev = sd_v1_rdev(sd); | 1293 | rdev = sd_v1_rdev(sd); |
1181 | REISERFS_I(inode)->i_first_direct_byte = | 1294 | REISERFS_I(inode)->i_first_direct_byte = |
1182 | sd_v1_first_direct_byte(sd); | 1295 | sd_v1_first_direct_byte(sd); |
1183 | /* an early bug in the quota code can give us an odd number for the | 1296 | |
1184 | ** block count. This is incorrect, fix it here. | 1297 | /* |
1298 | * an early bug in the quota code can give us an odd | ||
1299 | * number for the block count. This is incorrect, fix it here. | ||
1185 | */ | 1300 | */ |
1186 | if (inode->i_blocks & 1) { | 1301 | if (inode->i_blocks & 1) { |
1187 | inode->i_blocks++; | 1302 | inode->i_blocks++; |
@@ -1189,12 +1304,16 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1189 | inode_set_bytes(inode, | 1304 | inode_set_bytes(inode, |
1190 | to_real_used_space(inode, inode->i_blocks, | 1305 | to_real_used_space(inode, inode->i_blocks, |
1191 | SD_V1_SIZE)); | 1306 | SD_V1_SIZE)); |
1192 | /* nopack is initially zero for v1 objects. For v2 objects, | 1307 | /* |
1193 | nopack is initialised from sd_attrs */ | 1308 | * nopack is initially zero for v1 objects. For v2 objects, |
1309 | * nopack is initialised from sd_attrs | ||
1310 | */ | ||
1194 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; | 1311 | REISERFS_I(inode)->i_flags &= ~i_nopack_mask; |
1195 | } else { | 1312 | } else { |
1196 | // new stat data found, but object may have old items | 1313 | /* |
1197 | // (directories and symlinks) | 1314 | * new stat data found, but object may have old items |
1315 | * (directories and symlinks) | ||
1316 | */ | ||
1198 | struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); | 1317 | struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); |
1199 | 1318 | ||
1200 | inode->i_mode = sd_v2_mode(sd); | 1319 | inode->i_mode = sd_v2_mode(sd); |
@@ -1225,8 +1344,10 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1225 | inode_set_bytes(inode, | 1344 | inode_set_bytes(inode, |
1226 | to_real_used_space(inode, inode->i_blocks, | 1345 | to_real_used_space(inode, inode->i_blocks, |
1227 | SD_V2_SIZE)); | 1346 | SD_V2_SIZE)); |
1228 | /* read persistent inode attributes from sd and initialise | 1347 | /* |
1229 | generic inode flags from them */ | 1348 | * read persistent inode attributes from sd and initialise |
1349 | * generic inode flags from them | ||
1350 | */ | ||
1230 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); | 1351 | REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); |
1231 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); | 1352 | sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); |
1232 | } | 1353 | } |
@@ -1249,7 +1370,7 @@ static void init_inode(struct inode *inode, struct treepath *path) | |||
1249 | } | 1370 | } |
1250 | } | 1371 | } |
1251 | 1372 | ||
1252 | // update new stat data with inode fields | 1373 | /* update new stat data with inode fields */ |
1253 | static void inode2sd(void *sd, struct inode *inode, loff_t size) | 1374 | static void inode2sd(void *sd, struct inode *inode, loff_t size) |
1254 | { | 1375 | { |
1255 | struct stat_data *sd_v2 = (struct stat_data *)sd; | 1376 | struct stat_data *sd_v2 = (struct stat_data *)sd; |
@@ -1273,7 +1394,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) | |||
1273 | set_sd_v2_attrs(sd_v2, flags); | 1394 | set_sd_v2_attrs(sd_v2, flags); |
1274 | } | 1395 | } |
1275 | 1396 | ||
1276 | // used to copy inode's fields to old stat data | 1397 | /* used to copy inode's fields to old stat data */ |
1277 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | 1398 | static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) |
1278 | { | 1399 | { |
1279 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; | 1400 | struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; |
@@ -1292,14 +1413,15 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) | |||
1292 | else | 1413 | else |
1293 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); | 1414 | set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); |
1294 | 1415 | ||
1295 | // Sigh. i_first_direct_byte is back | 1416 | /* Sigh. i_first_direct_byte is back */ |
1296 | set_sd_v1_first_direct_byte(sd_v1, | 1417 | set_sd_v1_first_direct_byte(sd_v1, |
1297 | REISERFS_I(inode)->i_first_direct_byte); | 1418 | REISERFS_I(inode)->i_first_direct_byte); |
1298 | } | 1419 | } |
1299 | 1420 | ||
1300 | /* NOTE, you must prepare the buffer head before sending it here, | 1421 | /* |
1301 | ** and then log it after the call | 1422 | * NOTE, you must prepare the buffer head before sending it here, |
1302 | */ | 1423 | * and then log it after the call |
1424 | */ | ||
1303 | static void update_stat_data(struct treepath *path, struct inode *inode, | 1425 | static void update_stat_data(struct treepath *path, struct inode *inode, |
1304 | loff_t size) | 1426 | loff_t size) |
1305 | { | 1427 | { |
@@ -1313,8 +1435,8 @@ static void update_stat_data(struct treepath *path, struct inode *inode, | |||
1313 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", | 1435 | reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", |
1314 | INODE_PKEY(inode), ih); | 1436 | INODE_PKEY(inode), ih); |
1315 | 1437 | ||
1438 | /* path points to old stat data */ | ||
1316 | if (stat_data_v1(ih)) { | 1439 | if (stat_data_v1(ih)) { |
1317 | // path points to old stat data | ||
1318 | inode2sd_v1(ih_item_body(bh, ih), inode, size); | 1440 | inode2sd_v1(ih_item_body(bh, ih), inode, size); |
1319 | } else { | 1441 | } else { |
1320 | inode2sd(ih_item_body(bh, ih), inode, size); | 1442 | inode2sd(ih_item_body(bh, ih), inode, size); |
@@ -1335,7 +1457,8 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |||
1335 | 1457 | ||
1336 | BUG_ON(!th->t_trans_id); | 1458 | BUG_ON(!th->t_trans_id); |
1337 | 1459 | ||
1338 | make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant | 1460 | /* key type is unimportant */ |
1461 | make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); | ||
1339 | 1462 | ||
1340 | for (;;) { | 1463 | for (;;) { |
1341 | int pos; | 1464 | int pos; |
@@ -1363,19 +1486,22 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |||
1363 | return; | 1486 | return; |
1364 | } | 1487 | } |
1365 | 1488 | ||
1366 | /* sigh, prepare_for_journal might schedule. When it schedules the | 1489 | /* |
1367 | ** FS might change. We have to detect that, and loop back to the | 1490 | * sigh, prepare_for_journal might schedule. When it |
1368 | ** search if the stat data item has moved | 1491 | * schedules the FS might change. We have to detect that, |
1492 | * and loop back to the search if the stat data item has moved | ||
1369 | */ | 1493 | */ |
1370 | bh = get_last_bh(&path); | 1494 | bh = get_last_bh(&path); |
1371 | ih = tp_item_head(&path); | 1495 | ih = tp_item_head(&path); |
1372 | copy_item_head(&tmp_ih, ih); | 1496 | copy_item_head(&tmp_ih, ih); |
1373 | fs_gen = get_generation(inode->i_sb); | 1497 | fs_gen = get_generation(inode->i_sb); |
1374 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); | 1498 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); |
1499 | |||
1500 | /* Stat_data item has been moved after scheduling. */ | ||
1375 | if (fs_changed(fs_gen, inode->i_sb) | 1501 | if (fs_changed(fs_gen, inode->i_sb) |
1376 | && item_moved(&tmp_ih, &path)) { | 1502 | && item_moved(&tmp_ih, &path)) { |
1377 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); | 1503 | reiserfs_restore_prepared_buffer(inode->i_sb, bh); |
1378 | continue; /* Stat_data item has been moved after scheduling. */ | 1504 | continue; |
1379 | } | 1505 | } |
1380 | break; | 1506 | break; |
1381 | } | 1507 | } |
@@ -1385,23 +1511,23 @@ void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, | |||
1385 | return; | 1511 | return; |
1386 | } | 1512 | } |
1387 | 1513 | ||
1388 | /* reiserfs_read_locked_inode is called to read the inode off disk, and it | 1514 | /* |
1389 | ** does a make_bad_inode when things go wrong. But, we need to make sure | 1515 | * reiserfs_read_locked_inode is called to read the inode off disk, and it |
1390 | ** and clear the key in the private portion of the inode, otherwise a | 1516 | * does a make_bad_inode when things go wrong. But, we need to make sure |
1391 | ** corresponding iput might try to delete whatever object the inode last | 1517 | * and clear the key in the private portion of the inode, otherwise a |
1392 | ** represented. | 1518 | * corresponding iput might try to delete whatever object the inode last |
1393 | */ | 1519 | * represented. |
1520 | */ | ||
1394 | static void reiserfs_make_bad_inode(struct inode *inode) | 1521 | static void reiserfs_make_bad_inode(struct inode *inode) |
1395 | { | 1522 | { |
1396 | memset(INODE_PKEY(inode), 0, KEY_SIZE); | 1523 | memset(INODE_PKEY(inode), 0, KEY_SIZE); |
1397 | make_bad_inode(inode); | 1524 | make_bad_inode(inode); |
1398 | } | 1525 | } |
1399 | 1526 | ||
1400 | // | 1527 | /* |
1401 | // initially this function was derived from minix or ext2's analog and | 1528 | * initially this function was derived from minix or ext2's analog and |
1402 | // evolved as the prototype did | 1529 | * evolved as the prototype did |
1403 | // | 1530 | */ |
1404 | |||
1405 | int reiserfs_init_locked_inode(struct inode *inode, void *p) | 1531 | int reiserfs_init_locked_inode(struct inode *inode, void *p) |
1406 | { | 1532 | { |
1407 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; | 1533 | struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; |
@@ -1410,8 +1536,10 @@ int reiserfs_init_locked_inode(struct inode *inode, void *p) | |||
1410 | return 0; | 1536 | return 0; |
1411 | } | 1537 | } |
1412 | 1538 | ||
1413 | /* looks for stat data in the tree, and fills up the fields of in-core | 1539 | /* |
1414 | inode stat data fields */ | 1540 | * looks for stat data in the tree, and fills up the fields of in-core |
1541 | * inode stat data fields | ||
1542 | */ | ||
1415 | void reiserfs_read_locked_inode(struct inode *inode, | 1543 | void reiserfs_read_locked_inode(struct inode *inode, |
1416 | struct reiserfs_iget_args *args) | 1544 | struct reiserfs_iget_args *args) |
1417 | { | 1545 | { |
@@ -1422,8 +1550,10 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1422 | 1550 | ||
1423 | dirino = args->dirid; | 1551 | dirino = args->dirid; |
1424 | 1552 | ||
1425 | /* set version 1, version 2 could be used too, because stat data | 1553 | /* |
1426 | key is the same in both versions */ | 1554 | * set version 1, version 2 could be used too, because stat data |
1555 | * key is the same in both versions | ||
1556 | */ | ||
1427 | key.version = KEY_FORMAT_3_5; | 1557 | key.version = KEY_FORMAT_3_5; |
1428 | key.on_disk_key.k_dir_id = dirino; | 1558 | key.on_disk_key.k_dir_id = dirino; |
1429 | key.on_disk_key.k_objectid = inode->i_ino; | 1559 | key.on_disk_key.k_objectid = inode->i_ino; |
@@ -1439,8 +1569,9 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1439 | reiserfs_make_bad_inode(inode); | 1569 | reiserfs_make_bad_inode(inode); |
1440 | return; | 1570 | return; |
1441 | } | 1571 | } |
1572 | |||
1573 | /* a stale NFS handle can trigger this without it being an error */ | ||
1442 | if (retval != ITEM_FOUND) { | 1574 | if (retval != ITEM_FOUND) { |
1443 | /* a stale NFS handle can trigger this without it being an error */ | ||
1444 | pathrelse(&path_to_sd); | 1575 | pathrelse(&path_to_sd); |
1445 | reiserfs_make_bad_inode(inode); | 1576 | reiserfs_make_bad_inode(inode); |
1446 | clear_nlink(inode); | 1577 | clear_nlink(inode); |
@@ -1449,20 +1580,25 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1449 | 1580 | ||
1450 | init_inode(inode, &path_to_sd); | 1581 | init_inode(inode, &path_to_sd); |
1451 | 1582 | ||
1452 | /* It is possible that knfsd is trying to access inode of a file | 1583 | /* |
1453 | that is being removed from the disk by some other thread. As we | 1584 | * It is possible that knfsd is trying to access inode of a file |
1454 | update sd on unlink all that is required is to check for nlink | 1585 | * that is being removed from the disk by some other thread. As we |
1455 | here. This bug was first found by Sizif when debugging | 1586 | * update sd on unlink all that is required is to check for nlink |
1456 | SquidNG/Butterfly, forgotten, and found again after Philippe | 1587 | * here. This bug was first found by Sizif when debugging |
1457 | Gramoulle <philippe.gramoulle@mmania.com> reproduced it. | 1588 | * SquidNG/Butterfly, forgotten, and found again after Philippe |
1458 | 1589 | * Gramoulle <philippe.gramoulle@mmania.com> reproduced it. | |
1459 | More logical fix would require changes in fs/inode.c:iput() to | 1590 | |
1460 | remove inode from hash-table _after_ fs cleaned disk stuff up and | 1591 | * More logical fix would require changes in fs/inode.c:iput() to |
1461 | in iget() to return NULL if I_FREEING inode is found in | 1592 | * remove inode from hash-table _after_ fs cleaned disk stuff up and |
1462 | hash-table. */ | 1593 | * in iget() to return NULL if I_FREEING inode is found in |
1463 | /* Currently there is one place where it's ok to meet inode with | 1594 | * hash-table. |
1464 | nlink==0: processing of open-unlinked and half-truncated files | 1595 | */ |
1465 | during mount (fs/reiserfs/super.c:finish_unfinished()). */ | 1596 | |
1597 | /* | ||
1598 | * Currently there is one place where it's ok to meet inode with | ||
1599 | * nlink==0: processing of open-unlinked and half-truncated files | ||
1600 | * during mount (fs/reiserfs/super.c:finish_unfinished()). | ||
1601 | */ | ||
1466 | if ((inode->i_nlink == 0) && | 1602 | if ((inode->i_nlink == 0) && |
1467 | !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { | 1603 | !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { |
1468 | reiserfs_warning(inode->i_sb, "vs-13075", | 1604 | reiserfs_warning(inode->i_sb, "vs-13075", |
@@ -1472,7 +1608,8 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1472 | reiserfs_make_bad_inode(inode); | 1608 | reiserfs_make_bad_inode(inode); |
1473 | } | 1609 | } |
1474 | 1610 | ||
1475 | reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ | 1611 | /* init inode should be relsing */ |
1612 | reiserfs_check_path(&path_to_sd); | ||
1476 | 1613 | ||
1477 | /* | 1614 | /* |
1478 | * Stat data v1 doesn't support ACLs. | 1615 | * Stat data v1 doesn't support ACLs. |
@@ -1481,7 +1618,7 @@ void reiserfs_read_locked_inode(struct inode *inode, | |||
1481 | cache_no_acl(inode); | 1618 | cache_no_acl(inode); |
1482 | } | 1619 | } |
1483 | 1620 | ||
1484 | /** | 1621 | /* |
1485 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). | 1622 | * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). |
1486 | * | 1623 | * |
1487 | * @inode: inode from hash table to check | 1624 | * @inode: inode from hash table to check |
@@ -1556,7 +1693,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, | |||
1556 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, | 1693 | struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, |
1557 | int fh_len, int fh_type) | 1694 | int fh_len, int fh_type) |
1558 | { | 1695 | { |
1559 | /* fhtype happens to reflect the number of u32s encoded. | 1696 | /* |
1697 | * fhtype happens to reflect the number of u32s encoded. | ||
1560 | * due to a bug in earlier code, fhtype might indicate there | 1698 | * due to a bug in earlier code, fhtype might indicate there |
1561 | * are more u32s then actually fitted. | 1699 | * are more u32s then actually fitted. |
1562 | * so if fhtype seems to be more than len, reduce fhtype. | 1700 | * so if fhtype seems to be more than len, reduce fhtype. |
@@ -1625,13 +1763,16 @@ int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, | |||
1625 | return *lenp; | 1763 | return *lenp; |
1626 | } | 1764 | } |
1627 | 1765 | ||
1628 | /* looks for stat data, then copies fields to it, marks the buffer | 1766 | /* |
1629 | containing stat data as dirty */ | 1767 | * looks for stat data, then copies fields to it, marks the buffer |
1630 | /* reiserfs inodes are never really dirty, since the dirty inode call | 1768 | * containing stat data as dirty |
1631 | ** always logs them. This call allows the VFS inode marking routines | 1769 | */ |
1632 | ** to properly mark inodes for datasync and such, but only actually | 1770 | /* |
1633 | ** does something when called for a synchronous update. | 1771 | * reiserfs inodes are never really dirty, since the dirty inode call |
1634 | */ | 1772 | * always logs them. This call allows the VFS inode marking routines |
1773 | * to properly mark inodes for datasync and such, but only actually | ||
1774 | * does something when called for a synchronous update. | ||
1775 | */ | ||
1635 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1776 | int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1636 | { | 1777 | { |
1637 | struct reiserfs_transaction_handle th; | 1778 | struct reiserfs_transaction_handle th; |
@@ -1639,10 +1780,12 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1639 | 1780 | ||
1640 | if (inode->i_sb->s_flags & MS_RDONLY) | 1781 | if (inode->i_sb->s_flags & MS_RDONLY) |
1641 | return -EROFS; | 1782 | return -EROFS; |
1642 | /* memory pressure can sometimes initiate write_inode calls with sync == 1, | 1783 | /* |
1643 | ** these cases are just when the system needs ram, not when the | 1784 | * memory pressure can sometimes initiate write_inode calls with |
1644 | ** inode needs to reach disk for safety, and they can safely be | 1785 | * sync == 1, |
1645 | ** ignored because the altered inode has already been logged. | 1786 | * these cases are just when the system needs ram, not when the |
1787 | * inode needs to reach disk for safety, and they can safely be | ||
1788 | * ignored because the altered inode has already been logged. | ||
1646 | */ | 1789 | */ |
1647 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { | 1790 | if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { |
1648 | reiserfs_write_lock(inode->i_sb); | 1791 | reiserfs_write_lock(inode->i_sb); |
@@ -1655,8 +1798,10 @@ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1655 | return 0; | 1798 | return 0; |
1656 | } | 1799 | } |
1657 | 1800 | ||
1658 | /* stat data of new object is inserted already, this inserts the item | 1801 | /* |
1659 | containing "." and ".." entries */ | 1802 | * stat data of new object is inserted already, this inserts the item |
1803 | * containing "." and ".." entries | ||
1804 | */ | ||
1660 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | 1805 | static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, |
1661 | struct inode *inode, | 1806 | struct inode *inode, |
1662 | struct item_head *ih, struct treepath *path, | 1807 | struct item_head *ih, struct treepath *path, |
@@ -1674,9 +1819,11 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | |||
1674 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, | 1819 | le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, |
1675 | TYPE_DIRENTRY, 3 /*key length */ ); | 1820 | TYPE_DIRENTRY, 3 /*key length */ ); |
1676 | 1821 | ||
1677 | /* compose item head for new item. Directories consist of items of | 1822 | /* |
1678 | old type (ITEM_VERSION_1). Do not set key (second arg is 0), it | 1823 | * compose item head for new item. Directories consist of items of |
1679 | is done by reiserfs_new_inode */ | 1824 | * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it |
1825 | * is done by reiserfs_new_inode | ||
1826 | */ | ||
1680 | if (old_format_only(sb)) { | 1827 | if (old_format_only(sb)) { |
1681 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, | 1828 | make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, |
1682 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); | 1829 | TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); |
@@ -1714,9 +1861,12 @@ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, | |||
1714 | return reiserfs_insert_item(th, path, &key, ih, inode, body); | 1861 | return reiserfs_insert_item(th, path, &key, ih, inode, body); |
1715 | } | 1862 | } |
1716 | 1863 | ||
1717 | /* stat data of object has been inserted, this inserts the item | 1864 | /* |
1718 | containing the body of symlink */ | 1865 | * stat data of object has been inserted, this inserts the item |
1719 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ | 1866 | * containing the body of symlink |
1867 | */ | ||
1868 | static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, | ||
1869 | struct inode *inode, | ||
1720 | struct item_head *ih, | 1870 | struct item_head *ih, |
1721 | struct treepath *path, const char *symname, | 1871 | struct treepath *path, const char *symname, |
1722 | int item_len) | 1872 | int item_len) |
@@ -1754,15 +1904,26 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i | |||
1754 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); | 1904 | return reiserfs_insert_item(th, path, &key, ih, inode, symname); |
1755 | } | 1905 | } |
1756 | 1906 | ||
1757 | /* inserts the stat data into the tree, and then calls | 1907 | /* |
1758 | reiserfs_new_directory (to insert ".", ".." item if new object is | 1908 | * inserts the stat data into the tree, and then calls |
1759 | directory) or reiserfs_new_symlink (to insert symlink body if new | 1909 | * reiserfs_new_directory (to insert ".", ".." item if new object is |
1760 | object is symlink) or nothing (if new object is regular file) | 1910 | * directory) or reiserfs_new_symlink (to insert symlink body if new |
1761 | 1911 | * object is symlink) or nothing (if new object is regular file) | |
1762 | NOTE! uid and gid must already be set in the inode. If we return | 1912 | |
1763 | non-zero due to an error, we have to drop the quota previously allocated | 1913 | * NOTE! uid and gid must already be set in the inode. If we return |
1764 | for the fresh inode. This can only be done outside a transaction, so | 1914 | * non-zero due to an error, we have to drop the quota previously allocated |
1765 | if we return non-zero, we also end the transaction. */ | 1915 | * for the fresh inode. This can only be done outside a transaction, so |
1916 | * if we return non-zero, we also end the transaction. | ||
1917 | * | ||
1918 | * @th: active transaction handle | ||
1919 | * @dir: parent directory for new inode | ||
1920 | * @mode: mode of new inode | ||
1921 | * @symname: symlink contents if inode is symlink | ||
1922 | * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for | ||
1923 | * symlinks | ||
1924 | * @inode: inode to be filled | ||
1925 | * @security: optional security context to associate with this inode | ||
1926 | */ | ||
1766 | int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | 1927 | int reiserfs_new_inode(struct reiserfs_transaction_handle *th, |
1767 | struct inode *dir, umode_t mode, const char *symname, | 1928 | struct inode *dir, umode_t mode, const char *symname, |
1768 | /* 0 for regular, EMTRY_DIR_SIZE for dirs, | 1929 | /* 0 for regular, EMTRY_DIR_SIZE for dirs, |
@@ -1820,10 +1981,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1820 | } | 1981 | } |
1821 | 1982 | ||
1822 | if (old_format_only(sb)) | 1983 | if (old_format_only(sb)) |
1823 | /* not a perfect generation count, as object ids can be reused, but | 1984 | /* |
1824 | ** this is as good as reiserfs can do right now. | 1985 | * not a perfect generation count, as object ids can be reused, |
1825 | ** note that the private part of inode isn't filled in yet, we have | 1986 | * but this is as good as reiserfs can do right now. |
1826 | ** to use the directory. | 1987 | * note that the private part of inode isn't filled in yet, |
1988 | * we have to use the directory. | ||
1827 | */ | 1989 | */ |
1828 | inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); | 1990 | inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); |
1829 | else | 1991 | else |
@@ -1878,9 +2040,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1878 | goto out_bad_inode; | 2040 | goto out_bad_inode; |
1879 | } | 2041 | } |
1880 | if (old_format_only(sb)) { | 2042 | if (old_format_only(sb)) { |
2043 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ | ||
1881 | if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { | 2044 | if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { |
1882 | pathrelse(&path_to_key); | 2045 | pathrelse(&path_to_key); |
1883 | /* i_uid or i_gid is too big to be stored in stat data v3.5 */ | ||
1884 | err = -EINVAL; | 2046 | err = -EINVAL; |
1885 | goto out_bad_inode; | 2047 | goto out_bad_inode; |
1886 | } | 2048 | } |
@@ -1888,9 +2050,11 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1888 | } else { | 2050 | } else { |
1889 | inode2sd(&sd, inode, inode->i_size); | 2051 | inode2sd(&sd, inode, inode->i_size); |
1890 | } | 2052 | } |
1891 | // store in in-core inode the key of stat data and version all | 2053 | /* |
1892 | // object items will have (directory items will have old offset | 2054 | * store in in-core inode the key of stat data and version all |
1893 | // format, other new objects will consist of new items) | 2055 | * object items will have (directory items will have old offset |
2056 | * format, other new objects will consist of new items) | ||
2057 | */ | ||
1894 | if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) | 2058 | if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) |
1895 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); | 2059 | set_inode_item_key_version(inode, KEY_FORMAT_3_5); |
1896 | else | 2060 | else |
@@ -1975,10 +2139,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1975 | 2139 | ||
1976 | return 0; | 2140 | return 0; |
1977 | 2141 | ||
1978 | /* it looks like you can easily compress these two goto targets into | ||
1979 | * one. Keeping it like this doesn't actually hurt anything, and they | ||
1980 | * are place holders for what the quota code actually needs. | ||
1981 | */ | ||
1982 | out_bad_inode: | 2142 | out_bad_inode: |
1983 | /* Invalidate the object, nothing was inserted yet */ | 2143 | /* Invalidate the object, nothing was inserted yet */ |
1984 | INODE_PKEY(inode)->k_objectid = 0; | 2144 | INODE_PKEY(inode)->k_objectid = 0; |
@@ -1990,7 +2150,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
1990 | 2150 | ||
1991 | out_end_trans: | 2151 | out_end_trans: |
1992 | journal_end(th, th->t_super, th->t_blocks_allocated); | 2152 | journal_end(th, th->t_super, th->t_blocks_allocated); |
1993 | /* Drop can be outside and it needs more credits so it's better to have it outside */ | 2153 | /* |
2154 | * Drop can be outside and it needs more credits so it's better | ||
2155 | * to have it outside | ||
2156 | */ | ||
1994 | depth = reiserfs_write_unlock_nested(inode->i_sb); | 2157 | depth = reiserfs_write_unlock_nested(inode->i_sb); |
1995 | dquot_drop(inode); | 2158 | dquot_drop(inode); |
1996 | reiserfs_write_lock_nested(inode->i_sb, depth); | 2159 | reiserfs_write_lock_nested(inode->i_sb, depth); |
@@ -2006,25 +2169,26 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, | |||
2006 | } | 2169 | } |
2007 | 2170 | ||
2008 | /* | 2171 | /* |
2009 | ** finds the tail page in the page cache, | 2172 | * finds the tail page in the page cache, |
2010 | ** reads the last block in. | 2173 | * reads the last block in. |
2011 | ** | 2174 | * |
2012 | ** On success, page_result is set to a locked, pinned page, and bh_result | 2175 | * On success, page_result is set to a locked, pinned page, and bh_result |
2013 | ** is set to an up to date buffer for the last block in the file. returns 0. | 2176 | * is set to an up to date buffer for the last block in the file. returns 0. |
2014 | ** | 2177 | * |
2015 | ** tail conversion is not done, so bh_result might not be valid for writing | 2178 | * tail conversion is not done, so bh_result might not be valid for writing |
2016 | ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before | 2179 | * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before |
2017 | ** trying to write the block. | 2180 | * trying to write the block. |
2018 | ** | 2181 | * |
2019 | ** on failure, nonzero is returned, page_result and bh_result are untouched. | 2182 | * on failure, nonzero is returned, page_result and bh_result are untouched. |
2020 | */ | 2183 | */ |
2021 | static int grab_tail_page(struct inode *inode, | 2184 | static int grab_tail_page(struct inode *inode, |
2022 | struct page **page_result, | 2185 | struct page **page_result, |
2023 | struct buffer_head **bh_result) | 2186 | struct buffer_head **bh_result) |
2024 | { | 2187 | { |
2025 | 2188 | ||
2026 | /* we want the page with the last byte in the file, | 2189 | /* |
2027 | ** not the page that will hold the next byte for appending | 2190 | * we want the page with the last byte in the file, |
2191 | * not the page that will hold the next byte for appending | ||
2028 | */ | 2192 | */ |
2029 | unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; | 2193 | unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; |
2030 | unsigned long pos = 0; | 2194 | unsigned long pos = 0; |
@@ -2036,10 +2200,11 @@ static int grab_tail_page(struct inode *inode, | |||
2036 | struct page *page; | 2200 | struct page *page; |
2037 | int error; | 2201 | int error; |
2038 | 2202 | ||
2039 | /* we know that we are only called with inode->i_size > 0. | 2203 | /* |
2040 | ** we also know that a file tail can never be as big as a block | 2204 | * we know that we are only called with inode->i_size > 0. |
2041 | ** If i_size % blocksize == 0, our file is currently block aligned | 2205 | * we also know that a file tail can never be as big as a block |
2042 | ** and it won't need converting or zeroing after a truncate. | 2206 | * If i_size % blocksize == 0, our file is currently block aligned |
2207 | * and it won't need converting or zeroing after a truncate. | ||
2043 | */ | 2208 | */ |
2044 | if ((offset & (blocksize - 1)) == 0) { | 2209 | if ((offset & (blocksize - 1)) == 0) { |
2045 | return -ENOENT; | 2210 | return -ENOENT; |
@@ -2068,10 +2233,11 @@ static int grab_tail_page(struct inode *inode, | |||
2068 | } while (bh != head); | 2233 | } while (bh != head); |
2069 | 2234 | ||
2070 | if (!buffer_uptodate(bh)) { | 2235 | if (!buffer_uptodate(bh)) { |
2071 | /* note, this should never happen, prepare_write should | 2236 | /* |
2072 | ** be taking care of this for us. If the buffer isn't up to date, | 2237 | * note, this should never happen, prepare_write should be |
2073 | ** I've screwed up the code to find the buffer, or the code to | 2238 | * taking care of this for us. If the buffer isn't up to |
2074 | ** call prepare_write | 2239 | * date, I've screwed up the code to find the buffer, or the |
2240 | * code to call prepare_write | ||
2075 | */ | 2241 | */ |
2076 | reiserfs_error(inode->i_sb, "clm-6000", | 2242 | reiserfs_error(inode->i_sb, "clm-6000", |
2077 | "error reading block %lu", bh->b_blocknr); | 2243 | "error reading block %lu", bh->b_blocknr); |
@@ -2091,11 +2257,11 @@ static int grab_tail_page(struct inode *inode, | |||
2091 | } | 2257 | } |
2092 | 2258 | ||
2093 | /* | 2259 | /* |
2094 | ** vfs version of truncate file. Must NOT be called with | 2260 | * vfs version of truncate file. Must NOT be called with |
2095 | ** a transaction already started. | 2261 | * a transaction already started. |
2096 | ** | 2262 | * |
2097 | ** some code taken from block_truncate_page | 2263 | * some code taken from block_truncate_page |
2098 | */ | 2264 | */ |
2099 | int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | 2265 | int reiserfs_truncate_file(struct inode *inode, int update_timestamps) |
2100 | { | 2266 | { |
2101 | struct reiserfs_transaction_handle th; | 2267 | struct reiserfs_transaction_handle th; |
@@ -2113,9 +2279,11 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2113 | if (inode->i_size > 0) { | 2279 | if (inode->i_size > 0) { |
2114 | error = grab_tail_page(inode, &page, &bh); | 2280 | error = grab_tail_page(inode, &page, &bh); |
2115 | if (error) { | 2281 | if (error) { |
2116 | // -ENOENT means we truncated past the end of the file, | 2282 | /* |
2117 | // and get_block_create_0 could not find a block to read in, | 2283 | * -ENOENT means we truncated past the end of the |
2118 | // which is ok. | 2284 | * file, and get_block_create_0 could not find a |
2285 | * block to read in, which is ok. | ||
2286 | */ | ||
2119 | if (error != -ENOENT) | 2287 | if (error != -ENOENT) |
2120 | reiserfs_error(inode->i_sb, "clm-6001", | 2288 | reiserfs_error(inode->i_sb, "clm-6001", |
2121 | "grab_tail_page failed %d", | 2289 | "grab_tail_page failed %d", |
@@ -2125,25 +2293,30 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) | |||
2125 | } | 2293 | } |
2126 | } | 2294 | } |
2127 | 2295 | ||
2128 | /* so, if page != NULL, we have a buffer head for the offset at | 2296 | /* |
2129 | ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, | 2297 | * so, if page != NULL, we have a buffer head for the offset at |
2130 | ** then we have an unformatted node. Otherwise, we have a direct item, | 2298 | * the end of the file. if the bh is mapped, and bh->b_blocknr != 0, |
2131 | ** and no zeroing is required on disk. We zero after the truncate, | 2299 | * then we have an unformatted node. Otherwise, we have a direct item, |
2132 | ** because the truncate might pack the item anyway | 2300 | * and no zeroing is required on disk. We zero after the truncate, |
2133 | ** (it will unmap bh if it packs). | 2301 | * because the truncate might pack the item anyway |
2302 | * (it will unmap bh if it packs). | ||
2303 | * | ||
2304 | * it is enough to reserve space in transaction for 2 balancings: | ||
2305 | * one for "save" link adding and another for the first | ||
2306 | * cut_from_item. 1 is for update_sd | ||
2134 | */ | 2307 | */ |
2135 | /* it is enough to reserve space in transaction for 2 balancings: | ||
2136 | one for "save" link adding and another for the first | ||
2137 | cut_from_item. 1 is for update_sd */ | ||
2138 | error = journal_begin(&th, inode->i_sb, | 2308 | error = journal_begin(&th, inode->i_sb, |
2139 | JOURNAL_PER_BALANCE_CNT * 2 + 1); | 2309 | JOURNAL_PER_BALANCE_CNT * 2 + 1); |
2140 | if (error) | 2310 | if (error) |
2141 | goto out; | 2311 | goto out; |
2142 | reiserfs_update_inode_transaction(inode); | 2312 | reiserfs_update_inode_transaction(inode); |
2143 | if (update_timestamps) | 2313 | if (update_timestamps) |
2144 | /* we are doing real truncate: if the system crashes before the last | 2314 | /* |
2145 | transaction of truncating gets committed - on reboot the file | 2315 | * we are doing real truncate: if the system crashes |
2146 | either appears truncated properly or not truncated at all */ | 2316 | * before the last transaction of truncating gets committed |
2317 | * - on reboot the file either appears truncated properly | ||
2318 | * or not truncated at all | ||
2319 | */ | ||
2147 | add_save_link(&th, inode, 1); | 2320 | add_save_link(&th, inode, 1); |
2148 | err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); | 2321 | err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); |
2149 | error = | 2322 | error = |
@@ -2212,7 +2385,10 @@ static int map_block_for_writepage(struct inode *inode, | |||
2212 | int copy_size; | 2385 | int copy_size; |
2213 | int trans_running = 0; | 2386 | int trans_running = 0; |
2214 | 2387 | ||
2215 | /* catch places below that try to log something without starting a trans */ | 2388 | /* |
2389 | * catch places below that try to log something without | ||
2390 | * starting a trans | ||
2391 | */ | ||
2216 | th.t_trans_id = 0; | 2392 | th.t_trans_id = 0; |
2217 | 2393 | ||
2218 | if (!buffer_uptodate(bh_result)) { | 2394 | if (!buffer_uptodate(bh_result)) { |
@@ -2331,7 +2507,8 @@ static int map_block_for_writepage(struct inode *inode, | |||
2331 | kunmap(bh_result->b_page); | 2507 | kunmap(bh_result->b_page); |
2332 | 2508 | ||
2333 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { | 2509 | if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { |
2334 | /* we've copied data from the page into the direct item, so the | 2510 | /* |
2511 | * we've copied data from the page into the direct item, so the | ||
2335 | * buffer in the page is now clean, mark it to reflect that. | 2512 | * buffer in the page is now clean, mark it to reflect that. |
2336 | */ | 2513 | */ |
2337 | lock_buffer(bh_result); | 2514 | lock_buffer(bh_result); |
@@ -2370,7 +2547,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2370 | return 0; | 2547 | return 0; |
2371 | } | 2548 | } |
2372 | 2549 | ||
2373 | /* The page dirty bit is cleared before writepage is called, which | 2550 | /* |
2551 | * The page dirty bit is cleared before writepage is called, which | ||
2374 | * means we have to tell create_empty_buffers to make dirty buffers | 2552 | * means we have to tell create_empty_buffers to make dirty buffers |
2375 | * The page really should be up to date at this point, so tossing | 2553 | * The page really should be up to date at this point, so tossing |
2376 | * in the BH_Uptodate is just a sanity check. | 2554 | * in the BH_Uptodate is just a sanity check. |
@@ -2381,8 +2559,9 @@ static int reiserfs_write_full_page(struct page *page, | |||
2381 | } | 2559 | } |
2382 | head = page_buffers(page); | 2560 | head = page_buffers(page); |
2383 | 2561 | ||
2384 | /* last page in the file, zero out any contents past the | 2562 | /* |
2385 | ** last byte in the file | 2563 | * last page in the file, zero out any contents past the |
2564 | * last byte in the file | ||
2386 | */ | 2565 | */ |
2387 | if (page->index >= end_index) { | 2566 | if (page->index >= end_index) { |
2388 | unsigned last_offset; | 2567 | unsigned last_offset; |
@@ -2412,7 +2591,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2412 | (!buffer_mapped(bh) || (buffer_mapped(bh) | 2591 | (!buffer_mapped(bh) || (buffer_mapped(bh) |
2413 | && bh->b_blocknr == | 2592 | && bh->b_blocknr == |
2414 | 0))) { | 2593 | 0))) { |
2415 | /* not mapped yet, or it points to a direct item, search | 2594 | /* |
2595 | * not mapped yet, or it points to a direct item, search | ||
2416 | * the btree for the mapping info, and log any direct | 2596 | * the btree for the mapping info, and log any direct |
2417 | * items found | 2597 | * items found |
2418 | */ | 2598 | */ |
@@ -2453,7 +2633,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2453 | journal_mark_dirty(&th, s, bh); | 2633 | journal_mark_dirty(&th, s, bh); |
2454 | continue; | 2634 | continue; |
2455 | } | 2635 | } |
2456 | /* from this point on, we know the buffer is mapped to a | 2636 | /* |
2637 | * from this point on, we know the buffer is mapped to a | ||
2457 | * real block and not a direct item | 2638 | * real block and not a direct item |
2458 | */ | 2639 | */ |
2459 | if (wbc->sync_mode != WB_SYNC_NONE) { | 2640 | if (wbc->sync_mode != WB_SYNC_NONE) { |
@@ -2520,7 +2701,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2520 | return error; | 2701 | return error; |
2521 | 2702 | ||
2522 | fail: | 2703 | fail: |
2523 | /* catches various errors, we need to make sure any valid dirty blocks | 2704 | /* |
2705 | * catches various errors, we need to make sure any valid dirty blocks | ||
2524 | * get to the media. The page is currently locked and not marked for | 2706 | * get to the media. The page is currently locked and not marked for |
2525 | * writeback | 2707 | * writeback |
2526 | */ | 2708 | */ |
@@ -2533,8 +2715,8 @@ static int reiserfs_write_full_page(struct page *page, | |||
2533 | mark_buffer_async_write(bh); | 2715 | mark_buffer_async_write(bh); |
2534 | } else { | 2716 | } else { |
2535 | /* | 2717 | /* |
2536 | * clear any dirty bits that might have come from getting | 2718 | * clear any dirty bits that might have come from |
2537 | * attached to a dirty page | 2719 | * getting attached to a dirty page |
2538 | */ | 2720 | */ |
2539 | clear_buffer_dirty(bh); | 2721 | clear_buffer_dirty(bh); |
2540 | } | 2722 | } |
@@ -2614,15 +2796,18 @@ static int reiserfs_write_begin(struct file *file, | |||
2614 | ret = __block_write_begin(page, pos, len, reiserfs_get_block); | 2796 | ret = __block_write_begin(page, pos, len, reiserfs_get_block); |
2615 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2797 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2616 | struct reiserfs_transaction_handle *th = current->journal_info; | 2798 | struct reiserfs_transaction_handle *th = current->journal_info; |
2617 | /* this gets a little ugly. If reiserfs_get_block returned an | 2799 | /* |
2618 | * error and left a transacstion running, we've got to close it, | 2800 | * this gets a little ugly. If reiserfs_get_block returned an |
2619 | * and we've got to free handle if it was a persistent transaction. | 2801 | * error and left a transacstion running, we've got to close |
2802 | * it, and we've got to free handle if it was a persistent | ||
2803 | * transaction. | ||
2620 | * | 2804 | * |
2621 | * But, if we had nested into an existing transaction, we need | 2805 | * But, if we had nested into an existing transaction, we need |
2622 | * to just drop the ref count on the handle. | 2806 | * to just drop the ref count on the handle. |
2623 | * | 2807 | * |
2624 | * If old_ref == 0, the transaction is from reiserfs_get_block, | 2808 | * If old_ref == 0, the transaction is from reiserfs_get_block, |
2625 | * and it was a persistent trans. Otherwise, it was nested above. | 2809 | * and it was a persistent trans. Otherwise, it was nested |
2810 | * above. | ||
2626 | */ | 2811 | */ |
2627 | if (th->t_refcount > old_ref) { | 2812 | if (th->t_refcount > old_ref) { |
2628 | if (old_ref) | 2813 | if (old_ref) |
@@ -2671,15 +2856,18 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) | |||
2671 | ret = __block_write_begin(page, from, len, reiserfs_get_block); | 2856 | ret = __block_write_begin(page, from, len, reiserfs_get_block); |
2672 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2857 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2673 | struct reiserfs_transaction_handle *th = current->journal_info; | 2858 | struct reiserfs_transaction_handle *th = current->journal_info; |
2674 | /* this gets a little ugly. If reiserfs_get_block returned an | 2859 | /* |
2675 | * error and left a transacstion running, we've got to close it, | 2860 | * this gets a little ugly. If reiserfs_get_block returned an |
2676 | * and we've got to free handle if it was a persistent transaction. | 2861 | * error and left a transacstion running, we've got to close |
2862 | * it, and we've got to free handle if it was a persistent | ||
2863 | * transaction. | ||
2677 | * | 2864 | * |
2678 | * But, if we had nested into an existing transaction, we need | 2865 | * But, if we had nested into an existing transaction, we need |
2679 | * to just drop the ref count on the handle. | 2866 | * to just drop the ref count on the handle. |
2680 | * | 2867 | * |
2681 | * If old_ref == 0, the transaction is from reiserfs_get_block, | 2868 | * If old_ref == 0, the transaction is from reiserfs_get_block, |
2682 | * and it was a persistent trans. Otherwise, it was nested above. | 2869 | * and it was a persistent trans. Otherwise, it was nested |
2870 | * above. | ||
2683 | */ | 2871 | */ |
2684 | if (th->t_refcount > old_ref) { | 2872 | if (th->t_refcount > old_ref) { |
2685 | if (old_ref) | 2873 | if (old_ref) |
@@ -2734,17 +2922,20 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2734 | 2922 | ||
2735 | reiserfs_commit_page(inode, page, start, start + copied); | 2923 | reiserfs_commit_page(inode, page, start, start + copied); |
2736 | 2924 | ||
2737 | /* generic_commit_write does this for us, but does not update the | 2925 | /* |
2738 | ** transaction tracking stuff when the size changes. So, we have | 2926 | * generic_commit_write does this for us, but does not update the |
2739 | ** to do the i_size updates here. | 2927 | * transaction tracking stuff when the size changes. So, we have |
2928 | * to do the i_size updates here. | ||
2740 | */ | 2929 | */ |
2741 | if (pos + copied > inode->i_size) { | 2930 | if (pos + copied > inode->i_size) { |
2742 | struct reiserfs_transaction_handle myth; | 2931 | struct reiserfs_transaction_handle myth; |
2743 | reiserfs_write_lock(inode->i_sb); | 2932 | reiserfs_write_lock(inode->i_sb); |
2744 | locked = true; | 2933 | locked = true; |
2745 | /* If the file have grown beyond the border where it | 2934 | /* |
2746 | can have a tail, unmark it as needing a tail | 2935 | * If the file have grown beyond the border where it |
2747 | packing */ | 2936 | * can have a tail, unmark it as needing a tail |
2937 | * packing | ||
2938 | */ | ||
2748 | if ((have_large_tails(inode->i_sb) | 2939 | if ((have_large_tails(inode->i_sb) |
2749 | && inode->i_size > i_block_size(inode) * 4) | 2940 | && inode->i_size > i_block_size(inode) * 4) |
2750 | || (have_small_tails(inode->i_sb) | 2941 | || (have_small_tails(inode->i_sb) |
@@ -2759,8 +2950,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, | |||
2759 | inode->i_size = pos + copied; | 2950 | inode->i_size = pos + copied; |
2760 | /* | 2951 | /* |
2761 | * this will just nest into our transaction. It's important | 2952 | * this will just nest into our transaction. It's important |
2762 | * to use mark_inode_dirty so the inode gets pushed around on the | 2953 | * to use mark_inode_dirty so the inode gets pushed around on |
2763 | * dirty lists, and so that O_SYNC works as expected | 2954 | * the dirty lists, and so that O_SYNC works as expected |
2764 | */ | 2955 | */ |
2765 | mark_inode_dirty(inode); | 2956 | mark_inode_dirty(inode); |
2766 | reiserfs_update_sd(&myth, inode); | 2957 | reiserfs_update_sd(&myth, inode); |
@@ -2822,15 +3013,18 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2822 | } | 3013 | } |
2823 | reiserfs_commit_page(inode, page, from, to); | 3014 | reiserfs_commit_page(inode, page, from, to); |
2824 | 3015 | ||
2825 | /* generic_commit_write does this for us, but does not update the | 3016 | /* |
2826 | ** transaction tracking stuff when the size changes. So, we have | 3017 | * generic_commit_write does this for us, but does not update the |
2827 | ** to do the i_size updates here. | 3018 | * transaction tracking stuff when the size changes. So, we have |
3019 | * to do the i_size updates here. | ||
2828 | */ | 3020 | */ |
2829 | if (pos > inode->i_size) { | 3021 | if (pos > inode->i_size) { |
2830 | struct reiserfs_transaction_handle myth; | 3022 | struct reiserfs_transaction_handle myth; |
2831 | /* If the file have grown beyond the border where it | 3023 | /* |
2832 | can have a tail, unmark it as needing a tail | 3024 | * If the file have grown beyond the border where it |
2833 | packing */ | 3025 | * can have a tail, unmark it as needing a tail |
3026 | * packing | ||
3027 | */ | ||
2834 | if ((have_large_tails(inode->i_sb) | 3028 | if ((have_large_tails(inode->i_sb) |
2835 | && inode->i_size > i_block_size(inode) * 4) | 3029 | && inode->i_size > i_block_size(inode) * 4) |
2836 | || (have_small_tails(inode->i_sb) | 3030 | || (have_small_tails(inode->i_sb) |
@@ -2845,8 +3039,8 @@ int reiserfs_commit_write(struct file *f, struct page *page, | |||
2845 | inode->i_size = pos; | 3039 | inode->i_size = pos; |
2846 | /* | 3040 | /* |
2847 | * this will just nest into our transaction. It's important | 3041 | * this will just nest into our transaction. It's important |
2848 | * to use mark_inode_dirty so the inode gets pushed around on the | 3042 | * to use mark_inode_dirty so the inode gets pushed around |
2849 | * dirty lists, and so that O_SYNC works as expected | 3043 | * on the dirty lists, and so that O_SYNC works as expected |
2850 | */ | 3044 | */ |
2851 | mark_inode_dirty(inode); | 3045 | mark_inode_dirty(inode); |
2852 | reiserfs_update_sd(&myth, inode); | 3046 | reiserfs_update_sd(&myth, inode); |
@@ -2924,9 +3118,10 @@ void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) | |||
2924 | } | 3118 | } |
2925 | } | 3119 | } |
2926 | 3120 | ||
2927 | /* decide if this buffer needs to stay around for data logging or ordered | 3121 | /* |
2928 | ** write purposes | 3122 | * decide if this buffer needs to stay around for data logging or ordered |
2929 | */ | 3123 | * write purposes |
3124 | */ | ||
2930 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | 3125 | static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) |
2931 | { | 3126 | { |
2932 | int ret = 1; | 3127 | int ret = 1; |
@@ -2937,7 +3132,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2937 | if (!buffer_mapped(bh)) { | 3132 | if (!buffer_mapped(bh)) { |
2938 | goto free_jh; | 3133 | goto free_jh; |
2939 | } | 3134 | } |
2940 | /* the page is locked, and the only places that log a data buffer | 3135 | /* |
3136 | * the page is locked, and the only places that log a data buffer | ||
2941 | * also lock the page. | 3137 | * also lock the page. |
2942 | */ | 3138 | */ |
2943 | if (reiserfs_file_data_log(inode)) { | 3139 | if (reiserfs_file_data_log(inode)) { |
@@ -2952,7 +3148,8 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
2952 | struct reiserfs_journal_list *jl; | 3148 | struct reiserfs_journal_list *jl; |
2953 | struct reiserfs_jh *jh = bh->b_private; | 3149 | struct reiserfs_jh *jh = bh->b_private; |
2954 | 3150 | ||
2955 | /* why is this safe? | 3151 | /* |
3152 | * why is this safe? | ||
2956 | * reiserfs_setattr updates i_size in the on disk | 3153 | * reiserfs_setattr updates i_size in the on disk |
2957 | * stat data before allowing vmtruncate to be called. | 3154 | * stat data before allowing vmtruncate to be called. |
2958 | * | 3155 | * |
@@ -3080,8 +3277,10 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |||
3080 | return ret; | 3277 | return ret; |
3081 | } | 3278 | } |
3082 | 3279 | ||
3083 | /* We thank Mingming Cao for helping us understand in great detail what | 3280 | /* |
3084 | to do in this section of the code. */ | 3281 | * We thank Mingming Cao for helping us understand in great detail what |
3282 | * to do in this section of the code. | ||
3283 | */ | ||
3085 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, | 3284 | static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, |
3086 | const struct iovec *iov, loff_t offset, | 3285 | const struct iovec *iov, loff_t offset, |
3087 | unsigned long nr_segs) | 3286 | unsigned long nr_segs) |
@@ -3127,8 +3326,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3127 | dquot_initialize(inode); | 3326 | dquot_initialize(inode); |
3128 | reiserfs_write_lock(inode->i_sb); | 3327 | reiserfs_write_lock(inode->i_sb); |
3129 | if (attr->ia_valid & ATTR_SIZE) { | 3328 | if (attr->ia_valid & ATTR_SIZE) { |
3130 | /* version 2 items will be caught by the s_maxbytes check | 3329 | /* |
3131 | ** done for us in vmtruncate | 3330 | * version 2 items will be caught by the s_maxbytes check |
3331 | * done for us in vmtruncate | ||
3132 | */ | 3332 | */ |
3133 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && | 3333 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && |
3134 | attr->ia_size > MAX_NON_LFS) { | 3334 | attr->ia_size > MAX_NON_LFS) { |
@@ -3189,7 +3389,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3189 | if (error) | 3389 | if (error) |
3190 | return error; | 3390 | return error; |
3191 | 3391 | ||
3192 | /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ | 3392 | /* |
3393 | * (user+group)*(old+new) structure - we count quota | ||
3394 | * info and , inode write (sb, inode) | ||
3395 | */ | ||
3193 | reiserfs_write_lock(inode->i_sb); | 3396 | reiserfs_write_lock(inode->i_sb); |
3194 | error = journal_begin(&th, inode->i_sb, jbegin_count); | 3397 | error = journal_begin(&th, inode->i_sb, jbegin_count); |
3195 | reiserfs_write_unlock(inode->i_sb); | 3398 | reiserfs_write_unlock(inode->i_sb); |
@@ -3203,8 +3406,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3203 | goto out; | 3406 | goto out; |
3204 | } | 3407 | } |
3205 | 3408 | ||
3206 | /* Update corresponding info in inode so that everything is in | 3409 | /* |
3207 | * one transaction */ | 3410 | * Update corresponding info in inode so that everything |
3411 | * is in one transaction | ||
3412 | */ | ||
3208 | if (attr->ia_valid & ATTR_UID) | 3413 | if (attr->ia_valid & ATTR_UID) |
3209 | inode->i_uid = attr->ia_uid; | 3414 | inode->i_uid = attr->ia_uid; |
3210 | if (attr->ia_valid & ATTR_GID) | 3415 | if (attr->ia_valid & ATTR_GID) |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 946ccbf5b5a1..a4197c3240b9 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -15,7 +15,8 @@ | |||
15 | * reiserfs_ioctl - handler for ioctl for inode | 15 | * reiserfs_ioctl - handler for ioctl for inode |
16 | * supported commands: | 16 | * supported commands: |
17 | * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect | 17 | * 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect |
18 | * and prevent packing file (argument arg has to be non-zero) | 18 | * and prevent packing file (argument arg has t |
19 | * be non-zero) | ||
19 | * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION | 20 | * 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION |
20 | * 3) That's all for a while ... | 21 | * 3) That's all for a while ... |
21 | */ | 22 | */ |
@@ -132,7 +133,10 @@ setversion_out: | |||
132 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | 133 | long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, |
133 | unsigned long arg) | 134 | unsigned long arg) |
134 | { | 135 | { |
135 | /* These are just misnamed, they actually get/put from/to user an int */ | 136 | /* |
137 | * These are just misnamed, they actually | ||
138 | * get/put from/to user an int | ||
139 | */ | ||
136 | switch (cmd) { | 140 | switch (cmd) { |
137 | case REISERFS_IOC32_UNPACK: | 141 | case REISERFS_IOC32_UNPACK: |
138 | cmd = REISERFS_IOC_UNPACK; | 142 | cmd = REISERFS_IOC_UNPACK; |
@@ -160,10 +164,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | |||
160 | int reiserfs_commit_write(struct file *f, struct page *page, | 164 | int reiserfs_commit_write(struct file *f, struct page *page, |
161 | unsigned from, unsigned to); | 165 | unsigned from, unsigned to); |
162 | /* | 166 | /* |
163 | ** reiserfs_unpack | 167 | * reiserfs_unpack |
164 | ** Function try to convert tail from direct item into indirect. | 168 | * Function try to convert tail from direct item into indirect. |
165 | ** It set up nopack attribute in the REISERFS_I(inode)->nopack | 169 | * It set up nopack attribute in the REISERFS_I(inode)->nopack |
166 | */ | 170 | */ |
167 | int reiserfs_unpack(struct inode *inode, struct file *filp) | 171 | int reiserfs_unpack(struct inode *inode, struct file *filp) |
168 | { | 172 | { |
169 | int retval = 0; | 173 | int retval = 0; |
@@ -194,9 +198,10 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
194 | goto out; | 198 | goto out; |
195 | } | 199 | } |
196 | 200 | ||
197 | /* we unpack by finding the page with the tail, and calling | 201 | /* |
198 | ** __reiserfs_write_begin on that page. This will force a | 202 | * we unpack by finding the page with the tail, and calling |
199 | ** reiserfs_get_block to unpack the tail for us. | 203 | * __reiserfs_write_begin on that page. This will force a |
204 | * reiserfs_get_block to unpack the tail for us. | ||
200 | */ | 205 | */ |
201 | index = inode->i_size >> PAGE_CACHE_SHIFT; | 206 | index = inode->i_size >> PAGE_CACHE_SHIFT; |
202 | mapping = inode->i_mapping; | 207 | mapping = inode->i_mapping; |
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c index c9f136527386..cb6b826ca5e9 100644 --- a/fs/reiserfs/item_ops.c +++ b/fs/reiserfs/item_ops.c | |||
@@ -5,15 +5,17 @@ | |||
5 | #include <linux/time.h> | 5 | #include <linux/time.h> |
6 | #include "reiserfs.h" | 6 | #include "reiserfs.h" |
7 | 7 | ||
8 | // this contains item handlers for old item types: sd, direct, | 8 | /* |
9 | // indirect, directory | 9 | * this contains item handlers for old item types: sd, direct, |
10 | * indirect, directory | ||
11 | */ | ||
10 | 12 | ||
11 | /* and where are the comments? how about saying where we can find an | 13 | /* |
12 | explanation of each item handler method? -Hans */ | 14 | * and where are the comments? how about saying where we can find an |
15 | * explanation of each item handler method? -Hans | ||
16 | */ | ||
13 | 17 | ||
14 | ////////////////////////////////////////////////////////////////////////////// | 18 | /* stat data functions */ |
15 | // stat data functions | ||
16 | // | ||
17 | static int sd_bytes_number(struct item_head *ih, int block_size) | 19 | static int sd_bytes_number(struct item_head *ih, int block_size) |
18 | { | 20 | { |
19 | return 0; | 21 | return 0; |
@@ -60,7 +62,7 @@ static void sd_print_item(struct item_head *ih, char *item) | |||
60 | 62 | ||
61 | static void sd_check_item(struct item_head *ih, char *item) | 63 | static void sd_check_item(struct item_head *ih, char *item) |
62 | { | 64 | { |
63 | // FIXME: type something here! | 65 | /* unused */ |
64 | } | 66 | } |
65 | 67 | ||
66 | static int sd_create_vi(struct virtual_node *vn, | 68 | static int sd_create_vi(struct virtual_node *vn, |
@@ -68,7 +70,6 @@ static int sd_create_vi(struct virtual_node *vn, | |||
68 | int is_affected, int insert_size) | 70 | int is_affected, int insert_size) |
69 | { | 71 | { |
70 | vi->vi_index = TYPE_STAT_DATA; | 72 | vi->vi_index = TYPE_STAT_DATA; |
71 | //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? | ||
72 | return 0; | 73 | return 0; |
73 | } | 74 | } |
74 | 75 | ||
@@ -117,15 +118,13 @@ static struct item_operations stat_data_ops = { | |||
117 | .print_vi = sd_print_vi | 118 | .print_vi = sd_print_vi |
118 | }; | 119 | }; |
119 | 120 | ||
120 | ////////////////////////////////////////////////////////////////////////////// | 121 | /* direct item functions */ |
121 | // direct item functions | ||
122 | // | ||
123 | static int direct_bytes_number(struct item_head *ih, int block_size) | 122 | static int direct_bytes_number(struct item_head *ih, int block_size) |
124 | { | 123 | { |
125 | return ih_item_len(ih); | 124 | return ih_item_len(ih); |
126 | } | 125 | } |
127 | 126 | ||
128 | // FIXME: this should probably switch to indirect as well | 127 | /* FIXME: this should probably switch to indirect as well */ |
129 | static void direct_decrement_key(struct cpu_key *key) | 128 | static void direct_decrement_key(struct cpu_key *key) |
130 | { | 129 | { |
131 | cpu_key_k_offset_dec(key); | 130 | cpu_key_k_offset_dec(key); |
@@ -144,7 +143,7 @@ static void direct_print_item(struct item_head *ih, char *item) | |||
144 | { | 143 | { |
145 | int j = 0; | 144 | int j = 0; |
146 | 145 | ||
147 | // return; | 146 | /* return; */ |
148 | printk("\""); | 147 | printk("\""); |
149 | while (j < ih_item_len(ih)) | 148 | while (j < ih_item_len(ih)) |
150 | printk("%c", item[j++]); | 149 | printk("%c", item[j++]); |
@@ -153,7 +152,7 @@ static void direct_print_item(struct item_head *ih, char *item) | |||
153 | 152 | ||
154 | static void direct_check_item(struct item_head *ih, char *item) | 153 | static void direct_check_item(struct item_head *ih, char *item) |
155 | { | 154 | { |
156 | // FIXME: type something here! | 155 | /* unused */ |
157 | } | 156 | } |
158 | 157 | ||
159 | static int direct_create_vi(struct virtual_node *vn, | 158 | static int direct_create_vi(struct virtual_node *vn, |
@@ -161,7 +160,6 @@ static int direct_create_vi(struct virtual_node *vn, | |||
161 | int is_affected, int insert_size) | 160 | int is_affected, int insert_size) |
162 | { | 161 | { |
163 | vi->vi_index = TYPE_DIRECT; | 162 | vi->vi_index = TYPE_DIRECT; |
164 | //vi->vi_type |= VI_TYPE_DIRECT; | ||
165 | return 0; | 163 | return 0; |
166 | } | 164 | } |
167 | 165 | ||
@@ -211,16 +209,13 @@ static struct item_operations direct_ops = { | |||
211 | .print_vi = direct_print_vi | 209 | .print_vi = direct_print_vi |
212 | }; | 210 | }; |
213 | 211 | ||
214 | ////////////////////////////////////////////////////////////////////////////// | 212 | /* indirect item functions */ |
215 | // indirect item functions | ||
216 | // | ||
217 | |||
218 | static int indirect_bytes_number(struct item_head *ih, int block_size) | 213 | static int indirect_bytes_number(struct item_head *ih, int block_size) |
219 | { | 214 | { |
220 | return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); | 215 | return ih_item_len(ih) / UNFM_P_SIZE * block_size; |
221 | } | 216 | } |
222 | 217 | ||
223 | // decrease offset, if it becomes 0, change type to stat data | 218 | /* decrease offset, if it becomes 0, change type to stat data */ |
224 | static void indirect_decrement_key(struct cpu_key *key) | 219 | static void indirect_decrement_key(struct cpu_key *key) |
225 | { | 220 | { |
226 | cpu_key_k_offset_dec(key); | 221 | cpu_key_k_offset_dec(key); |
@@ -228,7 +223,7 @@ static void indirect_decrement_key(struct cpu_key *key) | |||
228 | set_cpu_key_k_type(key, TYPE_STAT_DATA); | 223 | set_cpu_key_k_type(key, TYPE_STAT_DATA); |
229 | } | 224 | } |
230 | 225 | ||
231 | // if it is not first item of the body, then it is mergeable | 226 | /* if it is not first item of the body, then it is mergeable */ |
232 | static int indirect_is_left_mergeable(struct reiserfs_key *key, | 227 | static int indirect_is_left_mergeable(struct reiserfs_key *key, |
233 | unsigned long bsize) | 228 | unsigned long bsize) |
234 | { | 229 | { |
@@ -236,7 +231,7 @@ static int indirect_is_left_mergeable(struct reiserfs_key *key, | |||
236 | return (le_key_k_offset(version, key) != 1); | 231 | return (le_key_k_offset(version, key) != 1); |
237 | } | 232 | } |
238 | 233 | ||
239 | // printing of indirect item | 234 | /* printing of indirect item */ |
240 | static void start_new_sequence(__u32 * start, int *len, __u32 new) | 235 | static void start_new_sequence(__u32 * start, int *len, __u32 new) |
241 | { | 236 | { |
242 | *start = new; | 237 | *start = new; |
@@ -295,7 +290,7 @@ static void indirect_print_item(struct item_head *ih, char *item) | |||
295 | 290 | ||
296 | static void indirect_check_item(struct item_head *ih, char *item) | 291 | static void indirect_check_item(struct item_head *ih, char *item) |
297 | { | 292 | { |
298 | // FIXME: type something here! | 293 | /* unused */ |
299 | } | 294 | } |
300 | 295 | ||
301 | static int indirect_create_vi(struct virtual_node *vn, | 296 | static int indirect_create_vi(struct virtual_node *vn, |
@@ -303,7 +298,6 @@ static int indirect_create_vi(struct virtual_node *vn, | |||
303 | int is_affected, int insert_size) | 298 | int is_affected, int insert_size) |
304 | { | 299 | { |
305 | vi->vi_index = TYPE_INDIRECT; | 300 | vi->vi_index = TYPE_INDIRECT; |
306 | //vi->vi_type |= VI_TYPE_INDIRECT; | ||
307 | return 0; | 301 | return 0; |
308 | } | 302 | } |
309 | 303 | ||
@@ -321,16 +315,19 @@ static int indirect_check_right(struct virtual_item *vi, int free) | |||
321 | return indirect_check_left(vi, free, 0, 0); | 315 | return indirect_check_left(vi, free, 0, 0); |
322 | } | 316 | } |
323 | 317 | ||
324 | // return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) | 318 | /* |
319 | * return size in bytes of 'units' units. If first == 0 - calculate | ||
320 | * from the head (left), otherwise - from tail (right) | ||
321 | */ | ||
325 | static int indirect_part_size(struct virtual_item *vi, int first, int units) | 322 | static int indirect_part_size(struct virtual_item *vi, int first, int units) |
326 | { | 323 | { |
327 | // unit of indirect item is byte (yet) | 324 | /* unit of indirect item is byte (yet) */ |
328 | return units; | 325 | return units; |
329 | } | 326 | } |
330 | 327 | ||
331 | static int indirect_unit_num(struct virtual_item *vi) | 328 | static int indirect_unit_num(struct virtual_item *vi) |
332 | { | 329 | { |
333 | // unit of indirect item is byte (yet) | 330 | /* unit of indirect item is byte (yet) */ |
334 | return vi->vi_item_len - IH_SIZE; | 331 | return vi->vi_item_len - IH_SIZE; |
335 | } | 332 | } |
336 | 333 | ||
@@ -356,10 +353,7 @@ static struct item_operations indirect_ops = { | |||
356 | .print_vi = indirect_print_vi | 353 | .print_vi = indirect_print_vi |
357 | }; | 354 | }; |
358 | 355 | ||
359 | ////////////////////////////////////////////////////////////////////////////// | 356 | /* direntry functions */ |
360 | // direntry functions | ||
361 | // | ||
362 | |||
363 | static int direntry_bytes_number(struct item_head *ih, int block_size) | 357 | static int direntry_bytes_number(struct item_head *ih, int block_size) |
364 | { | 358 | { |
365 | reiserfs_warning(NULL, "vs-16090", | 359 | reiserfs_warning(NULL, "vs-16090", |
@@ -428,7 +422,7 @@ static void direntry_check_item(struct item_head *ih, char *item) | |||
428 | int i; | 422 | int i; |
429 | struct reiserfs_de_head *deh; | 423 | struct reiserfs_de_head *deh; |
430 | 424 | ||
431 | // FIXME: type something here! | 425 | /* unused */ |
432 | deh = (struct reiserfs_de_head *)item; | 426 | deh = (struct reiserfs_de_head *)item; |
433 | for (i = 0; i < ih_entry_count(ih); i++, deh++) { | 427 | for (i = 0; i < ih_entry_count(ih); i++, deh++) { |
434 | ; | 428 | ; |
@@ -439,7 +433,8 @@ static void direntry_check_item(struct item_head *ih, char *item) | |||
439 | 433 | ||
440 | /* | 434 | /* |
441 | * function returns old entry number in directory item in real node | 435 | * function returns old entry number in directory item in real node |
442 | * using new entry number in virtual item in virtual node */ | 436 | * using new entry number in virtual item in virtual node |
437 | */ | ||
443 | static inline int old_entry_num(int is_affected, int virtual_entry_num, | 438 | static inline int old_entry_num(int is_affected, int virtual_entry_num, |
444 | int pos_in_item, int mode) | 439 | int pos_in_item, int mode) |
445 | { | 440 | { |
@@ -463,9 +458,11 @@ static inline int old_entry_num(int is_affected, int virtual_entry_num, | |||
463 | return virtual_entry_num - 1; | 458 | return virtual_entry_num - 1; |
464 | } | 459 | } |
465 | 460 | ||
466 | /* Create an array of sizes of directory entries for virtual | 461 | /* |
467 | item. Return space used by an item. FIXME: no control over | 462 | * Create an array of sizes of directory entries for virtual |
468 | consuming of space used by this item handler */ | 463 | * item. Return space used by an item. FIXME: no control over |
464 | * consuming of space used by this item handler | ||
465 | */ | ||
469 | static int direntry_create_vi(struct virtual_node *vn, | 466 | static int direntry_create_vi(struct virtual_node *vn, |
470 | struct virtual_item *vi, | 467 | struct virtual_item *vi, |
471 | int is_affected, int insert_size) | 468 | int is_affected, int insert_size) |
@@ -529,10 +526,10 @@ static int direntry_create_vi(struct virtual_node *vn, | |||
529 | 526 | ||
530 | } | 527 | } |
531 | 528 | ||
532 | // | 529 | /* |
533 | // return number of entries which may fit into specified amount of | 530 | * return number of entries which may fit into specified amount of |
534 | // free space, or -1 if free space is not enough even for 1 entry | 531 | * free space, or -1 if free space is not enough even for 1 entry |
535 | // | 532 | */ |
536 | static int direntry_check_left(struct virtual_item *vi, int free, | 533 | static int direntry_check_left(struct virtual_item *vi, int free, |
537 | int start_skip, int end_skip) | 534 | int start_skip, int end_skip) |
538 | { | 535 | { |
@@ -541,8 +538,8 @@ static int direntry_check_left(struct virtual_item *vi, int free, | |||
541 | struct direntry_uarea *dir_u = vi->vi_uarea; | 538 | struct direntry_uarea *dir_u = vi->vi_uarea; |
542 | 539 | ||
543 | for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { | 540 | for (i = start_skip; i < dir_u->entry_count - end_skip; i++) { |
541 | /* i-th entry doesn't fit into the remaining free space */ | ||
544 | if (dir_u->entry_sizes[i] > free) | 542 | if (dir_u->entry_sizes[i] > free) |
545 | /* i-th entry doesn't fit into the remaining free space */ | ||
546 | break; | 543 | break; |
547 | 544 | ||
548 | free -= dir_u->entry_sizes[i]; | 545 | free -= dir_u->entry_sizes[i]; |
@@ -570,8 +567,8 @@ static int direntry_check_right(struct virtual_item *vi, int free) | |||
570 | struct direntry_uarea *dir_u = vi->vi_uarea; | 567 | struct direntry_uarea *dir_u = vi->vi_uarea; |
571 | 568 | ||
572 | for (i = dir_u->entry_count - 1; i >= 0; i--) { | 569 | for (i = dir_u->entry_count - 1; i >= 0; i--) { |
570 | /* i-th entry doesn't fit into the remaining free space */ | ||
573 | if (dir_u->entry_sizes[i] > free) | 571 | if (dir_u->entry_sizes[i] > free) |
574 | /* i-th entry doesn't fit into the remaining free space */ | ||
575 | break; | 572 | break; |
576 | 573 | ||
577 | free -= dir_u->entry_sizes[i]; | 574 | free -= dir_u->entry_sizes[i]; |
@@ -643,9 +640,7 @@ static struct item_operations direntry_ops = { | |||
643 | .print_vi = direntry_print_vi | 640 | .print_vi = direntry_print_vi |
644 | }; | 641 | }; |
645 | 642 | ||
646 | ////////////////////////////////////////////////////////////////////////////// | 643 | /* Error catching functions to catch errors caused by incorrect item types. */ |
647 | // Error catching functions to catch errors caused by incorrect item types. | ||
648 | // | ||
649 | static int errcatch_bytes_number(struct item_head *ih, int block_size) | 644 | static int errcatch_bytes_number(struct item_head *ih, int block_size) |
650 | { | 645 | { |
651 | reiserfs_warning(NULL, "green-16001", | 646 | reiserfs_warning(NULL, "green-16001", |
@@ -685,8 +680,12 @@ static int errcatch_create_vi(struct virtual_node *vn, | |||
685 | { | 680 | { |
686 | reiserfs_warning(NULL, "green-16006", | 681 | reiserfs_warning(NULL, "green-16006", |
687 | "Invalid item type observed, run fsck ASAP"); | 682 | "Invalid item type observed, run fsck ASAP"); |
688 | return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where | 683 | /* |
689 | // this operation is called from is of return type void. | 684 | * We might return -1 here as well, but it won't help as |
685 | * create_virtual_node() from where this operation is called | ||
686 | * from is of return type void. | ||
687 | */ | ||
688 | return 0; | ||
690 | } | 689 | } |
691 | 690 | ||
692 | static int errcatch_check_left(struct virtual_item *vi, int free, | 691 | static int errcatch_check_left(struct virtual_item *vi, int free, |
@@ -739,9 +738,6 @@ static struct item_operations errcatch_ops = { | |||
739 | errcatch_print_vi | 738 | errcatch_print_vi |
740 | }; | 739 | }; |
741 | 740 | ||
742 | ////////////////////////////////////////////////////////////////////////////// | ||
743 | // | ||
744 | // | ||
745 | #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) | 741 | #if ! (TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3) |
746 | #error Item types must use disk-format assigned values. | 742 | #error Item types must use disk-format assigned values. |
747 | #endif | 743 | #endif |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 225921126455..48f03e5d16ef 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -1,38 +1,38 @@ | |||
1 | /* | 1 | /* |
2 | ** Write ahead logging implementation copyright Chris Mason 2000 | 2 | * Write ahead logging implementation copyright Chris Mason 2000 |
3 | ** | 3 | * |
4 | ** The background commits make this code very interrelated, and | 4 | * The background commits make this code very interrelated, and |
5 | ** overly complex. I need to rethink things a bit....The major players: | 5 | * overly complex. I need to rethink things a bit....The major players: |
6 | ** | 6 | * |
7 | ** journal_begin -- call with the number of blocks you expect to log. | 7 | * journal_begin -- call with the number of blocks you expect to log. |
8 | ** If the current transaction is too | 8 | * If the current transaction is too |
9 | ** old, it will block until the current transaction is | 9 | * old, it will block until the current transaction is |
10 | ** finished, and then start a new one. | 10 | * finished, and then start a new one. |
11 | ** Usually, your transaction will get joined in with | 11 | * Usually, your transaction will get joined in with |
12 | ** previous ones for speed. | 12 | * previous ones for speed. |
13 | ** | 13 | * |
14 | ** journal_join -- same as journal_begin, but won't block on the current | 14 | * journal_join -- same as journal_begin, but won't block on the current |
15 | ** transaction regardless of age. Don't ever call | 15 | * transaction regardless of age. Don't ever call |
16 | ** this. Ever. There are only two places it should be | 16 | * this. Ever. There are only two places it should be |
17 | ** called from, and they are both inside this file. | 17 | * called from, and they are both inside this file. |
18 | ** | 18 | * |
19 | ** journal_mark_dirty -- adds blocks into this transaction. clears any flags | 19 | * journal_mark_dirty -- adds blocks into this transaction. clears any flags |
20 | ** that might make them get sent to disk | 20 | * that might make them get sent to disk |
21 | ** and then marks them BH_JDirty. Puts the buffer head | 21 | * and then marks them BH_JDirty. Puts the buffer head |
22 | ** into the current transaction hash. | 22 | * into the current transaction hash. |
23 | ** | 23 | * |
24 | ** journal_end -- if the current transaction is batchable, it does nothing | 24 | * journal_end -- if the current transaction is batchable, it does nothing |
25 | ** otherwise, it could do an async/synchronous commit, or | 25 | * otherwise, it could do an async/synchronous commit, or |
26 | ** a full flush of all log and real blocks in the | 26 | * a full flush of all log and real blocks in the |
27 | ** transaction. | 27 | * transaction. |
28 | ** | 28 | * |
29 | ** flush_old_commits -- if the current transaction is too old, it is ended and | 29 | * flush_old_commits -- if the current transaction is too old, it is ended and |
30 | ** commit blocks are sent to disk. Forces commit blocks | 30 | * commit blocks are sent to disk. Forces commit blocks |
31 | ** to disk for all backgrounded commits that have been | 31 | * to disk for all backgrounded commits that have been |
32 | ** around too long. | 32 | * around too long. |
33 | ** -- Note, if you call this as an immediate flush from | 33 | * -- Note, if you call this as an immediate flush from |
34 | ** from within kupdate, it will ignore the immediate flag | 34 | * from within kupdate, it will ignore the immediate flag |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #include <linux/time.h> | 37 | #include <linux/time.h> |
38 | #include <linux/semaphore.h> | 38 | #include <linux/semaphore.h> |
@@ -58,16 +58,19 @@ | |||
58 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ | 58 | #define JOURNAL_WORK_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ |
59 | j_working_list)) | 59 | j_working_list)) |
60 | 60 | ||
61 | #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit | 61 | /* must be correct to keep the desc and commit structs at 4k */ |
62 | structs at 4k */ | 62 | #define JOURNAL_TRANS_HALF 1018 |
63 | #define BUFNR 64 /*read ahead */ | 63 | #define BUFNR 64 /*read ahead */ |
64 | 64 | ||
65 | /* cnode stat bits. Move these into reiserfs_fs.h */ | 65 | /* cnode stat bits. Move these into reiserfs_fs.h */ |
66 | 66 | ||
67 | #define BLOCK_FREED 2 /* this block was freed, and can't be written. */ | 67 | /* this block was freed, and can't be written. */ |
68 | #define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ | 68 | #define BLOCK_FREED 2 |
69 | /* this block was freed during this transaction, and can't be written */ | ||
70 | #define BLOCK_FREED_HOLDER 3 | ||
69 | 71 | ||
70 | #define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ | 72 | /* used in flush_journal_list */ |
73 | #define BLOCK_NEEDS_FLUSH 4 | ||
71 | #define BLOCK_DIRTIED 5 | 74 | #define BLOCK_DIRTIED 5 |
72 | 75 | ||
73 | /* journal list state bits */ | 76 | /* journal list state bits */ |
@@ -100,8 +103,10 @@ static void queue_log_writer(struct super_block *s); | |||
100 | /* values for join in do_journal_begin_r */ | 103 | /* values for join in do_journal_begin_r */ |
101 | enum { | 104 | enum { |
102 | JBEGIN_REG = 0, /* regular journal begin */ | 105 | JBEGIN_REG = 0, /* regular journal begin */ |
103 | JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ | 106 | /* join the running transaction if at all possible */ |
104 | JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ | 107 | JBEGIN_JOIN = 1, |
108 | /* called from cleanup code, ignores aborted flag */ | ||
109 | JBEGIN_ABORT = 2, | ||
105 | }; | 110 | }; |
106 | 111 | ||
107 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 112 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, |
@@ -116,10 +121,11 @@ static void init_journal_hash(struct super_block *sb) | |||
116 | } | 121 | } |
117 | 122 | ||
118 | /* | 123 | /* |
119 | ** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to | 124 | * clears BH_Dirty and sticks the buffer on the clean list. Called because |
120 | ** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for | 125 | * I can't allow refile_buffer to make schedule happen after I've freed a |
121 | ** more details. | 126 | * block. Look at remove_from_transaction and journal_mark_freed for |
122 | */ | 127 | * more details. |
128 | */ | ||
123 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) | 129 | static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) |
124 | { | 130 | { |
125 | if (bh) { | 131 | if (bh) { |
@@ -197,7 +203,8 @@ static void allocate_bitmap_nodes(struct super_block *sb) | |||
197 | list_add(&bn->list, &journal->j_bitmap_nodes); | 203 | list_add(&bn->list, &journal->j_bitmap_nodes); |
198 | journal->j_free_bitmap_nodes++; | 204 | journal->j_free_bitmap_nodes++; |
199 | } else { | 205 | } else { |
200 | break; /* this is ok, we'll try again when more are needed */ | 206 | /* this is ok, we'll try again when more are needed */ |
207 | break; | ||
201 | } | 208 | } |
202 | } | 209 | } |
203 | } | 210 | } |
@@ -232,8 +239,8 @@ static void cleanup_bitmap_list(struct super_block *sb, | |||
232 | } | 239 | } |
233 | 240 | ||
234 | /* | 241 | /* |
235 | ** only call this on FS unmount. | 242 | * only call this on FS unmount. |
236 | */ | 243 | */ |
237 | static int free_list_bitmaps(struct super_block *sb, | 244 | static int free_list_bitmaps(struct super_block *sb, |
238 | struct reiserfs_list_bitmap *jb_array) | 245 | struct reiserfs_list_bitmap *jb_array) |
239 | { | 246 | { |
@@ -268,9 +275,9 @@ static int free_bitmap_nodes(struct super_block *sb) | |||
268 | } | 275 | } |
269 | 276 | ||
270 | /* | 277 | /* |
271 | ** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. | 278 | * get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. |
272 | ** jb_array is the array to be filled in. | 279 | * jb_array is the array to be filled in. |
273 | */ | 280 | */ |
274 | int reiserfs_allocate_list_bitmaps(struct super_block *sb, | 281 | int reiserfs_allocate_list_bitmaps(struct super_block *sb, |
275 | struct reiserfs_list_bitmap *jb_array, | 282 | struct reiserfs_list_bitmap *jb_array, |
276 | unsigned int bmap_nr) | 283 | unsigned int bmap_nr) |
@@ -299,9 +306,9 @@ int reiserfs_allocate_list_bitmaps(struct super_block *sb, | |||
299 | } | 306 | } |
300 | 307 | ||
301 | /* | 308 | /* |
302 | ** find an available list bitmap. If you can't find one, flush a commit list | 309 | * find an available list bitmap. If you can't find one, flush a commit list |
303 | ** and try again | 310 | * and try again |
304 | */ | 311 | */ |
305 | static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, | 312 | static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, |
306 | struct reiserfs_journal_list | 313 | struct reiserfs_journal_list |
307 | *jl) | 314 | *jl) |
@@ -325,18 +332,18 @@ static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *sb, | |||
325 | break; | 332 | break; |
326 | } | 333 | } |
327 | } | 334 | } |
328 | if (jb->journal_list) { /* double check to make sure if flushed correctly */ | 335 | /* double check to make sure if flushed correctly */ |
336 | if (jb->journal_list) | ||
329 | return NULL; | 337 | return NULL; |
330 | } | ||
331 | jb->journal_list = jl; | 338 | jb->journal_list = jl; |
332 | return jb; | 339 | return jb; |
333 | } | 340 | } |
334 | 341 | ||
335 | /* | 342 | /* |
336 | ** allocates a new chunk of X nodes, and links them all together as a list. | 343 | * allocates a new chunk of X nodes, and links them all together as a list. |
337 | ** Uses the cnode->next and cnode->prev pointers | 344 | * Uses the cnode->next and cnode->prev pointers |
338 | ** returns NULL on failure | 345 | * returns NULL on failure |
339 | */ | 346 | */ |
340 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | 347 | static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) |
341 | { | 348 | { |
342 | struct reiserfs_journal_cnode *head; | 349 | struct reiserfs_journal_cnode *head; |
@@ -358,9 +365,7 @@ static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) | |||
358 | return head; | 365 | return head; |
359 | } | 366 | } |
360 | 367 | ||
361 | /* | 368 | /* pulls a cnode off the free list, or returns NULL on failure */ |
362 | ** pulls a cnode off the free list, or returns NULL on failure | ||
363 | */ | ||
364 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) | 369 | static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) |
365 | { | 370 | { |
366 | struct reiserfs_journal_cnode *cn; | 371 | struct reiserfs_journal_cnode *cn; |
@@ -386,8 +391,8 @@ static struct reiserfs_journal_cnode *get_cnode(struct super_block *sb) | |||
386 | } | 391 | } |
387 | 392 | ||
388 | /* | 393 | /* |
389 | ** returns a cnode to the free list | 394 | * returns a cnode to the free list |
390 | */ | 395 | */ |
391 | static void free_cnode(struct super_block *sb, | 396 | static void free_cnode(struct super_block *sb, |
392 | struct reiserfs_journal_cnode *cn) | 397 | struct reiserfs_journal_cnode *cn) |
393 | { | 398 | { |
@@ -412,7 +417,10 @@ static void clear_prepared_bits(struct buffer_head *bh) | |||
412 | clear_buffer_journal_restore_dirty(bh); | 417 | clear_buffer_journal_restore_dirty(bh); |
413 | } | 418 | } |
414 | 419 | ||
415 | /* return a cnode with same dev, block number and size in table, or null if not found */ | 420 | /* |
421 | * return a cnode with same dev, block number and size in table, | ||
422 | * or null if not found | ||
423 | */ | ||
416 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | 424 | static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct |
417 | super_block | 425 | super_block |
418 | *sb, | 426 | *sb, |
@@ -432,23 +440,24 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct | |||
432 | } | 440 | } |
433 | 441 | ||
434 | /* | 442 | /* |
435 | ** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated | 443 | * this actually means 'can this block be reallocated yet?'. If you set |
436 | ** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever | 444 | * search_all, a block can only be allocated if it is not in the current |
437 | ** being overwritten by a replay after crashing. | 445 | * transaction, was not freed by the current transaction, and has no chance |
438 | ** | 446 | * of ever being overwritten by a replay after crashing. |
439 | ** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting | 447 | * |
440 | ** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make | 448 | * If you don't set search_all, a block can only be allocated if it is not |
441 | ** sure you never write the block without logging it. | 449 | * in the current transaction. Since deleting a block removes it from the |
442 | ** | 450 | * current transaction, this case should never happen. If you don't set |
443 | ** next_zero_bit is a suggestion about the next block to try for find_forward. | 451 | * search_all, make sure you never write the block without logging it. |
444 | ** when bl is rejected because it is set in a journal list bitmap, we search | 452 | * |
445 | ** for the next zero bit in the bitmap that rejected bl. Then, we return that | 453 | * next_zero_bit is a suggestion about the next block to try for find_forward. |
446 | ** through next_zero_bit for find_forward to try. | 454 | * when bl is rejected because it is set in a journal list bitmap, we search |
447 | ** | 455 | * for the next zero bit in the bitmap that rejected bl. Then, we return |
448 | ** Just because we return something in next_zero_bit does not mean we won't | 456 | * that through next_zero_bit for find_forward to try. |
449 | ** reject it on the next call to reiserfs_in_journal | 457 | * |
450 | ** | 458 | * Just because we return something in next_zero_bit does not mean we won't |
451 | */ | 459 | * reject it on the next call to reiserfs_in_journal |
460 | */ | ||
452 | int reiserfs_in_journal(struct super_block *sb, | 461 | int reiserfs_in_journal(struct super_block *sb, |
453 | unsigned int bmap_nr, int bit_nr, int search_all, | 462 | unsigned int bmap_nr, int bit_nr, int search_all, |
454 | b_blocknr_t * next_zero_bit) | 463 | b_blocknr_t * next_zero_bit) |
@@ -462,9 +471,11 @@ int reiserfs_in_journal(struct super_block *sb, | |||
462 | *next_zero_bit = 0; /* always start this at zero. */ | 471 | *next_zero_bit = 0; /* always start this at zero. */ |
463 | 472 | ||
464 | PROC_INFO_INC(sb, journal.in_journal); | 473 | PROC_INFO_INC(sb, journal.in_journal); |
465 | /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. | 474 | /* |
466 | ** if we crash before the transaction that freed it commits, this transaction won't | 475 | * If we aren't doing a search_all, this is a metablock, and it |
467 | ** have committed either, and the block will never be written | 476 | * will be logged before use. if we crash before the transaction |
477 | * that freed it commits, this transaction won't have committed | ||
478 | * either, and the block will never be written | ||
468 | */ | 479 | */ |
469 | if (search_all) { | 480 | if (search_all) { |
470 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { | 481 | for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) { |
@@ -504,8 +515,7 @@ int reiserfs_in_journal(struct super_block *sb, | |||
504 | return 0; | 515 | return 0; |
505 | } | 516 | } |
506 | 517 | ||
507 | /* insert cn into table | 518 | /* insert cn into table */ |
508 | */ | ||
509 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, | 519 | static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, |
510 | struct reiserfs_journal_cnode *cn) | 520 | struct reiserfs_journal_cnode *cn) |
511 | { | 521 | { |
@@ -551,10 +561,10 @@ static inline void put_journal_list(struct super_block *s, | |||
551 | } | 561 | } |
552 | 562 | ||
553 | /* | 563 | /* |
554 | ** this used to be much more involved, and I'm keeping it just in case things get ugly again. | 564 | * this used to be much more involved, and I'm keeping it just in case |
555 | ** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a | 565 | * things get ugly again. it gets called by flush_commit_list, and |
556 | ** transaction. | 566 | * cleans up any data stored about blocks freed during a transaction. |
557 | */ | 567 | */ |
558 | static void cleanup_freed_for_journal_list(struct super_block *sb, | 568 | static void cleanup_freed_for_journal_list(struct super_block *sb, |
559 | struct reiserfs_journal_list *jl) | 569 | struct reiserfs_journal_list *jl) |
560 | { | 570 | { |
@@ -753,7 +763,8 @@ static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, | |||
753 | get_bh(bh); | 763 | get_bh(bh); |
754 | jh = alloc_jh(); | 764 | jh = alloc_jh(); |
755 | spin_lock(&j->j_dirty_buffers_lock); | 765 | spin_lock(&j->j_dirty_buffers_lock); |
756 | /* buffer must be locked for __add_jh, should be able to have | 766 | /* |
767 | * buffer must be locked for __add_jh, should be able to have | ||
757 | * two adds at the same time | 768 | * two adds at the same time |
758 | */ | 769 | */ |
759 | BUG_ON(bh->b_private); | 770 | BUG_ON(bh->b_private); |
@@ -811,7 +822,8 @@ static int write_ordered_buffers(spinlock_t * lock, | |||
811 | spin_lock(lock); | 822 | spin_lock(lock); |
812 | goto loop_next; | 823 | goto loop_next; |
813 | } | 824 | } |
814 | /* in theory, dirty non-uptodate buffers should never get here, | 825 | /* |
826 | * in theory, dirty non-uptodate buffers should never get here, | ||
815 | * but the upper layer io error paths still have a few quirks. | 827 | * but the upper layer io error paths still have a few quirks. |
816 | * Handle them here as gracefully as we can | 828 | * Handle them here as gracefully as we can |
817 | */ | 829 | */ |
@@ -849,13 +861,14 @@ static int write_ordered_buffers(spinlock_t * lock, | |||
849 | if (!buffer_uptodate(bh)) { | 861 | if (!buffer_uptodate(bh)) { |
850 | ret = -EIO; | 862 | ret = -EIO; |
851 | } | 863 | } |
852 | /* ugly interaction with invalidatepage here. | 864 | /* |
853 | * reiserfs_invalidate_page will pin any buffer that has a valid | 865 | * ugly interaction with invalidatepage here. |
854 | * journal head from an older transaction. If someone else sets | 866 | * reiserfs_invalidate_page will pin any buffer that has a |
855 | * our buffer dirty after we write it in the first loop, and | 867 | * valid journal head from an older transaction. If someone |
856 | * then someone truncates the page away, nobody will ever write | 868 | * else sets our buffer dirty after we write it in the first |
857 | * the buffer. We're safe if we write the page one last time | 869 | * loop, and then someone truncates the page away, nobody |
858 | * after freeing the journal header. | 870 | * will ever write the buffer. We're safe if we write the |
871 | * page one last time after freeing the journal header. | ||
859 | */ | 872 | */ |
860 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { | 873 | if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { |
861 | spin_unlock(lock); | 874 | spin_unlock(lock); |
@@ -916,9 +929,11 @@ static int flush_older_commits(struct super_block *s, | |||
916 | if (!journal_list_still_alive(s, trans_id)) | 929 | if (!journal_list_still_alive(s, trans_id)) |
917 | return 1; | 930 | return 1; |
918 | 931 | ||
919 | /* the one we just flushed is gone, this means all | 932 | /* |
920 | * older lists are also gone, so first_jl is no longer | 933 | * the one we just flushed is gone, this means |
921 | * valid either. Go back to the beginning. | 934 | * all older lists are also gone, so first_jl |
935 | * is no longer valid either. Go back to the | ||
936 | * beginning. | ||
922 | */ | 937 | */ |
923 | if (!journal_list_still_alive | 938 | if (!journal_list_still_alive |
924 | (s, other_trans_id)) { | 939 | (s, other_trans_id)) { |
@@ -951,12 +966,12 @@ static int reiserfs_async_progress_wait(struct super_block *s) | |||
951 | } | 966 | } |
952 | 967 | ||
953 | /* | 968 | /* |
954 | ** if this journal list still has commit blocks unflushed, send them to disk. | 969 | * if this journal list still has commit blocks unflushed, send them to disk. |
955 | ** | 970 | * |
956 | ** log areas must be flushed in order (transaction 2 can't commit before transaction 1) | 971 | * log areas must be flushed in order (transaction 2 can't commit before |
957 | ** Before the commit block can by written, every other log block must be safely on disk | 972 | * transaction 1) Before the commit block can by written, every other log |
958 | ** | 973 | * block must be safely on disk |
959 | */ | 974 | */ |
960 | static int flush_commit_list(struct super_block *s, | 975 | static int flush_commit_list(struct super_block *s, |
961 | struct reiserfs_journal_list *jl, int flushall) | 976 | struct reiserfs_journal_list *jl, int flushall) |
962 | { | 977 | { |
@@ -975,8 +990,9 @@ static int flush_commit_list(struct super_block *s, | |||
975 | return 0; | 990 | return 0; |
976 | } | 991 | } |
977 | 992 | ||
978 | /* before we can put our commit blocks on disk, we have to make sure everyone older than | 993 | /* |
979 | ** us is on disk too | 994 | * before we can put our commit blocks on disk, we have to make |
995 | * sure everyone older than us is on disk too | ||
980 | */ | 996 | */ |
981 | BUG_ON(jl->j_len <= 0); | 997 | BUG_ON(jl->j_len <= 0); |
982 | BUG_ON(trans_id == journal->j_trans_id); | 998 | BUG_ON(trans_id == journal->j_trans_id); |
@@ -984,7 +1000,10 @@ static int flush_commit_list(struct super_block *s, | |||
984 | get_journal_list(jl); | 1000 | get_journal_list(jl); |
985 | if (flushall) { | 1001 | if (flushall) { |
986 | if (flush_older_commits(s, jl) == 1) { | 1002 | if (flush_older_commits(s, jl) == 1) { |
987 | /* list disappeared during flush_older_commits. return */ | 1003 | /* |
1004 | * list disappeared during flush_older_commits. | ||
1005 | * return | ||
1006 | */ | ||
988 | goto put_jl; | 1007 | goto put_jl; |
989 | } | 1008 | } |
990 | } | 1009 | } |
@@ -1056,9 +1075,10 @@ static int flush_commit_list(struct super_block *s, | |||
1056 | depth = reiserfs_write_unlock_nested(s); | 1075 | depth = reiserfs_write_unlock_nested(s); |
1057 | __wait_on_buffer(tbh); | 1076 | __wait_on_buffer(tbh); |
1058 | reiserfs_write_lock_nested(s, depth); | 1077 | reiserfs_write_lock_nested(s, depth); |
1059 | // since we're using ll_rw_blk above, it might have skipped over | 1078 | /* |
1060 | // a locked buffer. Double check here | 1079 | * since we're using ll_rw_blk above, it might have skipped |
1061 | // | 1080 | * over a locked buffer. Double check here |
1081 | */ | ||
1062 | /* redundant, sync_dirty_buffer() checks */ | 1082 | /* redundant, sync_dirty_buffer() checks */ |
1063 | if (buffer_dirty(tbh)) { | 1083 | if (buffer_dirty(tbh)) { |
1064 | depth = reiserfs_write_unlock_nested(s); | 1084 | depth = reiserfs_write_unlock_nested(s); |
@@ -1072,17 +1092,21 @@ static int flush_commit_list(struct super_block *s, | |||
1072 | #endif | 1092 | #endif |
1073 | retval = -EIO; | 1093 | retval = -EIO; |
1074 | } | 1094 | } |
1075 | put_bh(tbh); /* once for journal_find_get_block */ | 1095 | /* once for journal_find_get_block */ |
1076 | put_bh(tbh); /* once due to original getblk in do_journal_end */ | 1096 | put_bh(tbh); |
1097 | /* once due to original getblk in do_journal_end */ | ||
1098 | put_bh(tbh); | ||
1077 | atomic_dec(&(jl->j_commit_left)); | 1099 | atomic_dec(&(jl->j_commit_left)); |
1078 | } | 1100 | } |
1079 | 1101 | ||
1080 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); | 1102 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); |
1081 | 1103 | ||
1082 | /* If there was a write error in the journal - we can't commit | 1104 | /* |
1105 | * If there was a write error in the journal - we can't commit | ||
1083 | * this transaction - it will be invalid and, if successful, | 1106 | * this transaction - it will be invalid and, if successful, |
1084 | * will just end up propagating the write error out to | 1107 | * will just end up propagating the write error out to |
1085 | * the file system. */ | 1108 | * the file system. |
1109 | */ | ||
1086 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { | 1110 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { |
1087 | if (buffer_dirty(jl->j_commit_bh)) | 1111 | if (buffer_dirty(jl->j_commit_bh)) |
1088 | BUG(); | 1112 | BUG(); |
@@ -1095,9 +1119,11 @@ static int flush_commit_list(struct super_block *s, | |||
1095 | reiserfs_write_lock_nested(s, depth); | 1119 | reiserfs_write_lock_nested(s, depth); |
1096 | } | 1120 | } |
1097 | 1121 | ||
1098 | /* If there was a write error in the journal - we can't commit this | 1122 | /* |
1123 | * If there was a write error in the journal - we can't commit this | ||
1099 | * transaction - it will be invalid and, if successful, will just end | 1124 | * transaction - it will be invalid and, if successful, will just end |
1100 | * up propagating the write error out to the filesystem. */ | 1125 | * up propagating the write error out to the filesystem. |
1126 | */ | ||
1101 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { | 1127 | if (unlikely(!buffer_uptodate(jl->j_commit_bh))) { |
1102 | #ifdef CONFIG_REISERFS_CHECK | 1128 | #ifdef CONFIG_REISERFS_CHECK |
1103 | reiserfs_warning(s, "journal-615", "buffer write failed"); | 1129 | reiserfs_warning(s, "journal-615", "buffer write failed"); |
@@ -1112,7 +1138,10 @@ static int flush_commit_list(struct super_block *s, | |||
1112 | } | 1138 | } |
1113 | journal->j_last_commit_id = jl->j_trans_id; | 1139 | journal->j_last_commit_id = jl->j_trans_id; |
1114 | 1140 | ||
1115 | /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ | 1141 | /* |
1142 | * now, every commit block is on the disk. It is safe to allow | ||
1143 | * blocks freed during this transaction to be reallocated | ||
1144 | */ | ||
1116 | cleanup_freed_for_journal_list(s, jl); | 1145 | cleanup_freed_for_journal_list(s, jl); |
1117 | 1146 | ||
1118 | retval = retval ? retval : journal->j_errno; | 1147 | retval = retval ? retval : journal->j_errno; |
@@ -1136,9 +1165,9 @@ static int flush_commit_list(struct super_block *s, | |||
1136 | } | 1165 | } |
1137 | 1166 | ||
1138 | /* | 1167 | /* |
1139 | ** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or | 1168 | * flush_journal_list frequently needs to find a newer transaction for a |
1140 | ** returns NULL if it can't find anything | 1169 | * given block. This does that, or returns NULL if it can't find anything |
1141 | */ | 1170 | */ |
1142 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct | 1171 | static struct reiserfs_journal_list *find_newer_jl_for_cn(struct |
1143 | reiserfs_journal_cnode | 1172 | reiserfs_journal_cnode |
1144 | *cn) | 1173 | *cn) |
@@ -1162,10 +1191,11 @@ static void remove_journal_hash(struct super_block *, | |||
1162 | int); | 1191 | int); |
1163 | 1192 | ||
1164 | /* | 1193 | /* |
1165 | ** once all the real blocks have been flushed, it is safe to remove them from the | 1194 | * once all the real blocks have been flushed, it is safe to remove them |
1166 | ** journal list for this transaction. Aside from freeing the cnode, this also allows the | 1195 | * from the journal list for this transaction. Aside from freeing the |
1167 | ** block to be reallocated for data blocks if it had been deleted. | 1196 | * cnode, this also allows the block to be reallocated for data blocks |
1168 | */ | 1197 | * if it had been deleted. |
1198 | */ | ||
1169 | static void remove_all_from_journal_list(struct super_block *sb, | 1199 | static void remove_all_from_journal_list(struct super_block *sb, |
1170 | struct reiserfs_journal_list *jl, | 1200 | struct reiserfs_journal_list *jl, |
1171 | int debug) | 1201 | int debug) |
@@ -1174,8 +1204,9 @@ static void remove_all_from_journal_list(struct super_block *sb, | |||
1174 | struct reiserfs_journal_cnode *cn, *last; | 1204 | struct reiserfs_journal_cnode *cn, *last; |
1175 | cn = jl->j_realblock; | 1205 | cn = jl->j_realblock; |
1176 | 1206 | ||
1177 | /* which is better, to lock once around the whole loop, or | 1207 | /* |
1178 | ** to lock for each call to remove_journal_hash? | 1208 | * which is better, to lock once around the whole loop, or |
1209 | * to lock for each call to remove_journal_hash? | ||
1179 | */ | 1210 | */ |
1180 | while (cn) { | 1211 | while (cn) { |
1181 | if (cn->blocknr != 0) { | 1212 | if (cn->blocknr != 0) { |
@@ -1197,12 +1228,13 @@ static void remove_all_from_journal_list(struct super_block *sb, | |||
1197 | } | 1228 | } |
1198 | 1229 | ||
1199 | /* | 1230 | /* |
1200 | ** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. | 1231 | * if this timestamp is greater than the timestamp we wrote last to the |
1201 | ** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start | 1232 | * header block, write it to the header block. once this is done, I can |
1202 | ** releasing blocks in this transaction for reuse as data blocks. | 1233 | * safely say the log area for this transaction won't ever be replayed, |
1203 | ** called by flush_journal_list, before it calls remove_all_from_journal_list | 1234 | * and I can start releasing blocks in this transaction for reuse as data |
1204 | ** | 1235 | * blocks. called by flush_journal_list, before it calls |
1205 | */ | 1236 | * remove_all_from_journal_list |
1237 | */ | ||
1206 | static int _update_journal_header_block(struct super_block *sb, | 1238 | static int _update_journal_header_block(struct super_block *sb, |
1207 | unsigned long offset, | 1239 | unsigned long offset, |
1208 | unsigned int trans_id) | 1240 | unsigned int trans_id) |
@@ -1272,7 +1304,8 @@ static int flush_older_journal_lists(struct super_block *sb, | |||
1272 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 1304 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
1273 | unsigned int trans_id = jl->j_trans_id; | 1305 | unsigned int trans_id = jl->j_trans_id; |
1274 | 1306 | ||
1275 | /* we know we are the only ones flushing things, no extra race | 1307 | /* |
1308 | * we know we are the only ones flushing things, no extra race | ||
1276 | * protection is required. | 1309 | * protection is required. |
1277 | */ | 1310 | */ |
1278 | restart: | 1311 | restart: |
@@ -1302,15 +1335,16 @@ static void del_from_work_list(struct super_block *s, | |||
1302 | } | 1335 | } |
1303 | } | 1336 | } |
1304 | 1337 | ||
1305 | /* flush a journal list, both commit and real blocks | 1338 | /* |
1306 | ** | 1339 | * flush a journal list, both commit and real blocks |
1307 | ** always set flushall to 1, unless you are calling from inside | 1340 | * |
1308 | ** flush_journal_list | 1341 | * always set flushall to 1, unless you are calling from inside |
1309 | ** | 1342 | * flush_journal_list |
1310 | ** IMPORTANT. This can only be called while there are no journal writers, | 1343 | * |
1311 | ** and the journal is locked. That means it can only be called from | 1344 | * IMPORTANT. This can only be called while there are no journal writers, |
1312 | ** do_journal_end, or by journal_release | 1345 | * and the journal is locked. That means it can only be called from |
1313 | */ | 1346 | * do_journal_end, or by journal_release |
1347 | */ | ||
1314 | static int flush_journal_list(struct super_block *s, | 1348 | static int flush_journal_list(struct super_block *s, |
1315 | struct reiserfs_journal_list *jl, int flushall) | 1349 | struct reiserfs_journal_list *jl, int flushall) |
1316 | { | 1350 | { |
@@ -1352,8 +1386,9 @@ static int flush_journal_list(struct super_block *s, | |||
1352 | goto flush_older_and_return; | 1386 | goto flush_older_and_return; |
1353 | } | 1387 | } |
1354 | 1388 | ||
1355 | /* start by putting the commit list on disk. This will also flush | 1389 | /* |
1356 | ** the commit lists of any olders transactions | 1390 | * start by putting the commit list on disk. This will also flush |
1391 | * the commit lists of any olders transactions | ||
1357 | */ | 1392 | */ |
1358 | flush_commit_list(s, jl, 1); | 1393 | flush_commit_list(s, jl, 1); |
1359 | 1394 | ||
@@ -1367,8 +1402,9 @@ static int flush_journal_list(struct super_block *s, | |||
1367 | goto flush_older_and_return; | 1402 | goto flush_older_and_return; |
1368 | } | 1403 | } |
1369 | 1404 | ||
1370 | /* loop through each cnode, see if we need to write it, | 1405 | /* |
1371 | ** or wait on a more recent transaction, or just ignore it | 1406 | * loop through each cnode, see if we need to write it, |
1407 | * or wait on a more recent transaction, or just ignore it | ||
1372 | */ | 1408 | */ |
1373 | if (atomic_read(&(journal->j_wcount)) != 0) { | 1409 | if (atomic_read(&(journal->j_wcount)) != 0) { |
1374 | reiserfs_panic(s, "journal-844", "journal list is flushing, " | 1410 | reiserfs_panic(s, "journal-844", "journal list is flushing, " |
@@ -1384,20 +1420,25 @@ static int flush_journal_list(struct super_block *s, | |||
1384 | goto free_cnode; | 1420 | goto free_cnode; |
1385 | } | 1421 | } |
1386 | 1422 | ||
1387 | /* This transaction failed commit. Don't write out to the disk */ | 1423 | /* |
1424 | * This transaction failed commit. | ||
1425 | * Don't write out to the disk | ||
1426 | */ | ||
1388 | if (!(jl->j_state & LIST_DIRTY)) | 1427 | if (!(jl->j_state & LIST_DIRTY)) |
1389 | goto free_cnode; | 1428 | goto free_cnode; |
1390 | 1429 | ||
1391 | pjl = find_newer_jl_for_cn(cn); | 1430 | pjl = find_newer_jl_for_cn(cn); |
1392 | /* the order is important here. We check pjl to make sure we | 1431 | /* |
1393 | ** don't clear BH_JDirty_wait if we aren't the one writing this | 1432 | * the order is important here. We check pjl to make sure we |
1394 | ** block to disk | 1433 | * don't clear BH_JDirty_wait if we aren't the one writing this |
1434 | * block to disk | ||
1395 | */ | 1435 | */ |
1396 | if (!pjl && cn->bh) { | 1436 | if (!pjl && cn->bh) { |
1397 | saved_bh = cn->bh; | 1437 | saved_bh = cn->bh; |
1398 | 1438 | ||
1399 | /* we do this to make sure nobody releases the buffer while | 1439 | /* |
1400 | ** we are working with it | 1440 | * we do this to make sure nobody releases the |
1441 | * buffer while we are working with it | ||
1401 | */ | 1442 | */ |
1402 | get_bh(saved_bh); | 1443 | get_bh(saved_bh); |
1403 | 1444 | ||
@@ -1406,13 +1447,17 @@ static int flush_journal_list(struct super_block *s, | |||
1406 | was_jwait = 1; | 1447 | was_jwait = 1; |
1407 | was_dirty = 1; | 1448 | was_dirty = 1; |
1408 | } else if (can_dirty(cn)) { | 1449 | } else if (can_dirty(cn)) { |
1409 | /* everything with !pjl && jwait should be writable */ | 1450 | /* |
1451 | * everything with !pjl && jwait | ||
1452 | * should be writable | ||
1453 | */ | ||
1410 | BUG(); | 1454 | BUG(); |
1411 | } | 1455 | } |
1412 | } | 1456 | } |
1413 | 1457 | ||
1414 | /* if someone has this block in a newer transaction, just make | 1458 | /* |
1415 | ** sure they are committed, and don't try writing it to disk | 1459 | * if someone has this block in a newer transaction, just make |
1460 | * sure they are committed, and don't try writing it to disk | ||
1416 | */ | 1461 | */ |
1417 | if (pjl) { | 1462 | if (pjl) { |
1418 | if (atomic_read(&pjl->j_commit_left)) | 1463 | if (atomic_read(&pjl->j_commit_left)) |
@@ -1420,16 +1465,18 @@ static int flush_journal_list(struct super_block *s, | |||
1420 | goto free_cnode; | 1465 | goto free_cnode; |
1421 | } | 1466 | } |
1422 | 1467 | ||
1423 | /* bh == NULL when the block got to disk on its own, OR, | 1468 | /* |
1424 | ** the block got freed in a future transaction | 1469 | * bh == NULL when the block got to disk on its own, OR, |
1470 | * the block got freed in a future transaction | ||
1425 | */ | 1471 | */ |
1426 | if (saved_bh == NULL) { | 1472 | if (saved_bh == NULL) { |
1427 | goto free_cnode; | 1473 | goto free_cnode; |
1428 | } | 1474 | } |
1429 | 1475 | ||
1430 | /* this should never happen. kupdate_one_transaction has this list | 1476 | /* |
1431 | ** locked while it works, so we should never see a buffer here that | 1477 | * this should never happen. kupdate_one_transaction has |
1432 | ** is not marked JDirty_wait | 1478 | * this list locked while it works, so we should never see a |
1479 | * buffer here that is not marked JDirty_wait | ||
1433 | */ | 1480 | */ |
1434 | if ((!was_jwait) && !buffer_locked(saved_bh)) { | 1481 | if ((!was_jwait) && !buffer_locked(saved_bh)) { |
1435 | reiserfs_warning(s, "journal-813", | 1482 | reiserfs_warning(s, "journal-813", |
@@ -1440,7 +1487,10 @@ static int flush_journal_list(struct super_block *s, | |||
1440 | was_jwait ? ' ' : '!'); | 1487 | was_jwait ? ' ' : '!'); |
1441 | } | 1488 | } |
1442 | if (was_dirty) { | 1489 | if (was_dirty) { |
1443 | /* we inc again because saved_bh gets decremented at free_cnode */ | 1490 | /* |
1491 | * we inc again because saved_bh gets decremented | ||
1492 | * at free_cnode | ||
1493 | */ | ||
1444 | get_bh(saved_bh); | 1494 | get_bh(saved_bh); |
1445 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); | 1495 | set_bit(BLOCK_NEEDS_FLUSH, &cn->state); |
1446 | lock_buffer(saved_bh); | 1496 | lock_buffer(saved_bh); |
@@ -1460,7 +1510,10 @@ static int flush_journal_list(struct super_block *s, | |||
1460 | last = cn; | 1510 | last = cn; |
1461 | cn = cn->next; | 1511 | cn = cn->next; |
1462 | if (saved_bh) { | 1512 | if (saved_bh) { |
1463 | /* we incremented this to keep others from taking the buffer head away */ | 1513 | /* |
1514 | * we incremented this to keep others from | ||
1515 | * taking the buffer head away | ||
1516 | */ | ||
1464 | put_bh(saved_bh); | 1517 | put_bh(saved_bh); |
1465 | if (atomic_read(&(saved_bh->b_count)) < 0) { | 1518 | if (atomic_read(&(saved_bh->b_count)) < 0) { |
1466 | reiserfs_warning(s, "journal-945", | 1519 | reiserfs_warning(s, "journal-945", |
@@ -1492,8 +1545,10 @@ static int flush_journal_list(struct super_block *s, | |||
1492 | #endif | 1545 | #endif |
1493 | err = -EIO; | 1546 | err = -EIO; |
1494 | } | 1547 | } |
1495 | /* note, we must clear the JDirty_wait bit after the up to date | 1548 | /* |
1496 | ** check, otherwise we race against our flushpage routine | 1549 | * note, we must clear the JDirty_wait bit |
1550 | * after the up to date check, otherwise we | ||
1551 | * race against our flushpage routine | ||
1497 | */ | 1552 | */ |
1498 | BUG_ON(!test_clear_buffer_journal_dirty | 1553 | BUG_ON(!test_clear_buffer_journal_dirty |
1499 | (cn->bh)); | 1554 | (cn->bh)); |
@@ -1513,23 +1568,25 @@ static int flush_journal_list(struct super_block *s, | |||
1513 | __func__); | 1568 | __func__); |
1514 | flush_older_and_return: | 1569 | flush_older_and_return: |
1515 | 1570 | ||
1516 | /* before we can update the journal header block, we _must_ flush all | 1571 | /* |
1517 | ** real blocks from all older transactions to disk. This is because | 1572 | * before we can update the journal header block, we _must_ flush all |
1518 | ** once the header block is updated, this transaction will not be | 1573 | * real blocks from all older transactions to disk. This is because |
1519 | ** replayed after a crash | 1574 | * once the header block is updated, this transaction will not be |
1575 | * replayed after a crash | ||
1520 | */ | 1576 | */ |
1521 | if (flushall) { | 1577 | if (flushall) { |
1522 | flush_older_journal_lists(s, jl); | 1578 | flush_older_journal_lists(s, jl); |
1523 | } | 1579 | } |
1524 | 1580 | ||
1525 | err = journal->j_errno; | 1581 | err = journal->j_errno; |
1526 | /* before we can remove everything from the hash tables for this | 1582 | /* |
1527 | ** transaction, we must make sure it can never be replayed | 1583 | * before we can remove everything from the hash tables for this |
1528 | ** | 1584 | * transaction, we must make sure it can never be replayed |
1529 | ** since we are only called from do_journal_end, we know for sure there | 1585 | * |
1530 | ** are no allocations going on while we are flushing journal lists. So, | 1586 | * since we are only called from do_journal_end, we know for sure there |
1531 | ** we only need to update the journal header block for the last list | 1587 | * are no allocations going on while we are flushing journal lists. So, |
1532 | ** being flushed | 1588 | * we only need to update the journal header block for the last list |
1589 | * being flushed | ||
1533 | */ | 1590 | */ |
1534 | if (!err && flushall) { | 1591 | if (!err && flushall) { |
1535 | err = | 1592 | err = |
@@ -1554,7 +1611,8 @@ static int flush_journal_list(struct super_block *s, | |||
1554 | } | 1611 | } |
1555 | journal->j_last_flush_id = jl->j_trans_id; | 1612 | journal->j_last_flush_id = jl->j_trans_id; |
1556 | 1613 | ||
1557 | /* not strictly required since we are freeing the list, but it should | 1614 | /* |
1615 | * not strictly required since we are freeing the list, but it should | ||
1558 | * help find code using dead lists later on | 1616 | * help find code using dead lists later on |
1559 | */ | 1617 | */ |
1560 | jl->j_len = 0; | 1618 | jl->j_len = 0; |
@@ -1585,15 +1643,17 @@ static int write_one_transaction(struct super_block *s, | |||
1585 | 1643 | ||
1586 | cn = jl->j_realblock; | 1644 | cn = jl->j_realblock; |
1587 | while (cn) { | 1645 | while (cn) { |
1588 | /* if the blocknr == 0, this has been cleared from the hash, | 1646 | /* |
1589 | ** skip it | 1647 | * if the blocknr == 0, this has been cleared from the hash, |
1648 | * skip it | ||
1590 | */ | 1649 | */ |
1591 | if (cn->blocknr == 0) { | 1650 | if (cn->blocknr == 0) { |
1592 | goto next; | 1651 | goto next; |
1593 | } | 1652 | } |
1594 | if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { | 1653 | if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { |
1595 | struct buffer_head *tmp_bh; | 1654 | struct buffer_head *tmp_bh; |
1596 | /* we can race against journal_mark_freed when we try | 1655 | /* |
1656 | * we can race against journal_mark_freed when we try | ||
1597 | * to lock_buffer(cn->bh), so we have to inc the buffer | 1657 | * to lock_buffer(cn->bh), so we have to inc the buffer |
1598 | * count, and recheck things after locking | 1658 | * count, and recheck things after locking |
1599 | */ | 1659 | */ |
@@ -1630,15 +1690,17 @@ static int dirty_one_transaction(struct super_block *s, | |||
1630 | jl->j_state |= LIST_DIRTY; | 1690 | jl->j_state |= LIST_DIRTY; |
1631 | cn = jl->j_realblock; | 1691 | cn = jl->j_realblock; |
1632 | while (cn) { | 1692 | while (cn) { |
1633 | /* look for a more recent transaction that logged this | 1693 | /* |
1634 | ** buffer. Only the most recent transaction with a buffer in | 1694 | * look for a more recent transaction that logged this |
1635 | ** it is allowed to send that buffer to disk | 1695 | * buffer. Only the most recent transaction with a buffer in |
1696 | * it is allowed to send that buffer to disk | ||
1636 | */ | 1697 | */ |
1637 | pjl = find_newer_jl_for_cn(cn); | 1698 | pjl = find_newer_jl_for_cn(cn); |
1638 | if (!pjl && cn->blocknr && cn->bh | 1699 | if (!pjl && cn->blocknr && cn->bh |
1639 | && buffer_journal_dirty(cn->bh)) { | 1700 | && buffer_journal_dirty(cn->bh)) { |
1640 | BUG_ON(!can_dirty(cn)); | 1701 | BUG_ON(!can_dirty(cn)); |
1641 | /* if the buffer is prepared, it will either be logged | 1702 | /* |
1703 | * if the buffer is prepared, it will either be logged | ||
1642 | * or restored. If restored, we need to make sure | 1704 | * or restored. If restored, we need to make sure |
1643 | * it actually gets marked dirty | 1705 | * it actually gets marked dirty |
1644 | */ | 1706 | */ |
@@ -1675,7 +1737,8 @@ static int kupdate_transactions(struct super_block *s, | |||
1675 | goto done; | 1737 | goto done; |
1676 | } | 1738 | } |
1677 | 1739 | ||
1678 | /* we've got j_flush_mutex held, nobody is going to delete any | 1740 | /* |
1741 | * we've got j_flush_mutex held, nobody is going to delete any | ||
1679 | * of these lists out from underneath us | 1742 | * of these lists out from underneath us |
1680 | */ | 1743 | */ |
1681 | while ((num_trans && transactions_flushed < num_trans) || | 1744 | while ((num_trans && transactions_flushed < num_trans) || |
@@ -1714,15 +1777,16 @@ static int kupdate_transactions(struct super_block *s, | |||
1714 | return ret; | 1777 | return ret; |
1715 | } | 1778 | } |
1716 | 1779 | ||
1717 | /* for o_sync and fsync heavy applications, they tend to use | 1780 | /* |
1718 | ** all the journa list slots with tiny transactions. These | 1781 | * for o_sync and fsync heavy applications, they tend to use |
1719 | ** trigger lots and lots of calls to update the header block, which | 1782 | * all the journa list slots with tiny transactions. These |
1720 | ** adds seeks and slows things down. | 1783 | * trigger lots and lots of calls to update the header block, which |
1721 | ** | 1784 | * adds seeks and slows things down. |
1722 | ** This function tries to clear out a large chunk of the journal lists | 1785 | * |
1723 | ** at once, which makes everything faster since only the newest journal | 1786 | * This function tries to clear out a large chunk of the journal lists |
1724 | ** list updates the header block | 1787 | * at once, which makes everything faster since only the newest journal |
1725 | */ | 1788 | * list updates the header block |
1789 | */ | ||
1726 | static int flush_used_journal_lists(struct super_block *s, | 1790 | static int flush_used_journal_lists(struct super_block *s, |
1727 | struct reiserfs_journal_list *jl) | 1791 | struct reiserfs_journal_list *jl) |
1728 | { | 1792 | { |
@@ -1759,9 +1823,11 @@ static int flush_used_journal_lists(struct super_block *s, | |||
1759 | } | 1823 | } |
1760 | get_journal_list(jl); | 1824 | get_journal_list(jl); |
1761 | get_journal_list(flush_jl); | 1825 | get_journal_list(flush_jl); |
1762 | /* try to find a group of blocks we can flush across all the | 1826 | |
1763 | ** transactions, but only bother if we've actually spanned | 1827 | /* |
1764 | ** across multiple lists | 1828 | * try to find a group of blocks we can flush across all the |
1829 | * transactions, but only bother if we've actually spanned | ||
1830 | * across multiple lists | ||
1765 | */ | 1831 | */ |
1766 | if (flush_jl != jl) { | 1832 | if (flush_jl != jl) { |
1767 | ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); | 1833 | ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); |
@@ -1773,9 +1839,9 @@ static int flush_used_journal_lists(struct super_block *s, | |||
1773 | } | 1839 | } |
1774 | 1840 | ||
1775 | /* | 1841 | /* |
1776 | ** removes any nodes in table with name block and dev as bh. | 1842 | * removes any nodes in table with name block and dev as bh. |
1777 | ** only touchs the hnext and hprev pointers. | 1843 | * only touchs the hnext and hprev pointers. |
1778 | */ | 1844 | */ |
1779 | void remove_journal_hash(struct super_block *sb, | 1845 | void remove_journal_hash(struct super_block *sb, |
1780 | struct reiserfs_journal_cnode **table, | 1846 | struct reiserfs_journal_cnode **table, |
1781 | struct reiserfs_journal_list *jl, | 1847 | struct reiserfs_journal_list *jl, |
@@ -1804,7 +1870,11 @@ void remove_journal_hash(struct super_block *sb, | |||
1804 | cur->blocknr = 0; | 1870 | cur->blocknr = 0; |
1805 | cur->sb = NULL; | 1871 | cur->sb = NULL; |
1806 | cur->state = 0; | 1872 | cur->state = 0; |
1807 | if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ | 1873 | /* |
1874 | * anybody who clears the cur->bh will also | ||
1875 | * dec the nonzerolen | ||
1876 | */ | ||
1877 | if (cur->bh && cur->jlist) | ||
1808 | atomic_dec(&(cur->jlist->j_nonzerolen)); | 1878 | atomic_dec(&(cur->jlist->j_nonzerolen)); |
1809 | cur->bh = NULL; | 1879 | cur->bh = NULL; |
1810 | cur->jlist = NULL; | 1880 | cur->jlist = NULL; |
@@ -1825,17 +1895,18 @@ static void free_journal_ram(struct super_block *sb) | |||
1825 | if (journal->j_header_bh) { | 1895 | if (journal->j_header_bh) { |
1826 | brelse(journal->j_header_bh); | 1896 | brelse(journal->j_header_bh); |
1827 | } | 1897 | } |
1828 | /* j_header_bh is on the journal dev, make sure not to release the journal | 1898 | /* |
1829 | * dev until we brelse j_header_bh | 1899 | * j_header_bh is on the journal dev, make sure |
1900 | * not to release the journal dev until we brelse j_header_bh | ||
1830 | */ | 1901 | */ |
1831 | release_journal_dev(sb, journal); | 1902 | release_journal_dev(sb, journal); |
1832 | vfree(journal); | 1903 | vfree(journal); |
1833 | } | 1904 | } |
1834 | 1905 | ||
1835 | /* | 1906 | /* |
1836 | ** call on unmount. Only set error to 1 if you haven't made your way out | 1907 | * call on unmount. Only set error to 1 if you haven't made your way out |
1837 | ** of read_super() yet. Any other caller must keep error at 0. | 1908 | * of read_super() yet. Any other caller must keep error at 0. |
1838 | */ | 1909 | */ |
1839 | static int do_journal_release(struct reiserfs_transaction_handle *th, | 1910 | static int do_journal_release(struct reiserfs_transaction_handle *th, |
1840 | struct super_block *sb, int error) | 1911 | struct super_block *sb, int error) |
1841 | { | 1912 | { |
@@ -1843,14 +1914,19 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1843 | int flushed = 0; | 1914 | int flushed = 0; |
1844 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 1915 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
1845 | 1916 | ||
1846 | /* we only want to flush out transactions if we were called with error == 0 | 1917 | /* |
1918 | * we only want to flush out transactions if we were | ||
1919 | * called with error == 0 | ||
1847 | */ | 1920 | */ |
1848 | if (!error && !(sb->s_flags & MS_RDONLY)) { | 1921 | if (!error && !(sb->s_flags & MS_RDONLY)) { |
1849 | /* end the current trans */ | 1922 | /* end the current trans */ |
1850 | BUG_ON(!th->t_trans_id); | 1923 | BUG_ON(!th->t_trans_id); |
1851 | do_journal_end(th, sb, 10, FLUSH_ALL); | 1924 | do_journal_end(th, sb, 10, FLUSH_ALL); |
1852 | 1925 | ||
1853 | /* make sure something gets logged to force our way into the flush code */ | 1926 | /* |
1927 | * make sure something gets logged to force | ||
1928 | * our way into the flush code | ||
1929 | */ | ||
1854 | if (!journal_join(&myth, sb, 1)) { | 1930 | if (!journal_join(&myth, sb, 1)) { |
1855 | reiserfs_prepare_for_journal(sb, | 1931 | reiserfs_prepare_for_journal(sb, |
1856 | SB_BUFFER_WITH_SB(sb), | 1932 | SB_BUFFER_WITH_SB(sb), |
@@ -1894,25 +1970,24 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, | |||
1894 | return 0; | 1970 | return 0; |
1895 | } | 1971 | } |
1896 | 1972 | ||
1897 | /* | 1973 | /* * call on unmount. flush all journal trans, release all alloc'd ram */ |
1898 | ** call on unmount. flush all journal trans, release all alloc'd ram | ||
1899 | */ | ||
1900 | int journal_release(struct reiserfs_transaction_handle *th, | 1974 | int journal_release(struct reiserfs_transaction_handle *th, |
1901 | struct super_block *sb) | 1975 | struct super_block *sb) |
1902 | { | 1976 | { |
1903 | return do_journal_release(th, sb, 0); | 1977 | return do_journal_release(th, sb, 0); |
1904 | } | 1978 | } |
1905 | 1979 | ||
1906 | /* | 1980 | /* only call from an error condition inside reiserfs_read_super! */ |
1907 | ** only call from an error condition inside reiserfs_read_super! | ||
1908 | */ | ||
1909 | int journal_release_error(struct reiserfs_transaction_handle *th, | 1981 | int journal_release_error(struct reiserfs_transaction_handle *th, |
1910 | struct super_block *sb) | 1982 | struct super_block *sb) |
1911 | { | 1983 | { |
1912 | return do_journal_release(th, sb, 1); | 1984 | return do_journal_release(th, sb, 1); |
1913 | } | 1985 | } |
1914 | 1986 | ||
1915 | /* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ | 1987 | /* |
1988 | * compares description block with commit block. | ||
1989 | * returns 1 if they differ, 0 if they are the same | ||
1990 | */ | ||
1916 | static int journal_compare_desc_commit(struct super_block *sb, | 1991 | static int journal_compare_desc_commit(struct super_block *sb, |
1917 | struct reiserfs_journal_desc *desc, | 1992 | struct reiserfs_journal_desc *desc, |
1918 | struct reiserfs_journal_commit *commit) | 1993 | struct reiserfs_journal_commit *commit) |
@@ -1926,11 +2001,12 @@ static int journal_compare_desc_commit(struct super_block *sb, | |||
1926 | return 0; | 2001 | return 0; |
1927 | } | 2002 | } |
1928 | 2003 | ||
1929 | /* returns 0 if it did not find a description block | 2004 | /* |
1930 | ** returns -1 if it found a corrupt commit block | 2005 | * returns 0 if it did not find a description block |
1931 | ** returns 1 if both desc and commit were valid | 2006 | * returns -1 if it found a corrupt commit block |
1932 | ** NOTE: only called during fs mount | 2007 | * returns 1 if both desc and commit were valid |
1933 | */ | 2008 | * NOTE: only called during fs mount |
2009 | */ | ||
1934 | static int journal_transaction_is_valid(struct super_block *sb, | 2010 | static int journal_transaction_is_valid(struct super_block *sb, |
1935 | struct buffer_head *d_bh, | 2011 | struct buffer_head *d_bh, |
1936 | unsigned int *oldest_invalid_trans_id, | 2012 | unsigned int *oldest_invalid_trans_id, |
@@ -1976,7 +2052,10 @@ static int journal_transaction_is_valid(struct super_block *sb, | |||
1976 | } | 2052 | } |
1977 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 2053 | offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(sb); |
1978 | 2054 | ||
1979 | /* ok, we have a journal description block, lets see if the transaction was valid */ | 2055 | /* |
2056 | * ok, we have a journal description block, | ||
2057 | * let's see if the transaction was valid | ||
2058 | */ | ||
1980 | c_bh = | 2059 | c_bh = |
1981 | journal_bread(sb, | 2060 | journal_bread(sb, |
1982 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 2061 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
@@ -2028,11 +2107,11 @@ static void brelse_array(struct buffer_head **heads, int num) | |||
2028 | } | 2107 | } |
2029 | 2108 | ||
2030 | /* | 2109 | /* |
2031 | ** given the start, and values for the oldest acceptable transactions, | 2110 | * given the start, and values for the oldest acceptable transactions, |
2032 | ** this either reads in a replays a transaction, or returns because the | 2111 | * this either reads in a replays a transaction, or returns because the |
2033 | ** transaction is invalid, or too old. | 2112 | * transaction is invalid, or too old. |
2034 | ** NOTE: only called during fs mount | 2113 | * NOTE: only called during fs mount |
2035 | */ | 2114 | */ |
2036 | static int journal_read_transaction(struct super_block *sb, | 2115 | static int journal_read_transaction(struct super_block *sb, |
2037 | unsigned long cur_dblock, | 2116 | unsigned long cur_dblock, |
2038 | unsigned long oldest_start, | 2117 | unsigned long oldest_start, |
@@ -2106,7 +2185,10 @@ static int journal_read_transaction(struct super_block *sb, | |||
2106 | } | 2185 | } |
2107 | 2186 | ||
2108 | trans_id = get_desc_trans_id(desc); | 2187 | trans_id = get_desc_trans_id(desc); |
2109 | /* now we know we've got a good transaction, and it was inside the valid time ranges */ | 2188 | /* |
2189 | * now we know we've got a good transaction, and it was | ||
2190 | * inside the valid time ranges | ||
2191 | */ | ||
2110 | log_blocks = kmalloc(get_desc_trans_len(desc) * | 2192 | log_blocks = kmalloc(get_desc_trans_len(desc) * |
2111 | sizeof(struct buffer_head *), GFP_NOFS); | 2193 | sizeof(struct buffer_head *), GFP_NOFS); |
2112 | real_blocks = kmalloc(get_desc_trans_len(desc) * | 2194 | real_blocks = kmalloc(get_desc_trans_len(desc) * |
@@ -2213,7 +2295,10 @@ static int journal_read_transaction(struct super_block *sb, | |||
2213 | "journal-1095: setting journal " "start to offset %ld", | 2295 | "journal-1095: setting journal " "start to offset %ld", |
2214 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); | 2296 | cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb)); |
2215 | 2297 | ||
2216 | /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ | 2298 | /* |
2299 | * init starting values for the first transaction, in case | ||
2300 | * this is the last transaction to be replayed. | ||
2301 | */ | ||
2217 | journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); | 2302 | journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(sb); |
2218 | journal->j_last_flush_trans_id = trans_id; | 2303 | journal->j_last_flush_trans_id = trans_id; |
2219 | journal->j_trans_id = trans_id + 1; | 2304 | journal->j_trans_id = trans_id + 1; |
@@ -2227,12 +2312,14 @@ static int journal_read_transaction(struct super_block *sb, | |||
2227 | return 0; | 2312 | return 0; |
2228 | } | 2313 | } |
2229 | 2314 | ||
2230 | /* This function reads blocks starting from block and to max_block of bufsize | 2315 | /* |
2231 | size (but no more than BUFNR blocks at a time). This proved to improve | 2316 | * This function reads blocks starting from block and to max_block of bufsize |
2232 | mounting speed on self-rebuilding raid5 arrays at least. | 2317 | * size (but no more than BUFNR blocks at a time). This proved to improve |
2233 | Right now it is only used from journal code. But later we might use it | 2318 | * mounting speed on self-rebuilding raid5 arrays at least. |
2234 | from other places. | 2319 | * Right now it is only used from journal code. But later we might use it |
2235 | Note: Do not use journal_getblk/sb_getblk functions here! */ | 2320 | * from other places. |
2321 | * Note: Do not use journal_getblk/sb_getblk functions here! | ||
2322 | */ | ||
2236 | static struct buffer_head *reiserfs_breada(struct block_device *dev, | 2323 | static struct buffer_head *reiserfs_breada(struct block_device *dev, |
2237 | b_blocknr_t block, int bufsize, | 2324 | b_blocknr_t block, int bufsize, |
2238 | b_blocknr_t max_block) | 2325 | b_blocknr_t max_block) |
@@ -2271,15 +2358,17 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, | |||
2271 | } | 2358 | } |
2272 | 2359 | ||
2273 | /* | 2360 | /* |
2274 | ** read and replay the log | 2361 | * read and replay the log |
2275 | ** on a clean unmount, the journal header's next unflushed pointer will | 2362 | * on a clean unmount, the journal header's next unflushed pointer will be |
2276 | ** be to an invalid transaction. This tests that before finding all the | 2363 | * to an invalid transaction. This tests that before finding all the |
2277 | ** transactions in the log, which makes normal mount times fast. | 2364 | * transactions in the log, which makes normal mount times fast. |
2278 | ** After a crash, this starts with the next unflushed transaction, and | 2365 | * |
2279 | ** replays until it finds one too old, or invalid. | 2366 | * After a crash, this starts with the next unflushed transaction, and |
2280 | ** On exit, it sets things up so the first transaction will work correctly. | 2367 | * replays until it finds one too old, or invalid. |
2281 | ** NOTE: only called during fs mount | 2368 | * |
2282 | */ | 2369 | * On exit, it sets things up so the first transaction will work correctly. |
2370 | * NOTE: only called during fs mount | ||
2371 | */ | ||
2283 | static int journal_read(struct super_block *sb) | 2372 | static int journal_read(struct super_block *sb) |
2284 | { | 2373 | { |
2285 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 2374 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
@@ -2303,9 +2392,10 @@ static int journal_read(struct super_block *sb) | |||
2303 | bdevname(journal->j_dev_bd, b)); | 2392 | bdevname(journal->j_dev_bd, b)); |
2304 | start = get_seconds(); | 2393 | start = get_seconds(); |
2305 | 2394 | ||
2306 | /* step 1, read in the journal header block. Check the transaction it says | 2395 | /* |
2307 | ** is the first unflushed, and if that transaction is not valid, | 2396 | * step 1, read in the journal header block. Check the transaction |
2308 | ** replay is done | 2397 | * it says is the first unflushed, and if that transaction is not |
2398 | * valid, replay is done | ||
2309 | */ | 2399 | */ |
2310 | journal->j_header_bh = journal_bread(sb, | 2400 | journal->j_header_bh = journal_bread(sb, |
2311 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) | 2401 | SB_ONDISK_JOURNAL_1st_BLOCK(sb) |
@@ -2329,9 +2419,10 @@ static int journal_read(struct super_block *sb) | |||
2329 | le32_to_cpu(jh->j_last_flush_trans_id)); | 2419 | le32_to_cpu(jh->j_last_flush_trans_id)); |
2330 | valid_journal_header = 1; | 2420 | valid_journal_header = 1; |
2331 | 2421 | ||
2332 | /* now, we try to read the first unflushed offset. If it is not valid, | 2422 | /* |
2333 | ** there is nothing more we can do, and it makes no sense to read | 2423 | * now, we try to read the first unflushed offset. If it |
2334 | ** through the whole log. | 2424 | * is not valid, there is nothing more we can do, and it |
2425 | * makes no sense to read through the whole log. | ||
2335 | */ | 2426 | */ |
2336 | d_bh = | 2427 | d_bh = |
2337 | journal_bread(sb, | 2428 | journal_bread(sb, |
@@ -2345,15 +2436,19 @@ static int journal_read(struct super_block *sb) | |||
2345 | goto start_log_replay; | 2436 | goto start_log_replay; |
2346 | } | 2437 | } |
2347 | 2438 | ||
2348 | /* ok, there are transactions that need to be replayed. start with the first log block, find | 2439 | /* |
2349 | ** all the valid transactions, and pick out the oldest. | 2440 | * ok, there are transactions that need to be replayed. start |
2441 | * with the first log block, find all the valid transactions, and | ||
2442 | * pick out the oldest. | ||
2350 | */ | 2443 | */ |
2351 | while (continue_replay | 2444 | while (continue_replay |
2352 | && cur_dblock < | 2445 | && cur_dblock < |
2353 | (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 2446 | (SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
2354 | SB_ONDISK_JOURNAL_SIZE(sb))) { | 2447 | SB_ONDISK_JOURNAL_SIZE(sb))) { |
2355 | /* Note that it is required for blocksize of primary fs device and journal | 2448 | /* |
2356 | device to be the same */ | 2449 | * Note that it is required for blocksize of primary fs |
2450 | * device and journal device to be the same | ||
2451 | */ | ||
2357 | d_bh = | 2452 | d_bh = |
2358 | reiserfs_breada(journal->j_dev_bd, cur_dblock, | 2453 | reiserfs_breada(journal->j_dev_bd, cur_dblock, |
2359 | sb->s_blocksize, | 2454 | sb->s_blocksize, |
@@ -2431,9 +2526,11 @@ static int journal_read(struct super_block *sb) | |||
2431 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, | 2526 | reiserfs_debug(sb, REISERFS_DEBUG_CODE, |
2432 | "journal-1225: No valid " "transactions found"); | 2527 | "journal-1225: No valid " "transactions found"); |
2433 | } | 2528 | } |
2434 | /* j_start does not get set correctly if we don't replay any transactions. | 2529 | /* |
2435 | ** if we had a valid journal_header, set j_start to the first unflushed transaction value, | 2530 | * j_start does not get set correctly if we don't replay any |
2436 | ** copy the trans_id from the header | 2531 | * transactions. if we had a valid journal_header, set j_start |
2532 | * to the first unflushed transaction value, copy the trans_id | ||
2533 | * from the header | ||
2437 | */ | 2534 | */ |
2438 | if (valid_journal_header && replay_count == 0) { | 2535 | if (valid_journal_header && replay_count == 0) { |
2439 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); | 2536 | journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset); |
@@ -2462,8 +2559,9 @@ static int journal_read(struct super_block *sb) | |||
2462 | _update_journal_header_block(sb, journal->j_start, | 2559 | _update_journal_header_block(sb, journal->j_start, |
2463 | journal->j_last_flush_trans_id)) { | 2560 | journal->j_last_flush_trans_id)) { |
2464 | reiserfs_write_unlock(sb); | 2561 | reiserfs_write_unlock(sb); |
2465 | /* replay failed, caller must call free_journal_ram and abort | 2562 | /* |
2466 | ** the mount | 2563 | * replay failed, caller must call free_journal_ram and abort |
2564 | * the mount | ||
2467 | */ | 2565 | */ |
2468 | return -1; | 2566 | return -1; |
2469 | } | 2567 | } |
@@ -2556,7 +2654,7 @@ static int journal_init_dev(struct super_block *super, | |||
2556 | return 0; | 2654 | return 0; |
2557 | } | 2655 | } |
2558 | 2656 | ||
2559 | /** | 2657 | /* |
2560 | * When creating/tuning a file system user can assign some | 2658 | * When creating/tuning a file system user can assign some |
2561 | * journal params within boundaries which depend on the ratio | 2659 | * journal params within boundaries which depend on the ratio |
2562 | * blocksize/standard_blocksize. | 2660 | * blocksize/standard_blocksize. |
@@ -2574,8 +2672,7 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2574 | struct reiserfs_journal *journal) | 2672 | struct reiserfs_journal *journal) |
2575 | { | 2673 | { |
2576 | if (journal->j_trans_max) { | 2674 | if (journal->j_trans_max) { |
2577 | /* Non-default journal params. | 2675 | /* Non-default journal params. Do sanity check for them. */ |
2578 | Do sanity check for them. */ | ||
2579 | int ratio = 1; | 2676 | int ratio = 1; |
2580 | if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) | 2677 | if (sb->s_blocksize < REISERFS_STANDARD_BLKSIZE) |
2581 | ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; | 2678 | ratio = REISERFS_STANDARD_BLKSIZE / sb->s_blocksize; |
@@ -2597,10 +2694,12 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2597 | return 1; | 2694 | return 1; |
2598 | } | 2695 | } |
2599 | } else { | 2696 | } else { |
2600 | /* Default journal params. | 2697 | /* |
2601 | The file system was created by old version | 2698 | * Default journal params. |
2602 | of mkreiserfs, so some fields contain zeros, | 2699 | * The file system was created by old version |
2603 | and we need to advise proper values for them */ | 2700 | * of mkreiserfs, so some fields contain zeros, |
2701 | * and we need to advise proper values for them | ||
2702 | */ | ||
2604 | if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { | 2703 | if (sb->s_blocksize != REISERFS_STANDARD_BLKSIZE) { |
2605 | reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", | 2704 | reiserfs_warning(sb, "sh-464", "bad blocksize (%u)", |
2606 | sb->s_blocksize); | 2705 | sb->s_blocksize); |
@@ -2613,9 +2712,7 @@ static int check_advise_trans_params(struct super_block *sb, | |||
2613 | return 0; | 2712 | return 0; |
2614 | } | 2713 | } |
2615 | 2714 | ||
2616 | /* | 2715 | /* must be called once on fs mount. calls journal_read for you */ |
2617 | ** must be called once on fs mount. calls journal_read for you | ||
2618 | */ | ||
2619 | int journal_init(struct super_block *sb, const char *j_dev_name, | 2716 | int journal_init(struct super_block *sb, const char *j_dev_name, |
2620 | int old_format, unsigned int commit_max_age) | 2717 | int old_format, unsigned int commit_max_age) |
2621 | { | 2718 | { |
@@ -2654,8 +2751,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2654 | REISERFS_DISK_OFFSET_IN_BYTES / | 2751 | REISERFS_DISK_OFFSET_IN_BYTES / |
2655 | sb->s_blocksize + 2); | 2752 | sb->s_blocksize + 2); |
2656 | 2753 | ||
2657 | /* Sanity check to see is the standard journal fitting within first bitmap | 2754 | /* |
2658 | (actual for small blocksizes) */ | 2755 | * Sanity check to see is the standard journal fitting |
2756 | * within first bitmap (actual for small blocksizes) | ||
2757 | */ | ||
2659 | if (!SB_ONDISK_JOURNAL_DEVICE(sb) && | 2758 | if (!SB_ONDISK_JOURNAL_DEVICE(sb) && |
2660 | (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + | 2759 | (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + |
2661 | SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { | 2760 | SB_ONDISK_JOURNAL_SIZE(sb) > sb->s_blocksize * 8)) { |
@@ -2803,10 +2902,10 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2803 | } | 2902 | } |
2804 | 2903 | ||
2805 | /* | 2904 | /* |
2806 | ** test for a polite end of the current transaction. Used by file_write, and should | 2905 | * test for a polite end of the current transaction. Used by file_write, |
2807 | ** be used by delete to make sure they don't write more than can fit inside a single | 2906 | * and should be used by delete to make sure they don't write more than |
2808 | ** transaction | 2907 | * can fit inside a single transaction |
2809 | */ | 2908 | */ |
2810 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | 2909 | int journal_transaction_should_end(struct reiserfs_transaction_handle *th, |
2811 | int new_alloc) | 2910 | int new_alloc) |
2812 | { | 2911 | { |
@@ -2829,8 +2928,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th, | |||
2829 | return 0; | 2928 | return 0; |
2830 | } | 2929 | } |
2831 | 2930 | ||
2832 | /* this must be called inside a transaction | 2931 | /* this must be called inside a transaction */ |
2833 | */ | ||
2834 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | 2932 | void reiserfs_block_writes(struct reiserfs_transaction_handle *th) |
2835 | { | 2933 | { |
2836 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); | 2934 | struct reiserfs_journal *journal = SB_JOURNAL(th->t_super); |
@@ -2840,8 +2938,7 @@ void reiserfs_block_writes(struct reiserfs_transaction_handle *th) | |||
2840 | return; | 2938 | return; |
2841 | } | 2939 | } |
2842 | 2940 | ||
2843 | /* this must be called without a transaction started | 2941 | /* this must be called without a transaction started */ |
2844 | */ | ||
2845 | void reiserfs_allow_writes(struct super_block *s) | 2942 | void reiserfs_allow_writes(struct super_block *s) |
2846 | { | 2943 | { |
2847 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2944 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
@@ -2849,8 +2946,7 @@ void reiserfs_allow_writes(struct super_block *s) | |||
2849 | wake_up(&journal->j_join_wait); | 2946 | wake_up(&journal->j_join_wait); |
2850 | } | 2947 | } |
2851 | 2948 | ||
2852 | /* this must be called without a transaction started | 2949 | /* this must be called without a transaction started */ |
2853 | */ | ||
2854 | void reiserfs_wait_on_write_block(struct super_block *s) | 2950 | void reiserfs_wait_on_write_block(struct super_block *s) |
2855 | { | 2951 | { |
2856 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 2952 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
@@ -2912,11 +3008,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) | |||
2912 | } | 3008 | } |
2913 | } | 3009 | } |
2914 | 3010 | ||
2915 | /* join == true if you must join an existing transaction. | 3011 | /* |
2916 | ** join == false if you can deal with waiting for others to finish | 3012 | * join == true if you must join an existing transaction. |
2917 | ** | 3013 | * join == false if you can deal with waiting for others to finish |
2918 | ** this will block until the transaction is joinable. send the number of blocks you | 3014 | * |
2919 | ** expect to use in nblocks. | 3015 | * this will block until the transaction is joinable. send the number of |
3016 | * blocks you expect to use in nblocks. | ||
2920 | */ | 3017 | */ |
2921 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | 3018 | static int do_journal_begin_r(struct reiserfs_transaction_handle *th, |
2922 | struct super_block *sb, unsigned long nblocks, | 3019 | struct super_block *sb, unsigned long nblocks, |
@@ -2957,9 +3054,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2957 | } | 3054 | } |
2958 | now = get_seconds(); | 3055 | now = get_seconds(); |
2959 | 3056 | ||
2960 | /* if there is no room in the journal OR | 3057 | /* |
2961 | ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning | 3058 | * if there is no room in the journal OR |
2962 | ** we don't sleep if there aren't other writers | 3059 | * if this transaction is too old, and we weren't called joinable, |
3060 | * wait for it to finish before beginning we don't sleep if there | ||
3061 | * aren't other writers | ||
2963 | */ | 3062 | */ |
2964 | 3063 | ||
2965 | if ((!join && journal->j_must_wait > 0) || | 3064 | if ((!join && journal->j_must_wait > 0) || |
@@ -2973,7 +3072,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2973 | || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { | 3072 | || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { |
2974 | 3073 | ||
2975 | old_trans_id = journal->j_trans_id; | 3074 | old_trans_id = journal->j_trans_id; |
2976 | unlock_journal(sb); /* allow others to finish this transaction */ | 3075 | /* allow others to finish this transaction */ |
3076 | unlock_journal(sb); | ||
2977 | 3077 | ||
2978 | if (!join && (journal->j_len_alloc + nblocks + 2) >= | 3078 | if (!join && (journal->j_len_alloc + nblocks + 2) >= |
2979 | journal->j_max_batch && | 3079 | journal->j_max_batch && |
@@ -2985,8 +3085,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
2985 | goto relock; | 3085 | goto relock; |
2986 | } | 3086 | } |
2987 | } | 3087 | } |
2988 | /* don't mess with joining the transaction if all we have to do is | 3088 | /* |
2989 | * wait for someone else to do a commit | 3089 | * don't mess with joining the transaction if all we |
3090 | * have to do is wait for someone else to do a commit | ||
2990 | */ | 3091 | */ |
2991 | if (atomic_read(&journal->j_jlock)) { | 3092 | if (atomic_read(&journal->j_jlock)) { |
2992 | while (journal->j_trans_id == old_trans_id && | 3093 | while (journal->j_trans_id == old_trans_id && |
@@ -3027,9 +3128,11 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, | |||
3027 | 3128 | ||
3028 | out_fail: | 3129 | out_fail: |
3029 | memset(th, 0, sizeof(*th)); | 3130 | memset(th, 0, sizeof(*th)); |
3030 | /* Re-set th->t_super, so we can properly keep track of how many | 3131 | /* |
3132 | * Re-set th->t_super, so we can properly keep track of how many | ||
3031 | * persistent transactions there are. We need to do this so if this | 3133 | * persistent transactions there are. We need to do this so if this |
3032 | * call is part of a failed restart_transaction, we can free it later */ | 3134 | * call is part of a failed restart_transaction, we can free it later |
3135 | */ | ||
3033 | th->t_super = sb; | 3136 | th->t_super = sb; |
3034 | return retval; | 3137 | return retval; |
3035 | } | 3138 | } |
@@ -3042,14 +3145,15 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct | |||
3042 | int ret; | 3145 | int ret; |
3043 | struct reiserfs_transaction_handle *th; | 3146 | struct reiserfs_transaction_handle *th; |
3044 | 3147 | ||
3045 | /* if we're nesting into an existing transaction. It will be | 3148 | /* |
3046 | ** persistent on its own | 3149 | * if we're nesting into an existing transaction. It will be |
3150 | * persistent on its own | ||
3047 | */ | 3151 | */ |
3048 | if (reiserfs_transaction_running(s)) { | 3152 | if (reiserfs_transaction_running(s)) { |
3049 | th = current->journal_info; | 3153 | th = current->journal_info; |
3050 | th->t_refcount++; | 3154 | th->t_refcount++; |
3051 | BUG_ON(th->t_refcount < 2); | 3155 | BUG_ON(th->t_refcount < 2); |
3052 | 3156 | ||
3053 | return th; | 3157 | return th; |
3054 | } | 3158 | } |
3055 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); | 3159 | th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS); |
@@ -3085,8 +3189,9 @@ static int journal_join(struct reiserfs_transaction_handle *th, | |||
3085 | { | 3189 | { |
3086 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3190 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3087 | 3191 | ||
3088 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3192 | /* |
3089 | ** pointer | 3193 | * this keeps do_journal_end from NULLing out the |
3194 | * current->journal_info pointer | ||
3090 | */ | 3195 | */ |
3091 | th->t_handle_save = cur_th; | 3196 | th->t_handle_save = cur_th; |
3092 | BUG_ON(cur_th && cur_th->t_refcount > 1); | 3197 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
@@ -3098,8 +3203,9 @@ int journal_join_abort(struct reiserfs_transaction_handle *th, | |||
3098 | { | 3203 | { |
3099 | struct reiserfs_transaction_handle *cur_th = current->journal_info; | 3204 | struct reiserfs_transaction_handle *cur_th = current->journal_info; |
3100 | 3205 | ||
3101 | /* this keeps do_journal_end from NULLing out the current->journal_info | 3206 | /* |
3102 | ** pointer | 3207 | * this keeps do_journal_end from NULLing out the |
3208 | * current->journal_info pointer | ||
3103 | */ | 3209 | */ |
3104 | th->t_handle_save = cur_th; | 3210 | th->t_handle_save = cur_th; |
3105 | BUG_ON(cur_th && cur_th->t_refcount > 1); | 3211 | BUG_ON(cur_th && cur_th->t_refcount > 1); |
@@ -3125,9 +3231,10 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3125 | "journal_info != 0"); | 3231 | "journal_info != 0"); |
3126 | return 0; | 3232 | return 0; |
3127 | } else { | 3233 | } else { |
3128 | /* we've ended up with a handle from a different filesystem. | 3234 | /* |
3129 | ** save it and restore on journal_end. This should never | 3235 | * we've ended up with a handle from a different |
3130 | ** really happen... | 3236 | * filesystem. save it and restore on journal_end. |
3237 | * This should never really happen... | ||
3131 | */ | 3238 | */ |
3132 | reiserfs_warning(sb, "clm-2100", | 3239 | reiserfs_warning(sb, "clm-2100", |
3133 | "nesting info a different FS"); | 3240 | "nesting info a different FS"); |
@@ -3140,9 +3247,10 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3140 | ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); | 3247 | ret = do_journal_begin_r(th, sb, nblocks, JBEGIN_REG); |
3141 | BUG_ON(current->journal_info != th); | 3248 | BUG_ON(current->journal_info != th); |
3142 | 3249 | ||
3143 | /* I guess this boils down to being the reciprocal of clm-2100 above. | 3250 | /* |
3144 | * If do_journal_begin_r fails, we need to put it back, since journal_end | 3251 | * I guess this boils down to being the reciprocal of clm-2100 above. |
3145 | * won't be called to do it. */ | 3252 | * If do_journal_begin_r fails, we need to put it back, since |
3253 | * journal_end won't be called to do it. */ | ||
3146 | if (ret) | 3254 | if (ret) |
3147 | current->journal_info = th->t_handle_save; | 3255 | current->journal_info = th->t_handle_save; |
3148 | else | 3256 | else |
@@ -3152,14 +3260,15 @@ int journal_begin(struct reiserfs_transaction_handle *th, | |||
3152 | } | 3260 | } |
3153 | 3261 | ||
3154 | /* | 3262 | /* |
3155 | ** puts bh into the current transaction. If it was already there, reorders removes the | 3263 | * puts bh into the current transaction. If it was already there, reorders |
3156 | ** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). | 3264 | * removes the old pointers from the hash, and puts new ones in (to make |
3157 | ** | 3265 | * sure replay happen in the right order). |
3158 | ** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the | 3266 | * |
3159 | ** transaction is committed. | 3267 | * if it was dirty, cleans and files onto the clean list. I can't let it |
3160 | ** | 3268 | * be dirty again until the transaction is committed. |
3161 | ** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. | 3269 | * |
3162 | */ | 3270 | * if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. |
3271 | */ | ||
3163 | int journal_mark_dirty(struct reiserfs_transaction_handle *th, | 3272 | int journal_mark_dirty(struct reiserfs_transaction_handle *th, |
3164 | struct super_block *sb, struct buffer_head *bh) | 3273 | struct super_block *sb, struct buffer_head *bh) |
3165 | { | 3274 | { |
@@ -3184,9 +3293,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3184 | return 0; | 3293 | return 0; |
3185 | } | 3294 | } |
3186 | 3295 | ||
3187 | /* this must be turned into a panic instead of a warning. We can't allow | 3296 | /* |
3188 | ** a dirty or journal_dirty or locked buffer to be logged, as some changes | 3297 | * this must be turned into a panic instead of a warning. We can't |
3189 | ** could get to disk too early. NOT GOOD. | 3298 | * allow a dirty or journal_dirty or locked buffer to be logged, as |
3299 | * some changes could get to disk too early. NOT GOOD. | ||
3190 | */ | 3300 | */ |
3191 | if (!prepared || buffer_dirty(bh)) { | 3301 | if (!prepared || buffer_dirty(bh)) { |
3192 | reiserfs_warning(sb, "journal-1777", | 3302 | reiserfs_warning(sb, "journal-1777", |
@@ -3205,8 +3315,10 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, | |||
3205 | atomic_read(&(journal->j_wcount))); | 3315 | atomic_read(&(journal->j_wcount))); |
3206 | return 1; | 3316 | return 1; |
3207 | } | 3317 | } |
3208 | /* this error means I've screwed up, and we've overflowed the transaction. | 3318 | /* |
3209 | ** Nothing can be done here, except make the FS readonly or panic. | 3319 | * this error means I've screwed up, and we've overflowed |
3320 | * the transaction. Nothing can be done here, except make the | ||
3321 | * FS readonly or panic. | ||
3210 | */ | 3322 | */ |
3211 | if (journal->j_len >= journal->j_trans_max) { | 3323 | if (journal->j_len >= journal->j_trans_max) { |
3212 | reiserfs_panic(th->t_super, "journal-1413", | 3324 | reiserfs_panic(th->t_super, "journal-1413", |
@@ -3280,8 +3392,9 @@ int journal_end(struct reiserfs_transaction_handle *th, | |||
3280 | struct reiserfs_transaction_handle *cur_th = | 3392 | struct reiserfs_transaction_handle *cur_th = |
3281 | current->journal_info; | 3393 | current->journal_info; |
3282 | 3394 | ||
3283 | /* we aren't allowed to close a nested transaction on a different | 3395 | /* |
3284 | ** filesystem from the one in the task struct | 3396 | * we aren't allowed to close a nested transaction on a |
3397 | * different filesystem from the one in the task struct | ||
3285 | */ | 3398 | */ |
3286 | BUG_ON(cur_th->t_super != th->t_super); | 3399 | BUG_ON(cur_th->t_super != th->t_super); |
3287 | 3400 | ||
@@ -3295,13 +3408,14 @@ int journal_end(struct reiserfs_transaction_handle *th, | |||
3295 | } | 3408 | } |
3296 | } | 3409 | } |
3297 | 3410 | ||
3298 | /* removes from the current transaction, relsing and descrementing any counters. | 3411 | /* |
3299 | ** also files the removed buffer directly onto the clean list | 3412 | * removes from the current transaction, relsing and descrementing any counters. |
3300 | ** | 3413 | * also files the removed buffer directly onto the clean list |
3301 | ** called by journal_mark_freed when a block has been deleted | 3414 | * |
3302 | ** | 3415 | * called by journal_mark_freed when a block has been deleted |
3303 | ** returns 1 if it cleaned and relsed the buffer. 0 otherwise | 3416 | * |
3304 | */ | 3417 | * returns 1 if it cleaned and relsed the buffer. 0 otherwise |
3418 | */ | ||
3305 | static int remove_from_transaction(struct super_block *sb, | 3419 | static int remove_from_transaction(struct super_block *sb, |
3306 | b_blocknr_t blocknr, int already_cleaned) | 3420 | b_blocknr_t blocknr, int already_cleaned) |
3307 | { | 3421 | { |
@@ -3350,15 +3464,16 @@ static int remove_from_transaction(struct super_block *sb, | |||
3350 | } | 3464 | } |
3351 | 3465 | ||
3352 | /* | 3466 | /* |
3353 | ** for any cnode in a journal list, it can only be dirtied of all the | 3467 | * for any cnode in a journal list, it can only be dirtied of all the |
3354 | ** transactions that include it are committed to disk. | 3468 | * transactions that include it are committed to disk. |
3355 | ** this checks through each transaction, and returns 1 if you are allowed to dirty, | 3469 | * this checks through each transaction, and returns 1 if you are allowed |
3356 | ** and 0 if you aren't | 3470 | * to dirty, and 0 if you aren't |
3357 | ** | 3471 | * |
3358 | ** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log | 3472 | * it is called by dirty_journal_list, which is called after |
3359 | ** blocks for a given transaction on disk | 3473 | * flush_commit_list has gotten all the log blocks for a given |
3360 | ** | 3474 | * transaction on disk |
3361 | */ | 3475 | * |
3476 | */ | ||
3362 | static int can_dirty(struct reiserfs_journal_cnode *cn) | 3477 | static int can_dirty(struct reiserfs_journal_cnode *cn) |
3363 | { | 3478 | { |
3364 | struct super_block *sb = cn->sb; | 3479 | struct super_block *sb = cn->sb; |
@@ -3366,9 +3481,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3366 | struct reiserfs_journal_cnode *cur = cn->hprev; | 3481 | struct reiserfs_journal_cnode *cur = cn->hprev; |
3367 | int can_dirty = 1; | 3482 | int can_dirty = 1; |
3368 | 3483 | ||
3369 | /* first test hprev. These are all newer than cn, so any node here | 3484 | /* |
3370 | ** with the same block number and dev means this node can't be sent | 3485 | * first test hprev. These are all newer than cn, so any node here |
3371 | ** to disk right now. | 3486 | * with the same block number and dev means this node can't be sent |
3487 | * to disk right now. | ||
3372 | */ | 3488 | */ |
3373 | while (cur && can_dirty) { | 3489 | while (cur && can_dirty) { |
3374 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && | 3490 | if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && |
@@ -3377,8 +3493,9 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3377 | } | 3493 | } |
3378 | cur = cur->hprev; | 3494 | cur = cur->hprev; |
3379 | } | 3495 | } |
3380 | /* then test hnext. These are all older than cn. As long as they | 3496 | /* |
3381 | ** are committed to the log, it is safe to write cn to disk | 3497 | * then test hnext. These are all older than cn. As long as they |
3498 | * are committed to the log, it is safe to write cn to disk | ||
3382 | */ | 3499 | */ |
3383 | cur = cn->hnext; | 3500 | cur = cn->hnext; |
3384 | while (cur && can_dirty) { | 3501 | while (cur && can_dirty) { |
@@ -3392,9 +3509,10 @@ static int can_dirty(struct reiserfs_journal_cnode *cn) | |||
3392 | return can_dirty; | 3509 | return can_dirty; |
3393 | } | 3510 | } |
3394 | 3511 | ||
3395 | /* syncs the commit blocks, but does not force the real buffers to disk | 3512 | /* |
3396 | ** will wait until the current transaction is done/committed before returning | 3513 | * syncs the commit blocks, but does not force the real buffers to disk |
3397 | */ | 3514 | * will wait until the current transaction is done/committed before returning |
3515 | */ | ||
3398 | int journal_end_sync(struct reiserfs_transaction_handle *th, | 3516 | int journal_end_sync(struct reiserfs_transaction_handle *th, |
3399 | struct super_block *sb, unsigned long nblocks) | 3517 | struct super_block *sb, unsigned long nblocks) |
3400 | { | 3518 | { |
@@ -3411,9 +3529,7 @@ int journal_end_sync(struct reiserfs_transaction_handle *th, | |||
3411 | return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); | 3529 | return do_journal_end(th, sb, nblocks, COMMIT_NOW | WAIT); |
3412 | } | 3530 | } |
3413 | 3531 | ||
3414 | /* | 3532 | /* writeback the pending async commits to disk */ |
3415 | ** writeback the pending async commits to disk | ||
3416 | */ | ||
3417 | static void flush_async_commits(struct work_struct *work) | 3533 | static void flush_async_commits(struct work_struct *work) |
3418 | { | 3534 | { |
3419 | struct reiserfs_journal *journal = | 3535 | struct reiserfs_journal *journal = |
@@ -3433,9 +3549,9 @@ static void flush_async_commits(struct work_struct *work) | |||
3433 | } | 3549 | } |
3434 | 3550 | ||
3435 | /* | 3551 | /* |
3436 | ** flushes any old transactions to disk | 3552 | * flushes any old transactions to disk |
3437 | ** ends the current transaction if it is too old | 3553 | * ends the current transaction if it is too old |
3438 | */ | 3554 | */ |
3439 | void reiserfs_flush_old_commits(struct super_block *sb) | 3555 | void reiserfs_flush_old_commits(struct super_block *sb) |
3440 | { | 3556 | { |
3441 | time_t now; | 3557 | time_t now; |
@@ -3443,13 +3559,15 @@ void reiserfs_flush_old_commits(struct super_block *sb) | |||
3443 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3559 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3444 | 3560 | ||
3445 | now = get_seconds(); | 3561 | now = get_seconds(); |
3446 | /* safety check so we don't flush while we are replaying the log during | 3562 | /* |
3563 | * safety check so we don't flush while we are replaying the log during | ||
3447 | * mount | 3564 | * mount |
3448 | */ | 3565 | */ |
3449 | if (list_empty(&journal->j_journal_list)) | 3566 | if (list_empty(&journal->j_journal_list)) |
3450 | return; | 3567 | return; |
3451 | 3568 | ||
3452 | /* check the current transaction. If there are no writers, and it is | 3569 | /* |
3570 | * check the current transaction. If there are no writers, and it is | ||
3453 | * too old, finish it, and force the commit blocks to disk | 3571 | * too old, finish it, and force the commit blocks to disk |
3454 | */ | 3572 | */ |
3455 | if (atomic_read(&journal->j_wcount) <= 0 && | 3573 | if (atomic_read(&journal->j_wcount) <= 0 && |
@@ -3463,8 +3581,10 @@ void reiserfs_flush_old_commits(struct super_block *sb) | |||
3463 | journal_mark_dirty(&th, sb, | 3581 | journal_mark_dirty(&th, sb, |
3464 | SB_BUFFER_WITH_SB(sb)); | 3582 | SB_BUFFER_WITH_SB(sb)); |
3465 | 3583 | ||
3466 | /* we're only being called from kreiserfsd, it makes no sense to do | 3584 | /* |
3467 | ** an async commit so that kreiserfsd can do it later | 3585 | * we're only being called from kreiserfsd, it makes |
3586 | * no sense to do an async commit so that kreiserfsd | ||
3587 | * can do it later | ||
3468 | */ | 3588 | */ |
3469 | do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); | 3589 | do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); |
3470 | } | 3590 | } |
@@ -3472,16 +3592,20 @@ void reiserfs_flush_old_commits(struct super_block *sb) | |||
3472 | } | 3592 | } |
3473 | 3593 | ||
3474 | /* | 3594 | /* |
3475 | ** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit | 3595 | * returns 0 if do_journal_end should return right away, returns 1 if |
3476 | ** | 3596 | * do_journal_end should finish the commit |
3477 | ** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all | 3597 | * |
3478 | ** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just | 3598 | * if the current transaction is too old, but still has writers, this will |
3479 | ** flushes the commit list and returns 0. | 3599 | * wait on j_join_wait until all the writers are done. By the time it |
3480 | ** | 3600 | * wakes up, the transaction it was called has already ended, so it just |
3481 | ** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. | 3601 | * flushes the commit list and returns 0. |
3482 | ** | 3602 | * |
3483 | ** Note, we can't allow the journal_end to proceed while there are still writers in the log. | 3603 | * Won't batch when flush or commit_now is set. Also won't batch when |
3484 | */ | 3604 | * others are waiting on j_join_wait. |
3605 | * | ||
3606 | * Note, we can't allow the journal_end to proceed while there are still | ||
3607 | * writers in the log. | ||
3608 | */ | ||
3485 | static int check_journal_end(struct reiserfs_transaction_handle *th, | 3609 | static int check_journal_end(struct reiserfs_transaction_handle *th, |
3486 | struct super_block *sb, unsigned long nblocks, | 3610 | struct super_block *sb, unsigned long nblocks, |
3487 | int flags) | 3611 | int flags) |
@@ -3503,21 +3627,25 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3503 | } | 3627 | } |
3504 | 3628 | ||
3505 | journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); | 3629 | journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged); |
3506 | if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ | 3630 | /* <= 0 is allowed. unmounting might not call begin */ |
3631 | if (atomic_read(&(journal->j_wcount)) > 0) | ||
3507 | atomic_dec(&(journal->j_wcount)); | 3632 | atomic_dec(&(journal->j_wcount)); |
3508 | } | ||
3509 | 3633 | ||
3510 | /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released | 3634 | /* |
3511 | ** will be dealt with by next transaction that actually writes something, but should be taken | 3635 | * BUG, deal with case where j_len is 0, but people previously |
3512 | ** care of in this trans | 3636 | * freed blocks need to be released will be dealt with by next |
3637 | * transaction that actually writes something, but should be taken | ||
3638 | * care of in this trans | ||
3513 | */ | 3639 | */ |
3514 | BUG_ON(journal->j_len == 0); | 3640 | BUG_ON(journal->j_len == 0); |
3515 | 3641 | ||
3516 | /* if wcount > 0, and we are called to with flush or commit_now, | 3642 | /* |
3517 | ** we wait on j_join_wait. We will wake up when the last writer has | 3643 | * if wcount > 0, and we are called to with flush or commit_now, |
3518 | ** finished the transaction, and started it on its way to the disk. | 3644 | * we wait on j_join_wait. We will wake up when the last writer has |
3519 | ** Then, we flush the commit or journal list, and just return 0 | 3645 | * finished the transaction, and started it on its way to the disk. |
3520 | ** because the rest of journal end was already done for this transaction. | 3646 | * Then, we flush the commit or journal list, and just return 0 |
3647 | * because the rest of journal end was already done for this | ||
3648 | * transaction. | ||
3521 | */ | 3649 | */ |
3522 | if (atomic_read(&(journal->j_wcount)) > 0) { | 3650 | if (atomic_read(&(journal->j_wcount)) > 0) { |
3523 | if (flush || commit_now) { | 3651 | if (flush || commit_now) { |
@@ -3533,7 +3661,10 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3533 | } | 3661 | } |
3534 | unlock_journal(sb); | 3662 | unlock_journal(sb); |
3535 | 3663 | ||
3536 | /* sleep while the current transaction is still j_jlocked */ | 3664 | /* |
3665 | * sleep while the current transaction is | ||
3666 | * still j_jlocked | ||
3667 | */ | ||
3537 | while (journal->j_trans_id == trans_id) { | 3668 | while (journal->j_trans_id == trans_id) { |
3538 | if (atomic_read(&journal->j_jlock)) { | 3669 | if (atomic_read(&journal->j_jlock)) { |
3539 | queue_log_writer(sb); | 3670 | queue_log_writer(sb); |
@@ -3547,7 +3678,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3547 | } | 3678 | } |
3548 | } | 3679 | } |
3549 | BUG_ON(journal->j_trans_id == trans_id); | 3680 | BUG_ON(journal->j_trans_id == trans_id); |
3550 | 3681 | ||
3551 | if (commit_now | 3682 | if (commit_now |
3552 | && journal_list_still_alive(sb, trans_id) | 3683 | && journal_list_still_alive(sb, trans_id) |
3553 | && wait_on_commit) { | 3684 | && wait_on_commit) { |
@@ -3585,19 +3716,22 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, | |||
3585 | } | 3716 | } |
3586 | 3717 | ||
3587 | /* | 3718 | /* |
3588 | ** Does all the work that makes deleting blocks safe. | 3719 | * Does all the work that makes deleting blocks safe. |
3589 | ** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. | 3720 | * when deleting a block mark BH_JNew, just remove it from the current |
3590 | ** | 3721 | * transaction, clean it's buffer_head and move on. |
3591 | ** otherwise: | 3722 | * |
3592 | ** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes | 3723 | * otherwise: |
3593 | ** before this transaction has finished. | 3724 | * set a bit for the block in the journal bitmap. That will prevent it from |
3594 | ** | 3725 | * being allocated for unformatted nodes before this transaction has finished. |
3595 | ** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with | 3726 | * |
3596 | ** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, | 3727 | * mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. |
3597 | ** the block can't be reallocated yet. | 3728 | * That will prevent any old transactions with this block from trying to flush |
3598 | ** | 3729 | * to the real location. Since we aren't removing the cnode from the |
3599 | ** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. | 3730 | * journal_list_hash, *the block can't be reallocated yet. |
3600 | */ | 3731 | * |
3732 | * Then remove it from the current transaction, decrementing any counters and | ||
3733 | * filing it on the clean list. | ||
3734 | */ | ||
3601 | int journal_mark_freed(struct reiserfs_transaction_handle *th, | 3735 | int journal_mark_freed(struct reiserfs_transaction_handle *th, |
3602 | struct super_block *sb, b_blocknr_t blocknr) | 3736 | struct super_block *sb, b_blocknr_t blocknr) |
3603 | { | 3737 | { |
@@ -3620,7 +3754,10 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3620 | reiserfs_clean_and_file_buffer(bh); | 3754 | reiserfs_clean_and_file_buffer(bh); |
3621 | cleaned = remove_from_transaction(sb, blocknr, cleaned); | 3755 | cleaned = remove_from_transaction(sb, blocknr, cleaned); |
3622 | } else { | 3756 | } else { |
3623 | /* set the bit for this block in the journal bitmap for this transaction */ | 3757 | /* |
3758 | * set the bit for this block in the journal bitmap | ||
3759 | * for this transaction | ||
3760 | */ | ||
3624 | jb = journal->j_current_jl->j_list_bitmap; | 3761 | jb = journal->j_current_jl->j_list_bitmap; |
3625 | if (!jb) { | 3762 | if (!jb) { |
3626 | reiserfs_panic(sb, "journal-1702", | 3763 | reiserfs_panic(sb, "journal-1702", |
@@ -3636,17 +3773,22 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3636 | } | 3773 | } |
3637 | cleaned = remove_from_transaction(sb, blocknr, cleaned); | 3774 | cleaned = remove_from_transaction(sb, blocknr, cleaned); |
3638 | 3775 | ||
3639 | /* find all older transactions with this block, make sure they don't try to write it out */ | 3776 | /* |
3777 | * find all older transactions with this block, | ||
3778 | * make sure they don't try to write it out | ||
3779 | */ | ||
3640 | cn = get_journal_hash_dev(sb, journal->j_list_hash_table, | 3780 | cn = get_journal_hash_dev(sb, journal->j_list_hash_table, |
3641 | blocknr); | 3781 | blocknr); |
3642 | while (cn) { | 3782 | while (cn) { |
3643 | if (sb == cn->sb && blocknr == cn->blocknr) { | 3783 | if (sb == cn->sb && blocknr == cn->blocknr) { |
3644 | set_bit(BLOCK_FREED, &cn->state); | 3784 | set_bit(BLOCK_FREED, &cn->state); |
3645 | if (cn->bh) { | 3785 | if (cn->bh) { |
3786 | /* | ||
3787 | * remove_from_transaction will brelse | ||
3788 | * the buffer if it was in the current | ||
3789 | * trans | ||
3790 | */ | ||
3646 | if (!cleaned) { | 3791 | if (!cleaned) { |
3647 | /* remove_from_transaction will brelse the buffer if it was | ||
3648 | ** in the current trans | ||
3649 | */ | ||
3650 | clear_buffer_journal_dirty(cn-> | 3792 | clear_buffer_journal_dirty(cn-> |
3651 | bh); | 3793 | bh); |
3652 | clear_buffer_dirty(cn->bh); | 3794 | clear_buffer_dirty(cn->bh); |
@@ -3661,7 +3803,11 @@ int journal_mark_freed(struct reiserfs_transaction_handle *th, | |||
3661 | "cn->bh->b_count < 0"); | 3803 | "cn->bh->b_count < 0"); |
3662 | } | 3804 | } |
3663 | } | 3805 | } |
3664 | if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ | 3806 | /* |
3807 | * since we are clearing the bh, | ||
3808 | * we MUST dec nonzerolen | ||
3809 | */ | ||
3810 | if (cn->jlist) { | ||
3665 | atomic_dec(& | 3811 | atomic_dec(& |
3666 | (cn->jlist-> | 3812 | (cn->jlist-> |
3667 | j_nonzerolen)); | 3813 | j_nonzerolen)); |
@@ -3697,10 +3843,16 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id, | |||
3697 | struct reiserfs_journal *journal = SB_JOURNAL(sb); | 3843 | struct reiserfs_journal *journal = SB_JOURNAL(sb); |
3698 | int ret = 0; | 3844 | int ret = 0; |
3699 | 3845 | ||
3700 | /* is it from the current transaction, or from an unknown transaction? */ | 3846 | /* |
3847 | * is it from the current transaction, | ||
3848 | * or from an unknown transaction? | ||
3849 | */ | ||
3701 | if (id == journal->j_trans_id) { | 3850 | if (id == journal->j_trans_id) { |
3702 | jl = journal->j_current_jl; | 3851 | jl = journal->j_current_jl; |
3703 | /* try to let other writers come in and grow this transaction */ | 3852 | /* |
3853 | * try to let other writers come in and | ||
3854 | * grow this transaction | ||
3855 | */ | ||
3704 | let_transaction_grow(sb, id); | 3856 | let_transaction_grow(sb, id); |
3705 | if (journal->j_trans_id != id) { | 3857 | if (journal->j_trans_id != id) { |
3706 | goto flush_commit_only; | 3858 | goto flush_commit_only; |
@@ -3724,7 +3876,8 @@ static int __commit_trans_jl(struct inode *inode, unsigned long id, | |||
3724 | ret = 1; | 3876 | ret = 1; |
3725 | 3877 | ||
3726 | } else { | 3878 | } else { |
3727 | /* this gets tricky, we have to make sure the journal list in | 3879 | /* |
3880 | * this gets tricky, we have to make sure the journal list in | ||
3728 | * the inode still exists. We know the list is still around | 3881 | * the inode still exists. We know the list is still around |
3729 | * if we've got a larger transaction id than the oldest list | 3882 | * if we've got a larger transaction id than the oldest list |
3730 | */ | 3883 | */ |
@@ -3751,7 +3904,8 @@ int reiserfs_commit_for_inode(struct inode *inode) | |||
3751 | unsigned int id = REISERFS_I(inode)->i_trans_id; | 3904 | unsigned int id = REISERFS_I(inode)->i_trans_id; |
3752 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; | 3905 | struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; |
3753 | 3906 | ||
3754 | /* for the whole inode, assume unset id means it was | 3907 | /* |
3908 | * for the whole inode, assume unset id means it was | ||
3755 | * changed in the current transaction. More conservative | 3909 | * changed in the current transaction. More conservative |
3756 | */ | 3910 | */ |
3757 | if (!id || !jl) { | 3911 | if (!id || !jl) { |
@@ -3789,12 +3943,11 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb, | |||
3789 | 3943 | ||
3790 | extern struct tree_balance *cur_tb; | 3944 | extern struct tree_balance *cur_tb; |
3791 | /* | 3945 | /* |
3792 | ** before we can change a metadata block, we have to make sure it won't | 3946 | * before we can change a metadata block, we have to make sure it won't |
3793 | ** be written to disk while we are altering it. So, we must: | 3947 | * be written to disk while we are altering it. So, we must: |
3794 | ** clean it | 3948 | * clean it |
3795 | ** wait on it. | 3949 | * wait on it. |
3796 | ** | 3950 | */ |
3797 | */ | ||
3798 | int reiserfs_prepare_for_journal(struct super_block *sb, | 3951 | int reiserfs_prepare_for_journal(struct super_block *sb, |
3799 | struct buffer_head *bh, int wait) | 3952 | struct buffer_head *bh, int wait) |
3800 | { | 3953 | { |
@@ -3815,15 +3968,15 @@ int reiserfs_prepare_for_journal(struct super_block *sb, | |||
3815 | } | 3968 | } |
3816 | 3969 | ||
3817 | /* | 3970 | /* |
3818 | ** long and ugly. If flush, will not return until all commit | 3971 | * long and ugly. If flush, will not return until all commit |
3819 | ** blocks and all real buffers in the trans are on disk. | 3972 | * blocks and all real buffers in the trans are on disk. |
3820 | ** If no_async, won't return until all commit blocks are on disk. | 3973 | * If no_async, won't return until all commit blocks are on disk. |
3821 | ** | 3974 | * |
3822 | ** keep reading, there are comments as you go along | 3975 | * keep reading, there are comments as you go along |
3823 | ** | 3976 | * |
3824 | ** If the journal is aborted, we just clean up. Things like flushing | 3977 | * If the journal is aborted, we just clean up. Things like flushing |
3825 | ** journal lists, etc just won't happen. | 3978 | * journal lists, etc just won't happen. |
3826 | */ | 3979 | */ |
3827 | static int do_journal_end(struct reiserfs_transaction_handle *th, | 3980 | static int do_journal_end(struct reiserfs_transaction_handle *th, |
3828 | struct super_block *sb, unsigned long nblocks, | 3981 | struct super_block *sb, unsigned long nblocks, |
3829 | int flags) | 3982 | int flags) |
@@ -3850,8 +4003,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3850 | BUG_ON(th->t_refcount > 1); | 4003 | BUG_ON(th->t_refcount > 1); |
3851 | BUG_ON(!th->t_trans_id); | 4004 | BUG_ON(!th->t_trans_id); |
3852 | 4005 | ||
3853 | /* protect flush_older_commits from doing mistakes if the | 4006 | /* |
3854 | transaction ID counter gets overflowed. */ | 4007 | * protect flush_older_commits from doing mistakes if the |
4008 | * transaction ID counter gets overflowed. | ||
4009 | */ | ||
3855 | if (th->t_trans_id == ~0U) | 4010 | if (th->t_trans_id == ~0U) |
3856 | flags |= FLUSH_ALL | COMMIT_NOW | WAIT; | 4011 | flags |= FLUSH_ALL | COMMIT_NOW | WAIT; |
3857 | flush = flags & FLUSH_ALL; | 4012 | flush = flags & FLUSH_ALL; |
@@ -3875,8 +4030,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3875 | wait_on_commit = 1; | 4030 | wait_on_commit = 1; |
3876 | } | 4031 | } |
3877 | 4032 | ||
3878 | /* check_journal_end locks the journal, and unlocks if it does not return 1 | 4033 | /* |
3879 | ** it tells us if we should continue with the journal_end, or just return | 4034 | * check_journal_end locks the journal, and unlocks if it does |
4035 | * not return 1 it tells us if we should continue with the | ||
4036 | * journal_end, or just return | ||
3880 | */ | 4037 | */ |
3881 | if (!check_journal_end(th, sb, nblocks, flags)) { | 4038 | if (!check_journal_end(th, sb, nblocks, flags)) { |
3882 | reiserfs_schedule_old_flush(sb); | 4039 | reiserfs_schedule_old_flush(sb); |
@@ -3891,19 +4048,23 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3891 | } | 4048 | } |
3892 | 4049 | ||
3893 | /* | 4050 | /* |
3894 | ** j must wait means we have to flush the log blocks, and the real blocks for | 4051 | * j must wait means we have to flush the log blocks, and the |
3895 | ** this transaction | 4052 | * real blocks for this transaction |
3896 | */ | 4053 | */ |
3897 | if (journal->j_must_wait > 0) { | 4054 | if (journal->j_must_wait > 0) { |
3898 | flush = 1; | 4055 | flush = 1; |
3899 | } | 4056 | } |
3900 | #ifdef REISERFS_PREALLOCATE | 4057 | #ifdef REISERFS_PREALLOCATE |
3901 | /* quota ops might need to nest, setup the journal_info pointer for them | 4058 | /* |
3902 | * and raise the refcount so that it is > 0. */ | 4059 | * quota ops might need to nest, setup the journal_info pointer |
4060 | * for them and raise the refcount so that it is > 0. | ||
4061 | */ | ||
3903 | current->journal_info = th; | 4062 | current->journal_info = th; |
3904 | th->t_refcount++; | 4063 | th->t_refcount++; |
3905 | reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into | 4064 | |
3906 | * the transaction */ | 4065 | /* it should not involve new blocks into the transaction */ |
4066 | reiserfs_discard_all_prealloc(th); | ||
4067 | |||
3907 | th->t_refcount--; | 4068 | th->t_refcount--; |
3908 | current->journal_info = th->t_handle_save; | 4069 | current->journal_info = th->t_handle_save; |
3909 | #endif | 4070 | #endif |
@@ -3919,7 +4080,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3919 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); | 4080 | memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8); |
3920 | set_desc_trans_id(desc, journal->j_trans_id); | 4081 | set_desc_trans_id(desc, journal->j_trans_id); |
3921 | 4082 | ||
3922 | /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ | 4083 | /* |
4084 | * setup commit block. Don't write (keep it clean too) this one | ||
4085 | * until after everyone else is written | ||
4086 | */ | ||
3923 | c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + | 4087 | c_bh = journal_getblk(sb, SB_ONDISK_JOURNAL_1st_BLOCK(sb) + |
3924 | ((journal->j_start + journal->j_len + | 4088 | ((journal->j_start + journal->j_len + |
3925 | 1) % SB_ONDISK_JOURNAL_SIZE(sb))); | 4089 | 1) % SB_ONDISK_JOURNAL_SIZE(sb))); |
@@ -3931,7 +4095,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3931 | /* init this journal list */ | 4095 | /* init this journal list */ |
3932 | jl = journal->j_current_jl; | 4096 | jl = journal->j_current_jl; |
3933 | 4097 | ||
3934 | /* we lock the commit before doing anything because | 4098 | /* |
4099 | * we lock the commit before doing anything because | ||
3935 | * we want to make sure nobody tries to run flush_commit_list until | 4100 | * we want to make sure nobody tries to run flush_commit_list until |
3936 | * the new transaction is fully setup, and we've already flushed the | 4101 | * the new transaction is fully setup, and we've already flushed the |
3937 | * ordered bh list | 4102 | * ordered bh list |
@@ -3951,9 +4116,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3951 | atomic_set(&jl->j_commit_left, journal->j_len + 2); | 4116 | atomic_set(&jl->j_commit_left, journal->j_len + 2); |
3952 | jl->j_realblock = NULL; | 4117 | jl->j_realblock = NULL; |
3953 | 4118 | ||
3954 | /* The ENTIRE FOR LOOP MUST not cause schedule to occur. | 4119 | /* |
3955 | ** for each real block, add it to the journal list hash, | 4120 | * The ENTIRE FOR LOOP MUST not cause schedule to occur. |
3956 | ** copy into real block index array in the commit or desc block | 4121 | * for each real block, add it to the journal list hash, |
4122 | * copy into real block index array in the commit or desc block | ||
3957 | */ | 4123 | */ |
3958 | trans_half = journal_trans_half(sb->s_blocksize); | 4124 | trans_half = journal_trans_half(sb->s_blocksize); |
3959 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { | 4125 | for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) { |
@@ -3972,9 +4138,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
3972 | last_cn->next = jl_cn; | 4138 | last_cn->next = jl_cn; |
3973 | } | 4139 | } |
3974 | last_cn = jl_cn; | 4140 | last_cn = jl_cn; |
3975 | /* make sure the block we are trying to log is not a block | 4141 | /* |
3976 | of journal or reserved area */ | 4142 | * make sure the block we are trying to log |
3977 | 4143 | * is not a block of journal or reserved area | |
4144 | */ | ||
3978 | if (is_block_in_log_or_reserved_area | 4145 | if (is_block_in_log_or_reserved_area |
3979 | (sb, cn->bh->b_blocknr)) { | 4146 | (sb, cn->bh->b_blocknr)) { |
3980 | reiserfs_panic(sb, "journal-2332", | 4147 | reiserfs_panic(sb, "journal-2332", |
@@ -4004,19 +4171,26 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4004 | set_desc_trans_id(desc, journal->j_trans_id); | 4171 | set_desc_trans_id(desc, journal->j_trans_id); |
4005 | set_commit_trans_len(commit, journal->j_len); | 4172 | set_commit_trans_len(commit, journal->j_len); |
4006 | 4173 | ||
4007 | /* special check in case all buffers in the journal were marked for not logging */ | 4174 | /* |
4175 | * special check in case all buffers in the journal | ||
4176 | * were marked for not logging | ||
4177 | */ | ||
4008 | BUG_ON(journal->j_len == 0); | 4178 | BUG_ON(journal->j_len == 0); |
4009 | 4179 | ||
4010 | /* we're about to dirty all the log blocks, mark the description block | 4180 | /* |
4181 | * we're about to dirty all the log blocks, mark the description block | ||
4011 | * dirty now too. Don't mark the commit block dirty until all the | 4182 | * dirty now too. Don't mark the commit block dirty until all the |
4012 | * others are on disk | 4183 | * others are on disk |
4013 | */ | 4184 | */ |
4014 | mark_buffer_dirty(d_bh); | 4185 | mark_buffer_dirty(d_bh); |
4015 | 4186 | ||
4016 | /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ | 4187 | /* |
4188 | * first data block is j_start + 1, so add one to | ||
4189 | * cur_write_start wherever you use it | ||
4190 | */ | ||
4017 | cur_write_start = journal->j_start; | 4191 | cur_write_start = journal->j_start; |
4018 | cn = journal->j_first; | 4192 | cn = journal->j_first; |
4019 | jindex = 1; /* start at one so we don't get the desc again */ | 4193 | jindex = 1; /* start at one so we don't get the desc again */ |
4020 | while (cn) { | 4194 | while (cn) { |
4021 | clear_buffer_journal_new(cn->bh); | 4195 | clear_buffer_journal_new(cn->bh); |
4022 | /* copy all the real blocks into log area. dirty log blocks */ | 4196 | /* copy all the real blocks into log area. dirty log blocks */ |
@@ -4042,7 +4216,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4042 | set_buffer_journal_dirty(cn->bh); | 4216 | set_buffer_journal_dirty(cn->bh); |
4043 | clear_buffer_journaled(cn->bh); | 4217 | clear_buffer_journaled(cn->bh); |
4044 | } else { | 4218 | } else { |
4045 | /* JDirty cleared sometime during transaction. don't log this one */ | 4219 | /* |
4220 | * JDirty cleared sometime during transaction. | ||
4221 | * don't log this one | ||
4222 | */ | ||
4046 | reiserfs_warning(sb, "journal-2048", | 4223 | reiserfs_warning(sb, "journal-2048", |
4047 | "BAD, buffer in journal hash, " | 4224 | "BAD, buffer in journal hash, " |
4048 | "but not JDirty!"); | 4225 | "but not JDirty!"); |
@@ -4054,9 +4231,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4054 | reiserfs_cond_resched(sb); | 4231 | reiserfs_cond_resched(sb); |
4055 | } | 4232 | } |
4056 | 4233 | ||
4057 | /* we are done with both the c_bh and d_bh, but | 4234 | /* |
4058 | ** c_bh must be written after all other commit blocks, | 4235 | * we are done with both the c_bh and d_bh, but |
4059 | ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | 4236 | * c_bh must be written after all other commit blocks, |
4237 | * so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. | ||
4060 | */ | 4238 | */ |
4061 | 4239 | ||
4062 | journal->j_current_jl = alloc_journal_list(sb); | 4240 | journal->j_current_jl = alloc_journal_list(sb); |
@@ -4087,15 +4265,18 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4087 | journal->j_next_async_flush = 0; | 4265 | journal->j_next_async_flush = 0; |
4088 | init_journal_hash(sb); | 4266 | init_journal_hash(sb); |
4089 | 4267 | ||
4090 | // make sure reiserfs_add_jh sees the new current_jl before we | 4268 | /* |
4091 | // write out the tails | 4269 | * make sure reiserfs_add_jh sees the new current_jl before we |
4270 | * write out the tails | ||
4271 | */ | ||
4092 | smp_mb(); | 4272 | smp_mb(); |
4093 | 4273 | ||
4094 | /* tail conversion targets have to hit the disk before we end the | 4274 | /* |
4275 | * tail conversion targets have to hit the disk before we end the | ||
4095 | * transaction. Otherwise a later transaction might repack the tail | 4276 | * transaction. Otherwise a later transaction might repack the tail |
4096 | * before this transaction commits, leaving the data block unflushed and | 4277 | * before this transaction commits, leaving the data block unflushed |
4097 | * clean, if we crash before the later transaction commits, the data block | 4278 | * and clean, if we crash before the later transaction commits, the |
4098 | * is lost. | 4279 | * data block is lost. |
4099 | */ | 4280 | */ |
4100 | if (!list_empty(&jl->j_tail_bh_list)) { | 4281 | if (!list_empty(&jl->j_tail_bh_list)) { |
4101 | depth = reiserfs_write_unlock_nested(sb); | 4282 | depth = reiserfs_write_unlock_nested(sb); |
@@ -4106,12 +4287,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4106 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); | 4287 | BUG_ON(!list_empty(&jl->j_tail_bh_list)); |
4107 | mutex_unlock(&jl->j_commit_mutex); | 4288 | mutex_unlock(&jl->j_commit_mutex); |
4108 | 4289 | ||
4109 | /* honor the flush wishes from the caller, simple commits can | 4290 | /* |
4110 | ** be done outside the journal lock, they are done below | 4291 | * honor the flush wishes from the caller, simple commits can |
4111 | ** | 4292 | * be done outside the journal lock, they are done below |
4112 | ** if we don't flush the commit list right now, we put it into | 4293 | * |
4113 | ** the work queue so the people waiting on the async progress work | 4294 | * if we don't flush the commit list right now, we put it into |
4114 | ** queue don't wait for this proc to flush journal lists and such. | 4295 | * the work queue so the people waiting on the async progress work |
4296 | * queue don't wait for this proc to flush journal lists and such. | ||
4115 | */ | 4297 | */ |
4116 | if (flush) { | 4298 | if (flush) { |
4117 | flush_commit_list(sb, jl, 1); | 4299 | flush_commit_list(sb, jl, 1); |
@@ -4120,9 +4302,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4120 | queue_delayed_work(REISERFS_SB(sb)->commit_wq, | 4302 | queue_delayed_work(REISERFS_SB(sb)->commit_wq, |
4121 | &journal->j_work, HZ / 10); | 4303 | &journal->j_work, HZ / 10); |
4122 | 4304 | ||
4123 | /* if the next transaction has any chance of wrapping, flush | 4305 | /* |
4124 | ** transactions that might get overwritten. If any journal lists are very | 4306 | * if the next transaction has any chance of wrapping, flush |
4125 | ** old flush them as well. | 4307 | * transactions that might get overwritten. If any journal lists |
4308 | * are very old flush them as well. | ||
4126 | */ | 4309 | */ |
4127 | first_jl: | 4310 | first_jl: |
4128 | list_for_each_safe(entry, safe, &journal->j_journal_list) { | 4311 | list_for_each_safe(entry, safe, &journal->j_journal_list) { |
@@ -4135,8 +4318,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4135 | } else if ((journal->j_start + | 4318 | } else if ((journal->j_start + |
4136 | journal->j_trans_max + 1) < | 4319 | journal->j_trans_max + 1) < |
4137 | SB_ONDISK_JOURNAL_SIZE(sb)) { | 4320 | SB_ONDISK_JOURNAL_SIZE(sb)) { |
4138 | /* if we don't cross into the next transaction and we don't | 4321 | /* |
4139 | * wrap, there is no way we can overlap any later transactions | 4322 | * if we don't cross into the next |
4323 | * transaction and we don't wrap, there is | ||
4324 | * no way we can overlap any later transactions | ||
4140 | * break now | 4325 | * break now |
4141 | */ | 4326 | */ |
4142 | break; | 4327 | break; |
@@ -4150,10 +4335,12 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4150 | flush_used_journal_lists(sb, temp_jl); | 4335 | flush_used_journal_lists(sb, temp_jl); |
4151 | goto first_jl; | 4336 | goto first_jl; |
4152 | } else { | 4337 | } else { |
4153 | /* we don't overlap anything from out start to the end of the | 4338 | /* |
4154 | * log, and our wrapped portion doesn't overlap anything at | 4339 | * we don't overlap anything from out start |
4155 | * the start of the log. We can break | 4340 | * to the end of the log, and our wrapped |
4156 | */ | 4341 | * portion doesn't overlap anything at |
4342 | * the start of the log. We can break | ||
4343 | */ | ||
4157 | break; | 4344 | break; |
4158 | } | 4345 | } |
4159 | } | 4346 | } |
@@ -4181,9 +4368,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, | |||
4181 | reiserfs_check_lock_depth(sb, "journal end2"); | 4368 | reiserfs_check_lock_depth(sb, "journal end2"); |
4182 | 4369 | ||
4183 | memset(th, 0, sizeof(*th)); | 4370 | memset(th, 0, sizeof(*th)); |
4184 | /* Re-set th->t_super, so we can properly keep track of how many | 4371 | /* |
4372 | * Re-set th->t_super, so we can properly keep track of how many | ||
4185 | * persistent transactions there are. We need to do this so if this | 4373 | * persistent transactions there are. We need to do this so if this |
4186 | * call is part of a failed restart_transaction, we can free it later */ | 4374 | * call is part of a failed restart_transaction, we can free it later |
4375 | */ | ||
4187 | th->t_super = sb; | 4376 | th->t_super = sb; |
4188 | 4377 | ||
4189 | return journal->j_errno; | 4378 | return journal->j_errno; |
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index b46399d98f84..d48a9e7507a1 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c | |||
@@ -8,28 +8,21 @@ | |||
8 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
9 | #include <linux/buffer_head.h> | 9 | #include <linux/buffer_head.h> |
10 | 10 | ||
11 | /* these are used in do_balance.c */ | 11 | /* |
12 | 12 | * copy copy_count entries from source directory item to dest buffer | |
13 | /* leaf_move_items | 13 | * (creating new item if needed) |
14 | leaf_shift_left | 14 | */ |
15 | leaf_shift_right | ||
16 | leaf_delete_items | ||
17 | leaf_insert_into_buf | ||
18 | leaf_paste_in_buffer | ||
19 | leaf_cut_from_buffer | ||
20 | leaf_paste_entries | ||
21 | */ | ||
22 | |||
23 | /* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ | ||
24 | static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | 15 | static void leaf_copy_dir_entries(struct buffer_info *dest_bi, |
25 | struct buffer_head *source, int last_first, | 16 | struct buffer_head *source, int last_first, |
26 | int item_num, int from, int copy_count) | 17 | int item_num, int from, int copy_count) |
27 | { | 18 | { |
28 | struct buffer_head *dest = dest_bi->bi_bh; | 19 | struct buffer_head *dest = dest_bi->bi_bh; |
29 | int item_num_in_dest; /* either the number of target item, | 20 | /* |
30 | or if we must create a new item, | 21 | * either the number of target item, or if we must create a |
31 | the number of the item we will | 22 | * new item, the number of the item we will create it next to |
32 | create it next to */ | 23 | */ |
24 | int item_num_in_dest; | ||
25 | |||
33 | struct item_head *ih; | 26 | struct item_head *ih; |
34 | struct reiserfs_de_head *deh; | 27 | struct reiserfs_de_head *deh; |
35 | int copy_records_len; /* length of all records in item to be copied */ | 28 | int copy_records_len; /* length of all records in item to be copied */ |
@@ -39,7 +32,10 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
39 | 32 | ||
40 | RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); | 33 | RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item"); |
41 | 34 | ||
42 | /* length of all record to be copied and first byte of the last of them */ | 35 | /* |
36 | * length of all record to be copied and first byte of | ||
37 | * the last of them | ||
38 | */ | ||
43 | deh = B_I_DEH(source, ih); | 39 | deh = B_I_DEH(source, ih); |
44 | if (copy_count) { | 40 | if (copy_count) { |
45 | copy_records_len = (from ? deh_location(&(deh[from - 1])) : | 41 | copy_records_len = (from ? deh_location(&(deh[from - 1])) : |
@@ -59,7 +55,10 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
59 | LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) | 55 | LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest) |
60 | - 1); | 56 | - 1); |
61 | 57 | ||
62 | /* if there are no items in dest or the first/last item in dest is not item of the same directory */ | 58 | /* |
59 | * if there are no items in dest or the first/last item in | ||
60 | * dest is not item of the same directory | ||
61 | */ | ||
63 | if ((item_num_in_dest == -1) || | 62 | if ((item_num_in_dest == -1) || |
64 | (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || | 63 | (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) || |
65 | (last_first == LAST_TO_FIRST | 64 | (last_first == LAST_TO_FIRST |
@@ -83,11 +82,17 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
83 | if (from < ih_entry_count(ih)) { | 82 | if (from < ih_entry_count(ih)) { |
84 | set_le_ih_k_offset(&new_ih, | 83 | set_le_ih_k_offset(&new_ih, |
85 | deh_offset(&(deh[from]))); | 84 | deh_offset(&(deh[from]))); |
86 | /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */ | ||
87 | } else { | 85 | } else { |
88 | /* no entries will be copied to this item in this function */ | 86 | /* |
87 | * no entries will be copied to this | ||
88 | * item in this function | ||
89 | */ | ||
89 | set_le_ih_k_offset(&new_ih, U32_MAX); | 90 | set_le_ih_k_offset(&new_ih, U32_MAX); |
90 | /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ | 91 | /* |
92 | * this item is not yet valid, but we | ||
93 | * want I_IS_DIRECTORY_ITEM to return 1 | ||
94 | * for it, so we -1 | ||
95 | */ | ||
91 | } | 96 | } |
92 | set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), | 97 | set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key), |
93 | TYPE_DIRENTRY); | 98 | TYPE_DIRENTRY); |
@@ -119,30 +124,38 @@ static void leaf_copy_dir_entries(struct buffer_info *dest_bi, | |||
119 | DEH_SIZE * copy_count + copy_records_len); | 124 | DEH_SIZE * copy_count + copy_records_len); |
120 | } | 125 | } |
121 | 126 | ||
122 | /* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or | 127 | /* |
123 | part of it or nothing (see the return 0 below) from SOURCE to the end | 128 | * Copy the first (if last_first == FIRST_TO_LAST) or last |
124 | (if last_first) or beginning (!last_first) of the DEST */ | 129 | * (last_first == LAST_TO_FIRST) item or part of it or nothing |
130 | * (see the return 0 below) from SOURCE to the end (if last_first) | ||
131 | * or beginning (!last_first) of the DEST | ||
132 | */ | ||
125 | /* returns 1 if anything was copied, else 0 */ | 133 | /* returns 1 if anything was copied, else 0 */ |
126 | static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | 134 | static int leaf_copy_boundary_item(struct buffer_info *dest_bi, |
127 | struct buffer_head *src, int last_first, | 135 | struct buffer_head *src, int last_first, |
128 | int bytes_or_entries) | 136 | int bytes_or_entries) |
129 | { | 137 | { |
130 | struct buffer_head *dest = dest_bi->bi_bh; | 138 | struct buffer_head *dest = dest_bi->bi_bh; |
131 | int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ | 139 | /* number of items in the source and destination buffers */ |
140 | int dest_nr_item, src_nr_item; | ||
132 | struct item_head *ih; | 141 | struct item_head *ih; |
133 | struct item_head *dih; | 142 | struct item_head *dih; |
134 | 143 | ||
135 | dest_nr_item = B_NR_ITEMS(dest); | 144 | dest_nr_item = B_NR_ITEMS(dest); |
136 | 145 | ||
146 | /* | ||
147 | * if ( DEST is empty or first item of SOURCE and last item of | ||
148 | * DEST are the items of different objects or of different types ) | ||
149 | * then there is no need to treat this item differently from the | ||
150 | * other items that we copy, so we return | ||
151 | */ | ||
137 | if (last_first == FIRST_TO_LAST) { | 152 | if (last_first == FIRST_TO_LAST) { |
138 | /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects | ||
139 | or of different types ) then there is no need to treat this item differently from the other items | ||
140 | that we copy, so we return */ | ||
141 | ih = item_head(src, 0); | 153 | ih = item_head(src, 0); |
142 | dih = item_head(dest, dest_nr_item - 1); | 154 | dih = item_head(dest, dest_nr_item - 1); |
155 | |||
156 | /* there is nothing to merge */ | ||
143 | if (!dest_nr_item | 157 | if (!dest_nr_item |
144 | || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) | 158 | || (!op_is_left_mergeable(&(ih->ih_key), src->b_size))) |
145 | /* there is nothing to merge */ | ||
146 | return 0; | 159 | return 0; |
147 | 160 | ||
148 | RFALSE(!ih_item_len(ih), | 161 | RFALSE(!ih_item_len(ih), |
@@ -157,8 +170,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
157 | return 1; | 170 | return 1; |
158 | } | 171 | } |
159 | 172 | ||
160 | /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST | 173 | /* |
161 | part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header | 174 | * copy part of the body of the first item of SOURCE |
175 | * to the end of the body of the last item of the DEST | ||
176 | * part defined by 'bytes_or_entries'; if bytes_or_entries | ||
177 | * == -1 copy whole body; don't create new item header | ||
162 | */ | 178 | */ |
163 | if (bytes_or_entries == -1) | 179 | if (bytes_or_entries == -1) |
164 | bytes_or_entries = ih_item_len(ih); | 180 | bytes_or_entries = ih_item_len(ih); |
@@ -176,8 +192,10 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
176 | } | 192 | } |
177 | #endif | 193 | #endif |
178 | 194 | ||
179 | /* merge first item (or its part) of src buffer with the last | 195 | /* |
180 | item of dest buffer. Both are of the same file */ | 196 | * merge first item (or its part) of src buffer with the last |
197 | * item of dest buffer. Both are of the same file | ||
198 | */ | ||
181 | leaf_paste_in_buffer(dest_bi, | 199 | leaf_paste_in_buffer(dest_bi, |
182 | dest_nr_item - 1, ih_item_len(dih), | 200 | dest_nr_item - 1, ih_item_len(dih), |
183 | bytes_or_entries, ih_item_body(src, ih), 0); | 201 | bytes_or_entries, ih_item_body(src, ih), 0); |
@@ -195,8 +213,9 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
195 | 213 | ||
196 | /* copy boundary item to right (last_first == LAST_TO_FIRST) */ | 214 | /* copy boundary item to right (last_first == LAST_TO_FIRST) */ |
197 | 215 | ||
198 | /* ( DEST is empty or last item of SOURCE and first item of DEST | 216 | /* |
199 | are the items of different object or of different types ) | 217 | * (DEST is empty or last item of SOURCE and first item of DEST |
218 | * are the items of different object or of different types) | ||
200 | */ | 219 | */ |
201 | src_nr_item = B_NR_ITEMS(src); | 220 | src_nr_item = B_NR_ITEMS(src); |
202 | ih = item_head(src, src_nr_item - 1); | 221 | ih = item_head(src, src_nr_item - 1); |
@@ -206,8 +225,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
206 | return 0; | 225 | return 0; |
207 | 226 | ||
208 | if (is_direntry_le_ih(ih)) { | 227 | if (is_direntry_le_ih(ih)) { |
228 | /* | ||
229 | * bytes_or_entries = entries number in last | ||
230 | * item body of SOURCE | ||
231 | */ | ||
209 | if (bytes_or_entries == -1) | 232 | if (bytes_or_entries == -1) |
210 | /* bytes_or_entries = entries number in last item body of SOURCE */ | ||
211 | bytes_or_entries = ih_entry_count(ih); | 233 | bytes_or_entries = ih_entry_count(ih); |
212 | 234 | ||
213 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, | 235 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, |
@@ -217,9 +239,11 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
217 | return 1; | 239 | return 1; |
218 | } | 240 | } |
219 | 241 | ||
220 | /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; | 242 | /* |
221 | part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; | 243 | * copy part of the body of the last item of SOURCE to the |
222 | don't create new item header | 244 | * begin of the body of the first item of the DEST; part defined |
245 | * by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; | ||
246 | * change first item key of the DEST; don't create new item header | ||
223 | */ | 247 | */ |
224 | 248 | ||
225 | RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), | 249 | RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih), |
@@ -276,9 +300,12 @@ static int leaf_copy_boundary_item(struct buffer_info *dest_bi, | |||
276 | return 1; | 300 | return 1; |
277 | } | 301 | } |
278 | 302 | ||
279 | /* copy cpy_mun items from buffer src to buffer dest | 303 | /* |
280 | * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest | 304 | * copy cpy_mun items from buffer src to buffer dest |
281 | * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest | 305 | * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning |
306 | * from first-th item in src to tail of dest | ||
307 | * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning | ||
308 | * from first-th item in src to head of dest | ||
282 | */ | 309 | */ |
283 | static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | 310 | static void leaf_copy_items_entirely(struct buffer_info *dest_bi, |
284 | struct buffer_head *src, int last_first, | 311 | struct buffer_head *src, int last_first, |
@@ -311,7 +338,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
311 | nr = blkh_nr_item(blkh); | 338 | nr = blkh_nr_item(blkh); |
312 | free_space = blkh_free_space(blkh); | 339 | free_space = blkh_free_space(blkh); |
313 | 340 | ||
314 | /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ | 341 | /* |
342 | * we will insert items before 0-th or nr-th item in dest buffer. | ||
343 | * It depends of last_first parameter | ||
344 | */ | ||
315 | dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; | 345 | dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; |
316 | 346 | ||
317 | /* location of head of first new item */ | 347 | /* location of head of first new item */ |
@@ -377,8 +407,10 @@ static void leaf_copy_items_entirely(struct buffer_info *dest_bi, | |||
377 | } | 407 | } |
378 | } | 408 | } |
379 | 409 | ||
380 | /* This function splits the (liquid) item into two items (useful when | 410 | /* |
381 | shifting part of an item into another node.) */ | 411 | * This function splits the (liquid) item into two items (useful when |
412 | * shifting part of an item into another node.) | ||
413 | */ | ||
382 | static void leaf_item_bottle(struct buffer_info *dest_bi, | 414 | static void leaf_item_bottle(struct buffer_info *dest_bi, |
383 | struct buffer_head *src, int last_first, | 415 | struct buffer_head *src, int last_first, |
384 | int item_num, int cpy_bytes) | 416 | int item_num, int cpy_bytes) |
@@ -390,7 +422,10 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
390 | "vs-10170: bytes == - 1 means: do not split item"); | 422 | "vs-10170: bytes == - 1 means: do not split item"); |
391 | 423 | ||
392 | if (last_first == FIRST_TO_LAST) { | 424 | if (last_first == FIRST_TO_LAST) { |
393 | /* if ( if item in position item_num in buffer SOURCE is directory item ) */ | 425 | /* |
426 | * if ( if item in position item_num in buffer SOURCE | ||
427 | * is directory item ) | ||
428 | */ | ||
394 | ih = item_head(src, item_num); | 429 | ih = item_head(src, item_num); |
395 | if (is_direntry_le_ih(ih)) | 430 | if (is_direntry_le_ih(ih)) |
396 | leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, | 431 | leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, |
@@ -398,9 +433,11 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
398 | else { | 433 | else { |
399 | struct item_head n_ih; | 434 | struct item_head n_ih; |
400 | 435 | ||
401 | /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST | 436 | /* |
402 | part defined by 'cpy_bytes'; create new item header; change old item_header (????); | 437 | * copy part of the body of the item number 'item_num' |
403 | n_ih = new item_header; | 438 | * of SOURCE to the end of the DEST part defined by |
439 | * 'cpy_bytes'; create new item header; change old | ||
440 | * item_header (????); n_ih = new item_header; | ||
404 | */ | 441 | */ |
405 | memcpy(&n_ih, ih, IH_SIZE); | 442 | memcpy(&n_ih, ih, IH_SIZE); |
406 | put_ih_item_len(&n_ih, cpy_bytes); | 443 | put_ih_item_len(&n_ih, cpy_bytes); |
@@ -419,7 +456,10 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
419 | item_body(src, item_num), 0); | 456 | item_body(src, item_num), 0); |
420 | } | 457 | } |
421 | } else { | 458 | } else { |
422 | /* if ( if item in position item_num in buffer SOURCE is directory item ) */ | 459 | /* |
460 | * if ( if item in position item_num in buffer | ||
461 | * SOURCE is directory item ) | ||
462 | */ | ||
423 | ih = item_head(src, item_num); | 463 | ih = item_head(src, item_num); |
424 | if (is_direntry_le_ih(ih)) | 464 | if (is_direntry_le_ih(ih)) |
425 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, | 465 | leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, |
@@ -429,13 +469,16 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
429 | else { | 469 | else { |
430 | struct item_head n_ih; | 470 | struct item_head n_ih; |
431 | 471 | ||
432 | /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST | 472 | /* |
433 | part defined by 'cpy_bytes'; create new item header; | 473 | * copy part of the body of the item number 'item_num' |
434 | n_ih = new item_header; | 474 | * of SOURCE to the begin of the DEST part defined by |
475 | * 'cpy_bytes'; create new item header; | ||
476 | * n_ih = new item_header; | ||
435 | */ | 477 | */ |
436 | memcpy(&n_ih, ih, SHORT_KEY_SIZE); | 478 | memcpy(&n_ih, ih, SHORT_KEY_SIZE); |
437 | 479 | ||
438 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ | 480 | /* Endian safe, both le */ |
481 | n_ih.ih_version = ih->ih_version; | ||
439 | 482 | ||
440 | if (is_direct_le_ih(ih)) { | 483 | if (is_direct_le_ih(ih)) { |
441 | set_le_ih_k_offset(&n_ih, | 484 | set_le_ih_k_offset(&n_ih, |
@@ -459,7 +502,8 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
459 | /* set item length */ | 502 | /* set item length */ |
460 | put_ih_item_len(&n_ih, cpy_bytes); | 503 | put_ih_item_len(&n_ih, cpy_bytes); |
461 | 504 | ||
462 | n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ | 505 | /* Endian safe, both le */ |
506 | n_ih.ih_version = ih->ih_version; | ||
463 | 507 | ||
464 | leaf_insert_into_buf(dest_bi, 0, &n_ih, | 508 | leaf_insert_into_buf(dest_bi, 0, &n_ih, |
465 | item_body(src, item_num) + | 509 | item_body(src, item_num) + |
@@ -468,10 +512,12 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, | |||
468 | } | 512 | } |
469 | } | 513 | } |
470 | 514 | ||
471 | /* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. | 515 | /* |
472 | If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. | 516 | * If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE |
473 | From last item copy cpy_num bytes for regular item and cpy_num directory entries for | 517 | * to DEST. If cpy_bytes not equal to minus one than copy cpy_num-1 whole |
474 | directory item. */ | 518 | * items from SOURCE to DEST. From last item copy cpy_num bytes for regular |
519 | * item and cpy_num directory entries for directory item. | ||
520 | */ | ||
475 | static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | 521 | static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, |
476 | int last_first, int cpy_num, int cpy_bytes) | 522 | int last_first, int cpy_num, int cpy_bytes) |
477 | { | 523 | { |
@@ -498,22 +544,34 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
498 | else | 544 | else |
499 | bytes = -1; | 545 | bytes = -1; |
500 | 546 | ||
501 | /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ | 547 | /* |
548 | * copy the first item or it part or nothing to the end of | ||
549 | * the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) | ||
550 | */ | ||
502 | i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); | 551 | i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes); |
503 | cpy_num -= i; | 552 | cpy_num -= i; |
504 | if (cpy_num == 0) | 553 | if (cpy_num == 0) |
505 | return i; | 554 | return i; |
506 | pos += i; | 555 | pos += i; |
507 | if (cpy_bytes == -1) | 556 | if (cpy_bytes == -1) |
508 | /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ | 557 | /* |
558 | * copy first cpy_num items starting from position | ||
559 | * 'pos' of SOURCE to end of DEST | ||
560 | */ | ||
509 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, | 561 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, |
510 | pos, cpy_num); | 562 | pos, cpy_num); |
511 | else { | 563 | else { |
512 | /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ | 564 | /* |
565 | * copy first cpy_num-1 items starting from position | ||
566 | * 'pos-1' of the SOURCE to the end of the DEST | ||
567 | */ | ||
513 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, | 568 | leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST, |
514 | pos, cpy_num - 1); | 569 | pos, cpy_num - 1); |
515 | 570 | ||
516 | /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ | 571 | /* |
572 | * copy part of the item which number is | ||
573 | * cpy_num+pos-1 to the end of the DEST | ||
574 | */ | ||
517 | leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, | 575 | leaf_item_bottle(dest_bi, src, FIRST_TO_LAST, |
518 | cpy_num + pos - 1, cpy_bytes); | 576 | cpy_num + pos - 1, cpy_bytes); |
519 | } | 577 | } |
@@ -525,7 +583,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
525 | else | 583 | else |
526 | bytes = -1; | 584 | bytes = -1; |
527 | 585 | ||
528 | /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ | 586 | /* |
587 | * copy the last item or it part or nothing to the | ||
588 | * begin of the DEST | ||
589 | * (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); | ||
590 | */ | ||
529 | i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); | 591 | i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes); |
530 | 592 | ||
531 | cpy_num -= i; | 593 | cpy_num -= i; |
@@ -534,15 +596,24 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
534 | 596 | ||
535 | pos = src_nr_item - cpy_num - i; | 597 | pos = src_nr_item - cpy_num - i; |
536 | if (cpy_bytes == -1) { | 598 | if (cpy_bytes == -1) { |
537 | /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ | 599 | /* |
600 | * starting from position 'pos' copy last cpy_num | ||
601 | * items of SOURCE to begin of DEST | ||
602 | */ | ||
538 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, | 603 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, |
539 | pos, cpy_num); | 604 | pos, cpy_num); |
540 | } else { | 605 | } else { |
541 | /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ | 606 | /* |
607 | * copy last cpy_num-1 items starting from position | ||
608 | * 'pos+1' of the SOURCE to the begin of the DEST; | ||
609 | */ | ||
542 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, | 610 | leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST, |
543 | pos + 1, cpy_num - 1); | 611 | pos + 1, cpy_num - 1); |
544 | 612 | ||
545 | /* copy part of the item which number is pos to the begin of the DEST */ | 613 | /* |
614 | * copy part of the item which number is pos to | ||
615 | * the begin of the DEST | ||
616 | */ | ||
546 | leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, | 617 | leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos, |
547 | cpy_bytes); | 618 | cpy_bytes); |
548 | } | 619 | } |
@@ -550,9 +621,11 @@ static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src, | |||
550 | return i; | 621 | return i; |
551 | } | 622 | } |
552 | 623 | ||
553 | /* there are types of coping: from S[0] to L[0], from S[0] to R[0], | 624 | /* |
554 | from R[0] to L[0]. for each of these we have to define parent and | 625 | * there are types of coping: from S[0] to L[0], from S[0] to R[0], |
555 | positions of destination and source buffers */ | 626 | * from R[0] to L[0]. for each of these we have to define parent and |
627 | * positions of destination and source buffers | ||
628 | */ | ||
556 | static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | 629 | static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, |
557 | struct buffer_info *dest_bi, | 630 | struct buffer_info *dest_bi, |
558 | struct buffer_info *src_bi, | 631 | struct buffer_info *src_bi, |
@@ -568,7 +641,9 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | |||
568 | src_bi->tb = tb; | 641 | src_bi->tb = tb; |
569 | src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); | 642 | src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); |
570 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); | 643 | src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); |
571 | src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */ | 644 | |
645 | /* src->b_item_order */ | ||
646 | src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); | ||
572 | dest_bi->tb = tb; | 647 | dest_bi->tb = tb; |
573 | dest_bi->bi_bh = tb->L[0]; | 648 | dest_bi->bi_bh = tb->L[0]; |
574 | dest_bi->bi_parent = tb->FL[0]; | 649 | dest_bi->bi_parent = tb->FL[0]; |
@@ -633,8 +708,10 @@ static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb, | |||
633 | shift_mode, src_bi->bi_bh, dest_bi->bi_bh); | 708 | shift_mode, src_bi->bi_bh, dest_bi->bi_bh); |
634 | } | 709 | } |
635 | 710 | ||
636 | /* copy mov_num items and mov_bytes of the (mov_num-1)th item to | 711 | /* |
637 | neighbor. Delete them from source */ | 712 | * copy mov_num items and mov_bytes of the (mov_num-1)th item to |
713 | * neighbor. Delete them from source | ||
714 | */ | ||
638 | int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, | 715 | int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, |
639 | int mov_bytes, struct buffer_head *Snew) | 716 | int mov_bytes, struct buffer_head *Snew) |
640 | { | 717 | { |
@@ -657,18 +734,24 @@ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, | |||
657 | return ret_value; | 734 | return ret_value; |
658 | } | 735 | } |
659 | 736 | ||
660 | /* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) | 737 | /* |
661 | from S[0] to L[0] and replace the delimiting key */ | 738 | * Shift shift_num items (and shift_bytes of last shifted item if |
739 | * shift_bytes != -1) from S[0] to L[0] and replace the delimiting key | ||
740 | */ | ||
662 | int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) | 741 | int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) |
663 | { | 742 | { |
664 | struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); | 743 | struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path); |
665 | int i; | 744 | int i; |
666 | 745 | ||
667 | /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ | 746 | /* |
747 | * move shift_num (and shift_bytes bytes) items from S[0] | ||
748 | * to left neighbor L[0] | ||
749 | */ | ||
668 | i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); | 750 | i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); |
669 | 751 | ||
670 | if (shift_num) { | 752 | if (shift_num) { |
671 | if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */ | 753 | /* number of items in S[0] == 0 */ |
754 | if (B_NR_ITEMS(S0) == 0) { | ||
672 | 755 | ||
673 | RFALSE(shift_bytes != -1, | 756 | RFALSE(shift_bytes != -1, |
674 | "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", | 757 | "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", |
@@ -704,13 +787,18 @@ int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes) | |||
704 | 787 | ||
705 | /* CLEANING STOPPED HERE */ | 788 | /* CLEANING STOPPED HERE */ |
706 | 789 | ||
707 | /* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ | 790 | /* |
791 | * Shift shift_num (shift_bytes) items from S[0] to the right neighbor, | ||
792 | * and replace the delimiting key | ||
793 | */ | ||
708 | int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) | 794 | int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) |
709 | { | 795 | { |
710 | // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); | ||
711 | int ret_value; | 796 | int ret_value; |
712 | 797 | ||
713 | /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ | 798 | /* |
799 | * move shift_num (and shift_bytes) items from S[0] to | ||
800 | * right neighbor R[0] | ||
801 | */ | ||
714 | ret_value = | 802 | ret_value = |
715 | leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); | 803 | leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); |
716 | 804 | ||
@@ -725,12 +813,16 @@ int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes) | |||
725 | 813 | ||
726 | static void leaf_delete_items_entirely(struct buffer_info *bi, | 814 | static void leaf_delete_items_entirely(struct buffer_info *bi, |
727 | int first, int del_num); | 815 | int first, int del_num); |
728 | /* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. | 816 | /* |
729 | If not. | 817 | * If del_bytes == -1, starting from position 'first' delete del_num |
730 | If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of | 818 | * items in whole in buffer CUR. |
731 | the first item. Part defined by del_bytes. Don't delete first item header | 819 | * If not. |
732 | If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of | 820 | * If last_first == 0. Starting from position 'first' delete del_num-1 |
733 | the last item . Part defined by del_bytes. Don't delete last item header. | 821 | * items in whole. Delete part of body of the first item. Part defined by |
822 | * del_bytes. Don't delete first item header | ||
823 | * If last_first == 1. Starting from position 'first+1' delete del_num-1 | ||
824 | * items in whole. Delete part of body of the last item . Part defined by | ||
825 | * del_bytes. Don't delete last item header. | ||
734 | */ | 826 | */ |
735 | void leaf_delete_items(struct buffer_info *cur_bi, int last_first, | 827 | void leaf_delete_items(struct buffer_info *cur_bi, int last_first, |
736 | int first, int del_num, int del_bytes) | 828 | int first, int del_num, int del_bytes) |
@@ -761,32 +853,43 @@ void leaf_delete_items(struct buffer_info *cur_bi, int last_first, | |||
761 | leaf_delete_items_entirely(cur_bi, first, del_num); | 853 | leaf_delete_items_entirely(cur_bi, first, del_num); |
762 | else { | 854 | else { |
763 | if (last_first == FIRST_TO_LAST) { | 855 | if (last_first == FIRST_TO_LAST) { |
764 | /* delete del_num-1 items beginning from item in position first */ | 856 | /* |
857 | * delete del_num-1 items beginning from | ||
858 | * item in position first | ||
859 | */ | ||
765 | leaf_delete_items_entirely(cur_bi, first, del_num - 1); | 860 | leaf_delete_items_entirely(cur_bi, first, del_num - 1); |
766 | 861 | ||
767 | /* delete the part of the first item of the bh | 862 | /* |
768 | do not delete item header | 863 | * delete the part of the first item of the bh |
864 | * do not delete item header | ||
769 | */ | 865 | */ |
770 | leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); | 866 | leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes); |
771 | } else { | 867 | } else { |
772 | struct item_head *ih; | 868 | struct item_head *ih; |
773 | int len; | 869 | int len; |
774 | 870 | ||
775 | /* delete del_num-1 items beginning from item in position first+1 */ | 871 | /* |
872 | * delete del_num-1 items beginning from | ||
873 | * item in position first+1 | ||
874 | */ | ||
776 | leaf_delete_items_entirely(cur_bi, first + 1, | 875 | leaf_delete_items_entirely(cur_bi, first + 1, |
777 | del_num - 1); | 876 | del_num - 1); |
778 | 877 | ||
779 | ih = item_head(bh, B_NR_ITEMS(bh) - 1); | 878 | ih = item_head(bh, B_NR_ITEMS(bh) - 1); |
780 | if (is_direntry_le_ih(ih)) | 879 | if (is_direntry_le_ih(ih)) |
781 | /* the last item is directory */ | 880 | /* the last item is directory */ |
782 | /* len = numbers of directory entries in this item */ | 881 | /* |
882 | * len = numbers of directory entries | ||
883 | * in this item | ||
884 | */ | ||
783 | len = ih_entry_count(ih); | 885 | len = ih_entry_count(ih); |
784 | else | 886 | else |
785 | /* len = body len of item */ | 887 | /* len = body len of item */ |
786 | len = ih_item_len(ih); | 888 | len = ih_item_len(ih); |
787 | 889 | ||
788 | /* delete the part of the last item of the bh | 890 | /* |
789 | do not delete item header | 891 | * delete the part of the last item of the bh |
892 | * do not delete item header | ||
790 | */ | 893 | */ |
791 | leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, | 894 | leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1, |
792 | len - del_bytes, del_bytes); | 895 | len - del_bytes, del_bytes); |
@@ -867,8 +970,10 @@ void leaf_insert_into_buf(struct buffer_info *bi, int before, | |||
867 | } | 970 | } |
868 | } | 971 | } |
869 | 972 | ||
870 | /* paste paste_size bytes to affected_item_num-th item. | 973 | /* |
871 | When item is a directory, this only prepare space for new entries */ | 974 | * paste paste_size bytes to affected_item_num-th item. |
975 | * When item is a directory, this only prepare space for new entries | ||
976 | */ | ||
872 | void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | 977 | void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, |
873 | int pos_in_item, int paste_size, | 978 | int pos_in_item, int paste_size, |
874 | const char *body, int zeros_number) | 979 | const char *body, int zeros_number) |
@@ -957,10 +1062,12 @@ void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num, | |||
957 | } | 1062 | } |
958 | } | 1063 | } |
959 | 1064 | ||
960 | /* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item | 1065 | /* |
961 | does not have free space, so it moves DEHs and remaining records as | 1066 | * cuts DEL_COUNT entries beginning from FROM-th entry. Directory item |
962 | necessary. Return value is size of removed part of directory item | 1067 | * does not have free space, so it moves DEHs and remaining records as |
963 | in bytes. */ | 1068 | * necessary. Return value is size of removed part of directory item |
1069 | * in bytes. | ||
1070 | */ | ||
964 | static int leaf_cut_entries(struct buffer_head *bh, | 1071 | static int leaf_cut_entries(struct buffer_head *bh, |
965 | struct item_head *ih, int from, int del_count) | 1072 | struct item_head *ih, int from, int del_count) |
966 | { | 1073 | { |
@@ -971,8 +1078,10 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
971 | int cut_records_len; /* length of all removed records */ | 1078 | int cut_records_len; /* length of all removed records */ |
972 | int i; | 1079 | int i; |
973 | 1080 | ||
974 | /* make sure, that item is directory and there are enough entries to | 1081 | /* |
975 | remove */ | 1082 | * make sure that item is directory and there are enough entries to |
1083 | * remove | ||
1084 | */ | ||
976 | RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); | 1085 | RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item"); |
977 | RFALSE(ih_entry_count(ih) < from + del_count, | 1086 | RFALSE(ih_entry_count(ih) < from + del_count, |
978 | "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", | 1087 | "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d", |
@@ -987,8 +1096,10 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
987 | /* entry head array */ | 1096 | /* entry head array */ |
988 | deh = B_I_DEH(bh, ih); | 1097 | deh = B_I_DEH(bh, ih); |
989 | 1098 | ||
990 | /* first byte of remaining entries, those are BEFORE cut entries | 1099 | /* |
991 | (prev_record) and length of all removed records (cut_records_len) */ | 1100 | * first byte of remaining entries, those are BEFORE cut entries |
1101 | * (prev_record) and length of all removed records (cut_records_len) | ||
1102 | */ | ||
992 | prev_record_offset = | 1103 | prev_record_offset = |
993 | (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); | 1104 | (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih)); |
994 | cut_records_len = prev_record_offset /*from_record */ - | 1105 | cut_records_len = prev_record_offset /*from_record */ - |
@@ -1021,14 +1132,15 @@ static int leaf_cut_entries(struct buffer_head *bh, | |||
1021 | return DEH_SIZE * del_count + cut_records_len; | 1132 | return DEH_SIZE * del_count + cut_records_len; |
1022 | } | 1133 | } |
1023 | 1134 | ||
1024 | /* when cut item is part of regular file | 1135 | /* |
1025 | pos_in_item - first byte that must be cut | 1136 | * when cut item is part of regular file |
1026 | cut_size - number of bytes to be cut beginning from pos_in_item | 1137 | * pos_in_item - first byte that must be cut |
1027 | 1138 | * cut_size - number of bytes to be cut beginning from pos_in_item | |
1028 | when cut item is part of directory | 1139 | * |
1029 | pos_in_item - number of first deleted entry | 1140 | * when cut item is part of directory |
1030 | cut_size - count of deleted entries | 1141 | * pos_in_item - number of first deleted entry |
1031 | */ | 1142 | * cut_size - count of deleted entries |
1143 | */ | ||
1032 | void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | 1144 | void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, |
1033 | int pos_in_item, int cut_size) | 1145 | int pos_in_item, int cut_size) |
1034 | { | 1146 | { |
@@ -1055,7 +1167,6 @@ void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, | |||
1055 | cut_item_num); | 1167 | cut_item_num); |
1056 | /* change item key by key of first entry in the item */ | 1168 | /* change item key by key of first entry in the item */ |
1057 | set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); | 1169 | set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih))); |
1058 | /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */ | ||
1059 | } | 1170 | } |
1060 | } else { | 1171 | } else { |
1061 | /* item is direct or indirect */ | 1172 | /* item is direct or indirect */ |
@@ -1195,7 +1306,10 @@ static void leaf_delete_items_entirely(struct buffer_info *bi, | |||
1195 | } | 1306 | } |
1196 | } | 1307 | } |
1197 | 1308 | ||
1198 | /* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ | 1309 | /* |
1310 | * paste new_entry_count entries (new_dehs, records) into position | ||
1311 | * before to item_num-th item | ||
1312 | */ | ||
1199 | void leaf_paste_entries(struct buffer_info *bi, | 1313 | void leaf_paste_entries(struct buffer_info *bi, |
1200 | int item_num, | 1314 | int item_num, |
1201 | int before, | 1315 | int before, |
@@ -1215,7 +1329,10 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1215 | 1329 | ||
1216 | ih = item_head(bh, item_num); | 1330 | ih = item_head(bh, item_num); |
1217 | 1331 | ||
1218 | /* make sure, that item is directory, and there are enough records in it */ | 1332 | /* |
1333 | * make sure, that item is directory, and there are enough | ||
1334 | * records in it | ||
1335 | */ | ||
1219 | RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); | 1336 | RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item"); |
1220 | RFALSE(ih_entry_count(ih) < before, | 1337 | RFALSE(ih_entry_count(ih) < before, |
1221 | "10230: there are no entry we paste entries before. entry_count = %d, before = %d", | 1338 | "10230: there are no entry we paste entries before. entry_count = %d, before = %d", |
@@ -1277,8 +1394,6 @@ void leaf_paste_entries(struct buffer_info *bi, | |||
1277 | /* change item key if necessary (when we paste before 0-th entry */ | 1394 | /* change item key if necessary (when we paste before 0-th entry */ |
1278 | if (!before) { | 1395 | if (!before) { |
1279 | set_le_ih_k_offset(ih, deh_offset(new_dehs)); | 1396 | set_le_ih_k_offset(ih, deh_offset(new_dehs)); |
1280 | /* memcpy (&ih->ih_key.k_offset, | ||
1281 | &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ | ||
1282 | } | 1397 | } |
1283 | #ifdef CONFIG_REISERFS_CHECK | 1398 | #ifdef CONFIG_REISERFS_CHECK |
1284 | { | 1399 | { |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 1ce8fbea4749..6bc38de8357f 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -22,8 +22,10 @@ | |||
22 | #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } | 22 | #define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { inc_nlink(i); if (i->i_nlink >= REISERFS_LINK_MAX) set_nlink(i, 1); } |
23 | #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); | 23 | #define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) drop_nlink(i); |
24 | 24 | ||
25 | // directory item contains array of entry headers. This performs | 25 | /* |
26 | // binary search through that array | 26 | * directory item contains array of entry headers. This performs |
27 | * binary search through that array | ||
28 | */ | ||
27 | static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | 29 | static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) |
28 | { | 30 | { |
29 | struct item_head *ih = de->de_ih; | 31 | struct item_head *ih = de->de_ih; |
@@ -43,7 +45,7 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | |||
43 | lbound = j + 1; | 45 | lbound = j + 1; |
44 | continue; | 46 | continue; |
45 | } | 47 | } |
46 | // this is not name found, but matched third key component | 48 | /* this is not name found, but matched third key component */ |
47 | de->de_entry_num = j; | 49 | de->de_entry_num = j; |
48 | return NAME_FOUND; | 50 | return NAME_FOUND; |
49 | } | 51 | } |
@@ -52,7 +54,9 @@ static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off) | |||
52 | return NAME_NOT_FOUND; | 54 | return NAME_NOT_FOUND; |
53 | } | 55 | } |
54 | 56 | ||
55 | // comment? maybe something like set de to point to what the path points to? | 57 | /* |
58 | * comment? maybe something like set de to point to what the path points to? | ||
59 | */ | ||
56 | static inline void set_de_item_location(struct reiserfs_dir_entry *de, | 60 | static inline void set_de_item_location(struct reiserfs_dir_entry *de, |
57 | struct treepath *path) | 61 | struct treepath *path) |
58 | { | 62 | { |
@@ -62,7 +66,9 @@ static inline void set_de_item_location(struct reiserfs_dir_entry *de, | |||
62 | de->de_item_num = PATH_LAST_POSITION(path); | 66 | de->de_item_num = PATH_LAST_POSITION(path); |
63 | } | 67 | } |
64 | 68 | ||
65 | // de_bh, de_ih, de_deh (points to first element of array), de_item_num is set | 69 | /* |
70 | * de_bh, de_ih, de_deh (points to first element of array), de_item_num is set | ||
71 | */ | ||
66 | inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | 72 | inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) |
67 | { | 73 | { |
68 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; | 74 | struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num; |
@@ -76,7 +82,7 @@ inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de) | |||
76 | de->de_namelen = strlen(de->de_name); | 82 | de->de_namelen = strlen(de->de_name); |
77 | } | 83 | } |
78 | 84 | ||
79 | // what entry points to | 85 | /* what entry points to */ |
80 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) | 86 | static inline void set_de_object_key(struct reiserfs_dir_entry *de) |
81 | { | 87 | { |
82 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); | 88 | BUG_ON(de->de_entry_num >= ih_entry_count(de->de_ih)); |
@@ -100,17 +106,16 @@ static inline void store_de_entry_key(struct reiserfs_dir_entry *de) | |||
100 | set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); | 106 | set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY); |
101 | } | 107 | } |
102 | 108 | ||
103 | /* We assign a key to each directory item, and place multiple entries | 109 | /* |
104 | in a single directory item. A directory item has a key equal to the | 110 | * We assign a key to each directory item, and place multiple entries in a |
105 | key of the first directory entry in it. | 111 | * single directory item. A directory item has a key equal to the key of |
106 | 112 | * the first directory entry in it. | |
107 | This function first calls search_by_key, then, if item whose first | 113 | |
108 | entry matches is not found it looks for the entry inside directory | 114 | * This function first calls search_by_key, then, if item whose first entry |
109 | item found by search_by_key. Fills the path to the entry, and to the | 115 | * matches is not found it looks for the entry inside directory item found |
110 | entry position in the item | 116 | * by search_by_key. Fills the path to the entry, and to the entry position |
111 | 117 | * in the item | |
112 | */ | 118 | */ |
113 | |||
114 | /* The function is NOT SCHEDULE-SAFE! */ | 119 | /* The function is NOT SCHEDULE-SAFE! */ |
115 | int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | 120 | int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, |
116 | struct treepath *path, struct reiserfs_dir_entry *de) | 121 | struct treepath *path, struct reiserfs_dir_entry *de) |
@@ -152,12 +157,17 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | |||
152 | } | 157 | } |
153 | #endif /* CONFIG_REISERFS_CHECK */ | 158 | #endif /* CONFIG_REISERFS_CHECK */ |
154 | 159 | ||
155 | /* binary search in directory item by third componen t of the | 160 | /* |
156 | key. sets de->de_entry_num of de */ | 161 | * binary search in directory item by third component of the |
162 | * key. sets de->de_entry_num of de | ||
163 | */ | ||
157 | retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); | 164 | retval = bin_search_in_dir_item(de, cpu_key_k_offset(key)); |
158 | path->pos_in_item = de->de_entry_num; | 165 | path->pos_in_item = de->de_entry_num; |
159 | if (retval != NAME_NOT_FOUND) { | 166 | if (retval != NAME_NOT_FOUND) { |
160 | // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set | 167 | /* |
168 | * ugly, but rename needs de_bh, de_deh, de_name, | ||
169 | * de_namelen, de_objectid set | ||
170 | */ | ||
161 | set_de_name_and_namelen(de); | 171 | set_de_name_and_namelen(de); |
162 | set_de_object_key(de); | 172 | set_de_object_key(de); |
163 | } | 173 | } |
@@ -166,11 +176,12 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, | |||
166 | 176 | ||
167 | /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ | 177 | /* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ |
168 | 178 | ||
169 | /* The third component is hashed, and you can choose from more than | 179 | /* |
170 | one hash function. Per directory hashes are not yet implemented | 180 | * The third component is hashed, and you can choose from more than |
171 | but are thought about. This function should be moved to hashes.c | 181 | * one hash function. Per directory hashes are not yet implemented |
172 | Jedi, please do so. -Hans */ | 182 | * but are thought about. This function should be moved to hashes.c |
173 | 183 | * Jedi, please do so. -Hans | |
184 | */ | ||
174 | static __u32 get_third_component(struct super_block *s, | 185 | static __u32 get_third_component(struct super_block *s, |
175 | const char *name, int len) | 186 | const char *name, int len) |
176 | { | 187 | { |
@@ -183,11 +194,13 @@ static __u32 get_third_component(struct super_block *s, | |||
183 | 194 | ||
184 | res = REISERFS_SB(s)->s_hash_function(name, len); | 195 | res = REISERFS_SB(s)->s_hash_function(name, len); |
185 | 196 | ||
186 | // take bits from 7-th to 30-th including both bounds | 197 | /* take bits from 7-th to 30-th including both bounds */ |
187 | res = GET_HASH_VALUE(res); | 198 | res = GET_HASH_VALUE(res); |
188 | if (res == 0) | 199 | if (res == 0) |
189 | // needed to have no names before "." and ".." those have hash | 200 | /* |
190 | // value == 0 and generation conters 1 and 2 accordingly | 201 | * needed to have no names before "." and ".." those have hash |
202 | * value == 0 and generation conters 1 and 2 accordingly | ||
203 | */ | ||
191 | res = 128; | 204 | res = 128; |
192 | return res + MAX_GENERATION_NUMBER; | 205 | return res + MAX_GENERATION_NUMBER; |
193 | } | 206 | } |
@@ -208,7 +221,7 @@ static int reiserfs_match(struct reiserfs_dir_entry *de, | |||
208 | 221 | ||
209 | /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ | 222 | /* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ |
210 | 223 | ||
211 | /* used when hash collisions exist */ | 224 | /* used when hash collisions exist */ |
212 | 225 | ||
213 | static int linear_search_in_dir_item(struct cpu_key *key, | 226 | static int linear_search_in_dir_item(struct cpu_key *key, |
214 | struct reiserfs_dir_entry *de, | 227 | struct reiserfs_dir_entry *de, |
@@ -232,43 +245,50 @@ static int linear_search_in_dir_item(struct cpu_key *key, | |||
232 | deh += i; | 245 | deh += i; |
233 | 246 | ||
234 | for (; i >= 0; i--, deh--) { | 247 | for (; i >= 0; i--, deh--) { |
248 | /* hash value does not match, no need to check whole name */ | ||
235 | if (GET_HASH_VALUE(deh_offset(deh)) != | 249 | if (GET_HASH_VALUE(deh_offset(deh)) != |
236 | GET_HASH_VALUE(cpu_key_k_offset(key))) { | 250 | GET_HASH_VALUE(cpu_key_k_offset(key))) { |
237 | // hash value does not match, no need to check whole name | ||
238 | return NAME_NOT_FOUND; | 251 | return NAME_NOT_FOUND; |
239 | } | 252 | } |
240 | 253 | ||
241 | /* mark, that this generation number is used */ | 254 | /* mark that this generation number is used */ |
242 | if (de->de_gen_number_bit_string) | 255 | if (de->de_gen_number_bit_string) |
243 | set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), | 256 | set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), |
244 | de->de_gen_number_bit_string); | 257 | de->de_gen_number_bit_string); |
245 | 258 | ||
246 | // calculate pointer to name and namelen | 259 | /* calculate pointer to name and namelen */ |
247 | de->de_entry_num = i; | 260 | de->de_entry_num = i; |
248 | set_de_name_and_namelen(de); | 261 | set_de_name_and_namelen(de); |
249 | 262 | ||
263 | /* | ||
264 | * de's de_name, de_namelen, de_recordlen are set. | ||
265 | * Fill the rest. | ||
266 | */ | ||
250 | if ((retval = | 267 | if ((retval = |
251 | reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { | 268 | reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) { |
252 | // de's de_name, de_namelen, de_recordlen are set. Fill the rest: | ||
253 | 269 | ||
254 | // key of pointed object | 270 | /* key of pointed object */ |
255 | set_de_object_key(de); | 271 | set_de_object_key(de); |
256 | 272 | ||
257 | store_de_entry_key(de); | 273 | store_de_entry_key(de); |
258 | 274 | ||
259 | // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE | 275 | /* retval can be NAME_FOUND or NAME_FOUND_INVISIBLE */ |
260 | return retval; | 276 | return retval; |
261 | } | 277 | } |
262 | } | 278 | } |
263 | 279 | ||
264 | if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) | 280 | if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0) |
265 | /* we have reached left most entry in the node. In common we | 281 | /* |
266 | have to go to the left neighbor, but if generation counter | 282 | * we have reached left most entry in the node. In common we |
267 | is 0 already, we know for sure, that there is no name with | 283 | * have to go to the left neighbor, but if generation counter |
268 | the same hash value */ | 284 | * is 0 already, we know for sure, that there is no name with |
269 | // FIXME: this work correctly only because hash value can not | 285 | * the same hash value |
270 | // be 0. Btw, in case of Yura's hash it is probably possible, | 286 | */ |
271 | // so, this is a bug | 287 | /* |
288 | * FIXME: this work correctly only because hash value can not | ||
289 | * be 0. Btw, in case of Yura's hash it is probably possible, | ||
290 | * so, this is a bug | ||
291 | */ | ||
272 | return NAME_NOT_FOUND; | 292 | return NAME_NOT_FOUND; |
273 | 293 | ||
274 | RFALSE(de->de_item_num, | 294 | RFALSE(de->de_item_num, |
@@ -277,8 +297,10 @@ static int linear_search_in_dir_item(struct cpu_key *key, | |||
277 | return GOTO_PREVIOUS_ITEM; | 297 | return GOTO_PREVIOUS_ITEM; |
278 | } | 298 | } |
279 | 299 | ||
280 | // may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND | 300 | /* |
281 | // FIXME: should add something like IOERROR | 301 | * may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND |
302 | * FIXME: should add something like IOERROR | ||
303 | */ | ||
282 | static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, | 304 | static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, |
283 | struct treepath *path_to_entry, | 305 | struct treepath *path_to_entry, |
284 | struct reiserfs_dir_entry *de) | 306 | struct reiserfs_dir_entry *de) |
@@ -307,13 +329,19 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, | |||
307 | retval = | 329 | retval = |
308 | linear_search_in_dir_item(&key_to_search, de, name, | 330 | linear_search_in_dir_item(&key_to_search, de, name, |
309 | namelen); | 331 | namelen); |
332 | /* | ||
333 | * there is no need to scan directory anymore. | ||
334 | * Given entry found or does not exist | ||
335 | */ | ||
310 | if (retval != GOTO_PREVIOUS_ITEM) { | 336 | if (retval != GOTO_PREVIOUS_ITEM) { |
311 | /* there is no need to scan directory anymore. Given entry found or does not exist */ | ||
312 | path_to_entry->pos_in_item = de->de_entry_num; | 337 | path_to_entry->pos_in_item = de->de_entry_num; |
313 | return retval; | 338 | return retval; |
314 | } | 339 | } |
315 | 340 | ||
316 | /* there is left neighboring item of this directory and given entry can be there */ | 341 | /* |
342 | * there is left neighboring item of this directory | ||
343 | * and given entry can be there | ||
344 | */ | ||
317 | set_cpu_key_k_offset(&key_to_search, | 345 | set_cpu_key_k_offset(&key_to_search, |
318 | le_ih_k_offset(de->de_ih) - 1); | 346 | le_ih_k_offset(de->de_ih) - 1); |
319 | pathrelse(path_to_entry); | 347 | pathrelse(path_to_entry); |
@@ -347,8 +375,10 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
347 | return ERR_PTR(-EACCES); | 375 | return ERR_PTR(-EACCES); |
348 | } | 376 | } |
349 | 377 | ||
350 | /* Propagate the private flag so we know we're | 378 | /* |
351 | * in the priv tree */ | 379 | * Propagate the private flag so we know we're |
380 | * in the priv tree | ||
381 | */ | ||
352 | if (IS_PRIVATE(dir)) | 382 | if (IS_PRIVATE(dir)) |
353 | inode->i_flags |= S_PRIVATE; | 383 | inode->i_flags |= S_PRIVATE; |
354 | } | 384 | } |
@@ -361,9 +391,9 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, | |||
361 | } | 391 | } |
362 | 392 | ||
363 | /* | 393 | /* |
364 | ** looks up the dentry of the parent directory for child. | 394 | * looks up the dentry of the parent directory for child. |
365 | ** taken from ext2_get_parent | 395 | * taken from ext2_get_parent |
366 | */ | 396 | */ |
367 | struct dentry *reiserfs_get_parent(struct dentry *child) | 397 | struct dentry *reiserfs_get_parent(struct dentry *child) |
368 | { | 398 | { |
369 | int retval; | 399 | int retval; |
@@ -406,8 +436,13 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
406 | struct reiserfs_dir_entry de; | 436 | struct reiserfs_dir_entry de; |
407 | DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); | 437 | DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); |
408 | int gen_number; | 438 | int gen_number; |
409 | char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc | 439 | |
410 | if we create file with short name */ | 440 | /* |
441 | * 48 bytes now and we avoid kmalloc if we | ||
442 | * create file with short name | ||
443 | */ | ||
444 | char small_buf[32 + DEH_SIZE]; | ||
445 | |||
411 | char *buffer; | 446 | char *buffer; |
412 | int buflen, paste_size; | 447 | int buflen, paste_size; |
413 | int retval; | 448 | int retval; |
@@ -439,21 +474,30 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
439 | (get_inode_sd_version(dir) == | 474 | (get_inode_sd_version(dir) == |
440 | STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; | 475 | STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; |
441 | 476 | ||
442 | /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ | 477 | /* |
478 | * fill buffer : directory entry head, name[, dir objectid | , | ||
479 | * stat data | ,stat data, dir objectid ] | ||
480 | */ | ||
443 | deh = (struct reiserfs_de_head *)buffer; | 481 | deh = (struct reiserfs_de_head *)buffer; |
444 | deh->deh_location = 0; /* JDM Endian safe if 0 */ | 482 | deh->deh_location = 0; /* JDM Endian safe if 0 */ |
445 | put_deh_offset(deh, cpu_key_k_offset(&entry_key)); | 483 | put_deh_offset(deh, cpu_key_k_offset(&entry_key)); |
446 | deh->deh_state = 0; /* JDM Endian safe if 0 */ | 484 | deh->deh_state = 0; /* JDM Endian safe if 0 */ |
447 | /* put key (ino analog) to de */ | 485 | /* put key (ino analog) to de */ |
448 | deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */ | 486 | |
449 | deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */ | 487 | /* safe: k_dir_id is le */ |
488 | deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; | ||
489 | /* safe: k_objectid is le */ | ||
490 | deh->deh_objectid = INODE_PKEY(inode)->k_objectid; | ||
450 | 491 | ||
451 | /* copy name */ | 492 | /* copy name */ |
452 | memcpy((char *)(deh + 1), name, namelen); | 493 | memcpy((char *)(deh + 1), name, namelen); |
453 | /* padd by 0s to the 4 byte boundary */ | 494 | /* padd by 0s to the 4 byte boundary */ |
454 | padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); | 495 | padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen); |
455 | 496 | ||
456 | /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ | 497 | /* |
498 | * entry is ready to be pasted into tree, set 'visibility' | ||
499 | * and 'stat data in entry' attributes | ||
500 | */ | ||
457 | mark_de_without_sd(deh); | 501 | mark_de_without_sd(deh); |
458 | visible ? mark_de_visible(deh) : mark_de_hidden(deh); | 502 | visible ? mark_de_visible(deh) : mark_de_hidden(deh); |
459 | 503 | ||
@@ -499,7 +543,8 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
499 | /* update max-hash-collisions counter in reiserfs_sb_info */ | 543 | /* update max-hash-collisions counter in reiserfs_sb_info */ |
500 | PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); | 544 | PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number); |
501 | 545 | ||
502 | if (gen_number != 0) { /* we need to re-search for the insertion point */ | 546 | /* we need to re-search for the insertion point */ |
547 | if (gen_number != 0) { | ||
503 | if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != | 548 | if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) != |
504 | NAME_NOT_FOUND) { | 549 | NAME_NOT_FOUND) { |
505 | reiserfs_warning(dir->i_sb, "vs-7032", | 550 | reiserfs_warning(dir->i_sb, "vs-7032", |
@@ -527,18 +572,19 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, | |||
527 | dir->i_size += paste_size; | 572 | dir->i_size += paste_size; |
528 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; | 573 | dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; |
529 | if (!S_ISDIR(inode->i_mode) && visible) | 574 | if (!S_ISDIR(inode->i_mode) && visible) |
530 | // reiserfs_mkdir or reiserfs_rename will do that by itself | 575 | /* reiserfs_mkdir or reiserfs_rename will do that by itself */ |
531 | reiserfs_update_sd(th, dir); | 576 | reiserfs_update_sd(th, dir); |
532 | 577 | ||
533 | reiserfs_check_path(&path); | 578 | reiserfs_check_path(&path); |
534 | return 0; | 579 | return 0; |
535 | } | 580 | } |
536 | 581 | ||
537 | /* quota utility function, call if you've had to abort after calling | 582 | /* |
538 | ** new_inode_init, and have not called reiserfs_new_inode yet. | 583 | * quota utility function, call if you've had to abort after calling |
539 | ** This should only be called on inodes that do not have stat data | 584 | * new_inode_init, and have not called reiserfs_new_inode yet. |
540 | ** inserted into the tree yet. | 585 | * This should only be called on inodes that do not have stat data |
541 | */ | 586 | * inserted into the tree yet. |
587 | */ | ||
542 | static int drop_new_inode(struct inode *inode) | 588 | static int drop_new_inode(struct inode *inode) |
543 | { | 589 | { |
544 | dquot_drop(inode); | 590 | dquot_drop(inode); |
@@ -548,18 +594,23 @@ static int drop_new_inode(struct inode *inode) | |||
548 | return 0; | 594 | return 0; |
549 | } | 595 | } |
550 | 596 | ||
551 | /* utility function that does setup for reiserfs_new_inode. | 597 | /* |
552 | ** dquot_initialize needs lots of credits so it's better to have it | 598 | * utility function that does setup for reiserfs_new_inode. |
553 | ** outside of a transaction, so we had to pull some bits of | 599 | * dquot_initialize needs lots of credits so it's better to have it |
554 | ** reiserfs_new_inode out into this func. | 600 | * outside of a transaction, so we had to pull some bits of |
555 | */ | 601 | * reiserfs_new_inode out into this func. |
602 | */ | ||
556 | static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) | 603 | static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) |
557 | { | 604 | { |
558 | /* Make inode invalid - just in case we are going to drop it before | 605 | /* |
559 | * the initialization happens */ | 606 | * Make inode invalid - just in case we are going to drop it before |
607 | * the initialization happens | ||
608 | */ | ||
560 | INODE_PKEY(inode)->k_objectid = 0; | 609 | INODE_PKEY(inode)->k_objectid = 0; |
561 | /* the quota init calls have to know who to charge the quota to, so | 610 | |
562 | ** we have to set uid and gid here | 611 | /* |
612 | * the quota init calls have to know who to charge the quota to, so | ||
613 | * we have to set uid and gid here | ||
563 | */ | 614 | */ |
564 | inode_init_owner(inode, dir, mode); | 615 | inode_init_owner(inode, dir, mode); |
565 | dquot_initialize(inode); | 616 | dquot_initialize(inode); |
@@ -571,7 +622,10 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod | |||
571 | { | 622 | { |
572 | int retval; | 623 | int retval; |
573 | struct inode *inode; | 624 | struct inode *inode; |
574 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 625 | /* |
626 | * We need blocks for transaction + (user+group)*(quotas | ||
627 | * for new inode + update of quota for directory owner) | ||
628 | */ | ||
575 | int jbegin_count = | 629 | int jbegin_count = |
576 | JOURNAL_PER_BALANCE_CNT * 2 + | 630 | JOURNAL_PER_BALANCE_CNT * 2 + |
577 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 631 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
@@ -644,7 +698,10 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
644 | struct inode *inode; | 698 | struct inode *inode; |
645 | struct reiserfs_transaction_handle th; | 699 | struct reiserfs_transaction_handle th; |
646 | struct reiserfs_security_handle security; | 700 | struct reiserfs_security_handle security; |
647 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 701 | /* |
702 | * We need blocks for transaction + (user+group)*(quotas | ||
703 | * for new inode + update of quota for directory owner) | ||
704 | */ | ||
648 | int jbegin_count = | 705 | int jbegin_count = |
649 | JOURNAL_PER_BALANCE_CNT * 3 + | 706 | JOURNAL_PER_BALANCE_CNT * 3 + |
650 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 707 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
@@ -685,7 +742,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode | |||
685 | inode->i_op = &reiserfs_special_inode_operations; | 742 | inode->i_op = &reiserfs_special_inode_operations; |
686 | init_special_inode(inode, inode->i_mode, rdev); | 743 | init_special_inode(inode, inode->i_mode, rdev); |
687 | 744 | ||
688 | //FIXME: needed for block and char devices only | 745 | /* FIXME: needed for block and char devices only */ |
689 | reiserfs_update_sd(&th, inode); | 746 | reiserfs_update_sd(&th, inode); |
690 | 747 | ||
691 | reiserfs_update_inode_transaction(inode); | 748 | reiserfs_update_inode_transaction(inode); |
@@ -721,7 +778,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
721 | struct inode *inode; | 778 | struct inode *inode; |
722 | struct reiserfs_transaction_handle th; | 779 | struct reiserfs_transaction_handle th; |
723 | struct reiserfs_security_handle security; | 780 | struct reiserfs_security_handle security; |
724 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 781 | /* |
782 | * We need blocks for transaction + (user+group)*(quotas | ||
783 | * for new inode + update of quota for directory owner) | ||
784 | */ | ||
725 | int jbegin_count = | 785 | int jbegin_count = |
726 | JOURNAL_PER_BALANCE_CNT * 3 + | 786 | JOURNAL_PER_BALANCE_CNT * 3 + |
727 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + | 787 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) + |
@@ -730,7 +790,10 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
730 | dquot_initialize(dir); | 790 | dquot_initialize(dir); |
731 | 791 | ||
732 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 792 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
733 | /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ | 793 | /* |
794 | * set flag that new packing locality created and new blocks | ||
795 | * for the content of that directory are not displaced yet | ||
796 | */ | ||
734 | REISERFS_I(dir)->new_packing_locality = 1; | 797 | REISERFS_I(dir)->new_packing_locality = 1; |
735 | #endif | 798 | #endif |
736 | mode = S_IFDIR | mode; | 799 | mode = S_IFDIR | mode; |
@@ -754,8 +817,9 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
754 | goto out_failed; | 817 | goto out_failed; |
755 | } | 818 | } |
756 | 819 | ||
757 | /* inc the link count now, so another writer doesn't overflow it while | 820 | /* |
758 | ** we sleep later on. | 821 | * inc the link count now, so another writer doesn't overflow |
822 | * it while we sleep later on. | ||
759 | */ | 823 | */ |
760 | INC_DIR_INODE_NLINK(dir) | 824 | INC_DIR_INODE_NLINK(dir) |
761 | 825 | ||
@@ -774,7 +838,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
774 | inode->i_op = &reiserfs_dir_inode_operations; | 838 | inode->i_op = &reiserfs_dir_inode_operations; |
775 | inode->i_fop = &reiserfs_dir_operations; | 839 | inode->i_fop = &reiserfs_dir_operations; |
776 | 840 | ||
777 | // note, _this_ add_entry will not update dir's stat data | 841 | /* note, _this_ add_entry will not update dir's stat data */ |
778 | retval = | 842 | retval = |
779 | reiserfs_add_entry(&th, dir, dentry->d_name.name, | 843 | reiserfs_add_entry(&th, dir, dentry->d_name.name, |
780 | dentry->d_name.len, inode, 1 /*visible */ ); | 844 | dentry->d_name.len, inode, 1 /*visible */ ); |
@@ -790,7 +854,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode | |||
790 | iput(inode); | 854 | iput(inode); |
791 | goto out_failed; | 855 | goto out_failed; |
792 | } | 856 | } |
793 | // the above add_entry did not update dir's stat data | 857 | /* the above add_entry did not update dir's stat data */ |
794 | reiserfs_update_sd(&th, dir); | 858 | reiserfs_update_sd(&th, dir); |
795 | 859 | ||
796 | unlock_new_inode(inode); | 860 | unlock_new_inode(inode); |
@@ -803,10 +867,11 @@ out_failed: | |||
803 | 867 | ||
804 | static inline int reiserfs_empty_dir(struct inode *inode) | 868 | static inline int reiserfs_empty_dir(struct inode *inode) |
805 | { | 869 | { |
806 | /* we can cheat because an old format dir cannot have | 870 | /* |
807 | ** EMPTY_DIR_SIZE, and a new format dir cannot have | 871 | * we can cheat because an old format dir cannot have |
808 | ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, | 872 | * EMPTY_DIR_SIZE, and a new format dir cannot have |
809 | ** regardless of disk format version, the directory is empty. | 873 | * EMPTY_DIR_SIZE_V1. So, if the inode is either size, |
874 | * regardless of disk format version, the directory is empty. | ||
810 | */ | 875 | */ |
811 | if (inode->i_size != EMPTY_DIR_SIZE && | 876 | if (inode->i_size != EMPTY_DIR_SIZE && |
812 | inode->i_size != EMPTY_DIR_SIZE_V1) { | 877 | inode->i_size != EMPTY_DIR_SIZE_V1) { |
@@ -824,10 +889,12 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
824 | INITIALIZE_PATH(path); | 889 | INITIALIZE_PATH(path); |
825 | struct reiserfs_dir_entry de; | 890 | struct reiserfs_dir_entry de; |
826 | 891 | ||
827 | /* we will be doing 2 balancings and update 2 stat data, we change quotas | 892 | /* |
828 | * of the owner of the directory and of the owner of the parent directory. | 893 | * we will be doing 2 balancings and update 2 stat data, we |
829 | * The quota structure is possibly deleted only on last iput => outside | 894 | * change quotas of the owner of the directory and of the owner |
830 | * of this transaction */ | 895 | * of the parent directory. The quota structure is possibly |
896 | * deleted only on last iput => outside of this transaction | ||
897 | */ | ||
831 | jbegin_count = | 898 | jbegin_count = |
832 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 899 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
833 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 900 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -856,8 +923,9 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
856 | reiserfs_update_inode_transaction(dir); | 923 | reiserfs_update_inode_transaction(dir); |
857 | 924 | ||
858 | if (de.de_objectid != inode->i_ino) { | 925 | if (de.de_objectid != inode->i_ino) { |
859 | // FIXME: compare key of an object and a key found in the | 926 | /* |
860 | // entry | 927 | * FIXME: compare key of an object and a key found in the entry |
928 | */ | ||
861 | retval = -EIO; | 929 | retval = -EIO; |
862 | goto end_rmdir; | 930 | goto end_rmdir; |
863 | } | 931 | } |
@@ -895,9 +963,11 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
895 | return retval; | 963 | return retval; |
896 | 964 | ||
897 | end_rmdir: | 965 | end_rmdir: |
898 | /* we must release path, because we did not call | 966 | /* |
899 | reiserfs_cut_from_item, or reiserfs_cut_from_item does not | 967 | * we must release path, because we did not call |
900 | release path if operation was not complete */ | 968 | * reiserfs_cut_from_item, or reiserfs_cut_from_item does not |
969 | * release path if operation was not complete | ||
970 | */ | ||
901 | pathrelse(&path); | 971 | pathrelse(&path); |
902 | err = journal_end(&th, dir->i_sb, jbegin_count); | 972 | err = journal_end(&th, dir->i_sb, jbegin_count); |
903 | reiserfs_write_unlock(dir->i_sb); | 973 | reiserfs_write_unlock(dir->i_sb); |
@@ -918,10 +988,13 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
918 | 988 | ||
919 | inode = dentry->d_inode; | 989 | inode = dentry->d_inode; |
920 | 990 | ||
921 | /* in this transaction we can be doing at max two balancings and update | 991 | /* |
922 | * two stat datas, we change quotas of the owner of the directory and of | 992 | * in this transaction we can be doing at max two balancings and |
923 | * the owner of the parent directory. The quota structure is possibly | 993 | * update two stat datas, we change quotas of the owner of the |
924 | * deleted only on iput => outside of this transaction */ | 994 | * directory and of the owner of the parent directory. The quota |
995 | * structure is possibly deleted only on iput => outside of | ||
996 | * this transaction | ||
997 | */ | ||
925 | jbegin_count = | 998 | jbegin_count = |
926 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 999 | JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
927 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 1000 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -946,8 +1019,9 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) | |||
946 | reiserfs_update_inode_transaction(dir); | 1019 | reiserfs_update_inode_transaction(dir); |
947 | 1020 | ||
948 | if (de.de_objectid != inode->i_ino) { | 1021 | if (de.de_objectid != inode->i_ino) { |
949 | // FIXME: compare key of an object and a key found in the | 1022 | /* |
950 | // entry | 1023 | * FIXME: compare key of an object and a key found in the entry |
1024 | */ | ||
951 | retval = -EIO; | 1025 | retval = -EIO; |
952 | goto end_unlink; | 1026 | goto end_unlink; |
953 | } | 1027 | } |
@@ -1011,7 +1085,10 @@ static int reiserfs_symlink(struct inode *parent_dir, | |||
1011 | struct reiserfs_transaction_handle th; | 1085 | struct reiserfs_transaction_handle th; |
1012 | struct reiserfs_security_handle security; | 1086 | struct reiserfs_security_handle security; |
1013 | int mode = S_IFLNK | S_IRWXUGO; | 1087 | int mode = S_IFLNK | S_IRWXUGO; |
1014 | /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ | 1088 | /* |
1089 | * We need blocks for transaction + (user+group)*(quotas for | ||
1090 | * new inode + update of quota for directory owner) | ||
1091 | */ | ||
1015 | int jbegin_count = | 1092 | int jbegin_count = |
1016 | JOURNAL_PER_BALANCE_CNT * 3 + | 1093 | JOURNAL_PER_BALANCE_CNT * 3 + |
1017 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + | 1094 | 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) + |
@@ -1070,10 +1147,6 @@ static int reiserfs_symlink(struct inode *parent_dir, | |||
1070 | inode->i_op = &reiserfs_symlink_inode_operations; | 1147 | inode->i_op = &reiserfs_symlink_inode_operations; |
1071 | inode->i_mapping->a_ops = &reiserfs_address_space_operations; | 1148 | inode->i_mapping->a_ops = &reiserfs_address_space_operations; |
1072 | 1149 | ||
1073 | // must be sure this inode is written with this transaction | ||
1074 | // | ||
1075 | //reiserfs_update_sd (&th, inode, READ_BLOCKS); | ||
1076 | |||
1077 | retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, | 1150 | retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name, |
1078 | dentry->d_name.len, inode, 1 /*visible */ ); | 1151 | dentry->d_name.len, inode, 1 /*visible */ ); |
1079 | if (retval) { | 1152 | if (retval) { |
@@ -1102,7 +1175,10 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1102 | int retval; | 1175 | int retval; |
1103 | struct inode *inode = old_dentry->d_inode; | 1176 | struct inode *inode = old_dentry->d_inode; |
1104 | struct reiserfs_transaction_handle th; | 1177 | struct reiserfs_transaction_handle th; |
1105 | /* We need blocks for transaction + update of quotas for the owners of the directory */ | 1178 | /* |
1179 | * We need blocks for transaction + update of quotas for | ||
1180 | * the owners of the directory | ||
1181 | */ | ||
1106 | int jbegin_count = | 1182 | int jbegin_count = |
1107 | JOURNAL_PER_BALANCE_CNT * 3 + | 1183 | JOURNAL_PER_BALANCE_CNT * 3 + |
1108 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); | 1184 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); |
@@ -1111,7 +1187,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1111 | 1187 | ||
1112 | reiserfs_write_lock(dir->i_sb); | 1188 | reiserfs_write_lock(dir->i_sb); |
1113 | if (inode->i_nlink >= REISERFS_LINK_MAX) { | 1189 | if (inode->i_nlink >= REISERFS_LINK_MAX) { |
1114 | //FIXME: sd_nlink is 32 bit for new files | 1190 | /* FIXME: sd_nlink is 32 bit for new files */ |
1115 | reiserfs_write_unlock(dir->i_sb); | 1191 | reiserfs_write_unlock(dir->i_sb); |
1116 | return -EMLINK; | 1192 | return -EMLINK; |
1117 | } | 1193 | } |
@@ -1158,9 +1234,9 @@ static int de_still_valid(const char *name, int len, | |||
1158 | { | 1234 | { |
1159 | struct reiserfs_dir_entry tmp = *de; | 1235 | struct reiserfs_dir_entry tmp = *de; |
1160 | 1236 | ||
1161 | // recalculate pointer to name and name length | 1237 | /* recalculate pointer to name and name length */ |
1162 | set_de_name_and_namelen(&tmp); | 1238 | set_de_name_and_namelen(&tmp); |
1163 | // FIXME: could check more | 1239 | /* FIXME: could check more */ |
1164 | if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) | 1240 | if (tmp.de_namelen != len || memcmp(name, de->de_name, len)) |
1165 | return 0; | 1241 | return 0; |
1166 | return 1; | 1242 | return 1; |
@@ -1217,14 +1293,16 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1217 | unsigned long savelink = 1; | 1293 | unsigned long savelink = 1; |
1218 | struct timespec ctime; | 1294 | struct timespec ctime; |
1219 | 1295 | ||
1220 | /* three balancings: (1) old name removal, (2) new name insertion | 1296 | /* |
1221 | and (3) maybe "save" link insertion | 1297 | * three balancings: (1) old name removal, (2) new name insertion |
1222 | stat data updates: (1) old directory, | 1298 | * and (3) maybe "save" link insertion |
1223 | (2) new directory and (3) maybe old object stat data (when it is | 1299 | * stat data updates: (1) old directory, |
1224 | directory) and (4) maybe stat data of object to which new entry | 1300 | * (2) new directory and (3) maybe old object stat data (when it is |
1225 | pointed initially and (5) maybe block containing ".." of | 1301 | * directory) and (4) maybe stat data of object to which new entry |
1226 | renamed directory | 1302 | * pointed initially and (5) maybe block containing ".." of |
1227 | quota updates: two parent directories */ | 1303 | * renamed directory |
1304 | * quota updates: two parent directories | ||
1305 | */ | ||
1228 | jbegin_count = | 1306 | jbegin_count = |
1229 | JOURNAL_PER_BALANCE_CNT * 3 + 5 + | 1307 | JOURNAL_PER_BALANCE_CNT * 3 + 5 + |
1230 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); | 1308 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); |
@@ -1235,8 +1313,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1235 | old_inode = old_dentry->d_inode; | 1313 | old_inode = old_dentry->d_inode; |
1236 | new_dentry_inode = new_dentry->d_inode; | 1314 | new_dentry_inode = new_dentry->d_inode; |
1237 | 1315 | ||
1238 | // make sure, that oldname still exists and points to an object we | 1316 | /* |
1239 | // are going to rename | 1317 | * make sure that oldname still exists and points to an object we |
1318 | * are going to rename | ||
1319 | */ | ||
1240 | old_de.de_gen_number_bit_string = NULL; | 1320 | old_de.de_gen_number_bit_string = NULL; |
1241 | reiserfs_write_lock(old_dir->i_sb); | 1321 | reiserfs_write_lock(old_dir->i_sb); |
1242 | retval = | 1322 | retval = |
@@ -1256,10 +1336,11 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1256 | 1336 | ||
1257 | old_inode_mode = old_inode->i_mode; | 1337 | old_inode_mode = old_inode->i_mode; |
1258 | if (S_ISDIR(old_inode_mode)) { | 1338 | if (S_ISDIR(old_inode_mode)) { |
1259 | // make sure, that directory being renamed has correct ".." | 1339 | /* |
1260 | // and that its new parent directory has not too many links | 1340 | * make sure that directory being renamed has correct ".." |
1261 | // already | 1341 | * and that its new parent directory has not too many links |
1262 | 1342 | * already | |
1343 | */ | ||
1263 | if (new_dentry_inode) { | 1344 | if (new_dentry_inode) { |
1264 | if (!reiserfs_empty_dir(new_dentry_inode)) { | 1345 | if (!reiserfs_empty_dir(new_dentry_inode)) { |
1265 | reiserfs_write_unlock(old_dir->i_sb); | 1346 | reiserfs_write_unlock(old_dir->i_sb); |
@@ -1267,8 +1348,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1267 | } | 1348 | } |
1268 | } | 1349 | } |
1269 | 1350 | ||
1270 | /* directory is renamed, its parent directory will be changed, | 1351 | /* |
1271 | ** so find ".." entry | 1352 | * directory is renamed, its parent directory will be changed, |
1353 | * so find ".." entry | ||
1272 | */ | 1354 | */ |
1273 | dot_dot_de.de_gen_number_bit_string = NULL; | 1355 | dot_dot_de.de_gen_number_bit_string = NULL; |
1274 | retval = | 1356 | retval = |
@@ -1311,8 +1393,9 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1311 | reiserfs_update_inode_transaction(old_dir); | 1393 | reiserfs_update_inode_transaction(old_dir); |
1312 | reiserfs_update_inode_transaction(new_dir); | 1394 | reiserfs_update_inode_transaction(new_dir); |
1313 | 1395 | ||
1314 | /* this makes it so an fsync on an open fd for the old name will | 1396 | /* |
1315 | ** commit the rename operation | 1397 | * this makes it so an fsync on an open fd for the old name will |
1398 | * commit the rename operation | ||
1316 | */ | 1399 | */ |
1317 | reiserfs_update_inode_transaction(old_inode); | 1400 | reiserfs_update_inode_transaction(old_inode); |
1318 | 1401 | ||
@@ -1320,7 +1403,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1320 | reiserfs_update_inode_transaction(new_dentry_inode); | 1403 | reiserfs_update_inode_transaction(new_dentry_inode); |
1321 | 1404 | ||
1322 | while (1) { | 1405 | while (1) { |
1323 | // look for old name using corresponding entry key (found by reiserfs_find_entry) | 1406 | /* |
1407 | * look for old name using corresponding entry key | ||
1408 | * (found by reiserfs_find_entry) | ||
1409 | */ | ||
1324 | if ((retval = | 1410 | if ((retval = |
1325 | search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, | 1411 | search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key, |
1326 | &old_entry_path, | 1412 | &old_entry_path, |
@@ -1335,14 +1421,18 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1335 | 1421 | ||
1336 | reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); | 1422 | reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1); |
1337 | 1423 | ||
1338 | // look for new name by reiserfs_find_entry | 1424 | /* look for new name by reiserfs_find_entry */ |
1339 | new_de.de_gen_number_bit_string = NULL; | 1425 | new_de.de_gen_number_bit_string = NULL; |
1340 | retval = | 1426 | retval = |
1341 | reiserfs_find_entry(new_dir, new_dentry->d_name.name, | 1427 | reiserfs_find_entry(new_dir, new_dentry->d_name.name, |
1342 | new_dentry->d_name.len, &new_entry_path, | 1428 | new_dentry->d_name.len, &new_entry_path, |
1343 | &new_de); | 1429 | &new_de); |
1344 | // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from | 1430 | /* |
1345 | // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. | 1431 | * reiserfs_add_entry should not return IO_ERROR, |
1432 | * because it is called with essentially same parameters from | ||
1433 | * reiserfs_add_entry above, and we'll catch any i/o errors | ||
1434 | * before we get here. | ||
1435 | */ | ||
1346 | if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { | 1436 | if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { |
1347 | pathrelse(&new_entry_path); | 1437 | pathrelse(&new_entry_path); |
1348 | pathrelse(&old_entry_path); | 1438 | pathrelse(&old_entry_path); |
@@ -1370,22 +1460,26 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1370 | } | 1460 | } |
1371 | copy_item_head(&dot_dot_ih, | 1461 | copy_item_head(&dot_dot_ih, |
1372 | tp_item_head(&dot_dot_entry_path)); | 1462 | tp_item_head(&dot_dot_entry_path)); |
1373 | // node containing ".." gets into transaction | 1463 | /* node containing ".." gets into transaction */ |
1374 | reiserfs_prepare_for_journal(old_inode->i_sb, | 1464 | reiserfs_prepare_for_journal(old_inode->i_sb, |
1375 | dot_dot_de.de_bh, 1); | 1465 | dot_dot_de.de_bh, 1); |
1376 | } | 1466 | } |
1377 | /* we should check seals here, not do | 1467 | /* |
1378 | this stuff, yes? Then, having | 1468 | * we should check seals here, not do |
1379 | gathered everything into RAM we | 1469 | * this stuff, yes? Then, having |
1380 | should lock the buffers, yes? -Hans */ | 1470 | * gathered everything into RAM we |
1381 | /* probably. our rename needs to hold more | 1471 | * should lock the buffers, yes? -Hans |
1382 | ** than one path at once. The seals would | 1472 | */ |
1383 | ** have to be written to deal with multi-path | 1473 | /* |
1384 | ** issues -chris | 1474 | * probably. our rename needs to hold more |
1475 | * than one path at once. The seals would | ||
1476 | * have to be written to deal with multi-path | ||
1477 | * issues -chris | ||
1385 | */ | 1478 | */ |
1386 | /* sanity checking before doing the rename - avoid races many | 1479 | /* |
1387 | ** of the above checks could have scheduled. We have to be | 1480 | * sanity checking before doing the rename - avoid races many |
1388 | ** sure our items haven't been shifted by another process. | 1481 | * of the above checks could have scheduled. We have to be |
1482 | * sure our items haven't been shifted by another process. | ||
1389 | */ | 1483 | */ |
1390 | if (item_moved(&new_entry_ih, &new_entry_path) || | 1484 | if (item_moved(&new_entry_ih, &new_entry_path) || |
1391 | !entry_points_to_object(new_dentry->d_name.name, | 1485 | !entry_points_to_object(new_dentry->d_name.name, |
@@ -1430,8 +1524,10 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1430 | break; | 1524 | break; |
1431 | } | 1525 | } |
1432 | 1526 | ||
1433 | /* ok, all the changes can be done in one fell swoop when we | 1527 | /* |
1434 | have claimed all the buffers needed. */ | 1528 | * ok, all the changes can be done in one fell swoop when we |
1529 | * have claimed all the buffers needed. | ||
1530 | */ | ||
1435 | 1531 | ||
1436 | mark_de_visible(new_de.de_deh + new_de.de_entry_num); | 1532 | mark_de_visible(new_de.de_deh + new_de.de_entry_num); |
1437 | set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); | 1533 | set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode)); |
@@ -1442,12 +1538,14 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1442 | ctime = CURRENT_TIME_SEC; | 1538 | ctime = CURRENT_TIME_SEC; |
1443 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 1539 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
1444 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 1540 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
1445 | /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of | 1541 | /* |
1446 | renamed object */ | 1542 | * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch |
1543 | * which adds ctime update of renamed object | ||
1544 | */ | ||
1447 | old_inode->i_ctime = ctime; | 1545 | old_inode->i_ctime = ctime; |
1448 | 1546 | ||
1449 | if (new_dentry_inode) { | 1547 | if (new_dentry_inode) { |
1450 | // adjust link number of the victim | 1548 | /* adjust link number of the victim */ |
1451 | if (S_ISDIR(new_dentry_inode->i_mode)) { | 1549 | if (S_ISDIR(new_dentry_inode->i_mode)) { |
1452 | clear_nlink(new_dentry_inode); | 1550 | clear_nlink(new_dentry_inode); |
1453 | } else { | 1551 | } else { |
@@ -1462,21 +1560,28 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1462 | set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); | 1560 | set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir)); |
1463 | journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); | 1561 | journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh); |
1464 | 1562 | ||
1563 | /* | ||
1564 | * there (in new_dir) was no directory, so it got new link | ||
1565 | * (".." of renamed directory) | ||
1566 | */ | ||
1465 | if (!new_dentry_inode) | 1567 | if (!new_dentry_inode) |
1466 | /* there (in new_dir) was no directory, so it got new link | ||
1467 | (".." of renamed directory) */ | ||
1468 | INC_DIR_INODE_NLINK(new_dir); | 1568 | INC_DIR_INODE_NLINK(new_dir); |
1469 | 1569 | ||
1470 | /* old directory lost one link - ".. " of renamed directory */ | 1570 | /* old directory lost one link - ".. " of renamed directory */ |
1471 | DEC_DIR_INODE_NLINK(old_dir); | 1571 | DEC_DIR_INODE_NLINK(old_dir); |
1472 | } | 1572 | } |
1473 | // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse | 1573 | /* |
1574 | * looks like in 2.3.99pre3 brelse is atomic. | ||
1575 | * so we can use pathrelse | ||
1576 | */ | ||
1474 | pathrelse(&new_entry_path); | 1577 | pathrelse(&new_entry_path); |
1475 | pathrelse(&dot_dot_entry_path); | 1578 | pathrelse(&dot_dot_entry_path); |
1476 | 1579 | ||
1477 | // FIXME: this reiserfs_cut_from_item's return value may screw up | 1580 | /* |
1478 | // anybody, but it will panic if will not be able to find the | 1581 | * FIXME: this reiserfs_cut_from_item's return value may screw up |
1479 | // entry. This needs one more clean up | 1582 | * anybody, but it will panic if will not be able to find the |
1583 | * entry. This needs one more clean up | ||
1584 | */ | ||
1480 | if (reiserfs_cut_from_item | 1585 | if (reiserfs_cut_from_item |
1481 | (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, | 1586 | (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, |
1482 | 0) < 0) | 1587 | 0) < 0) |
@@ -1501,11 +1606,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1501 | return retval; | 1606 | return retval; |
1502 | } | 1607 | } |
1503 | 1608 | ||
1504 | /* | 1609 | /* directories can handle most operations... */ |
1505 | * directories can handle most operations... | ||
1506 | */ | ||
1507 | const struct inode_operations reiserfs_dir_inode_operations = { | 1610 | const struct inode_operations reiserfs_dir_inode_operations = { |
1508 | //&reiserfs_dir_operations, /* default_file_ops */ | ||
1509 | .create = reiserfs_create, | 1611 | .create = reiserfs_create, |
1510 | .lookup = reiserfs_lookup, | 1612 | .lookup = reiserfs_lookup, |
1511 | .link = reiserfs_link, | 1613 | .link = reiserfs_link, |
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c index f732d6a5251d..99f66f885785 100644 --- a/fs/reiserfs/objectid.c +++ b/fs/reiserfs/objectid.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/time.h> | 7 | #include <linux/time.h> |
8 | #include "reiserfs.h" | 8 | #include "reiserfs.h" |
9 | 9 | ||
10 | // find where objectid map starts | 10 | /* find where objectid map starts */ |
11 | #define objectid_map(s,rs) (old_format_only (s) ? \ | 11 | #define objectid_map(s,rs) (old_format_only (s) ? \ |
12 | (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ | 12 | (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ |
13 | (__le32 *)((rs) + 1)) | 13 | (__le32 *)((rs) + 1)) |
@@ -20,7 +20,7 @@ static void check_objectid_map(struct super_block *s, __le32 * map) | |||
20 | reiserfs_panic(s, "vs-15010", "map corrupted: %lx", | 20 | reiserfs_panic(s, "vs-15010", "map corrupted: %lx", |
21 | (long unsigned int)le32_to_cpu(map[0])); | 21 | (long unsigned int)le32_to_cpu(map[0])); |
22 | 22 | ||
23 | // FIXME: add something else here | 23 | /* FIXME: add something else here */ |
24 | } | 24 | } |
25 | 25 | ||
26 | #else | 26 | #else |
@@ -29,19 +29,21 @@ static void check_objectid_map(struct super_block *s, __le32 * map) | |||
29 | } | 29 | } |
30 | #endif | 30 | #endif |
31 | 31 | ||
32 | /* When we allocate objectids we allocate the first unused objectid. | 32 | /* |
33 | Each sequence of objectids in use (the odd sequences) is followed | 33 | * When we allocate objectids we allocate the first unused objectid. |
34 | by a sequence of objectids not in use (the even sequences). We | 34 | * Each sequence of objectids in use (the odd sequences) is followed |
35 | only need to record the last objectid in each of these sequences | 35 | * by a sequence of objectids not in use (the even sequences). We |
36 | (both the odd and even sequences) in order to fully define the | 36 | * only need to record the last objectid in each of these sequences |
37 | boundaries of the sequences. A consequence of allocating the first | 37 | * (both the odd and even sequences) in order to fully define the |
38 | objectid not in use is that under most conditions this scheme is | 38 | * boundaries of the sequences. A consequence of allocating the first |
39 | extremely compact. The exception is immediately after a sequence | 39 | * objectid not in use is that under most conditions this scheme is |
40 | of operations which deletes a large number of objects of | 40 | * extremely compact. The exception is immediately after a sequence |
41 | non-sequential objectids, and even then it will become compact | 41 | * of operations which deletes a large number of objects of |
42 | again as soon as more objects are created. Note that many | 42 | * non-sequential objectids, and even then it will become compact |
43 | interesting optimizations of layout could result from complicating | 43 | * again as soon as more objects are created. Note that many |
44 | objectid assignment, but we have deferred making them for now. */ | 44 | * interesting optimizations of layout could result from complicating |
45 | * objectid assignment, but we have deferred making them for now. | ||
46 | */ | ||
45 | 47 | ||
46 | /* get unique object identifier */ | 48 | /* get unique object identifier */ |
47 | __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) | 49 | __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) |
@@ -64,19 +66,23 @@ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th) | |||
64 | return 0; | 66 | return 0; |
65 | } | 67 | } |
66 | 68 | ||
67 | /* This incrementation allocates the first unused objectid. That | 69 | /* |
68 | is to say, the first entry on the objectid map is the first | 70 | * This incrementation allocates the first unused objectid. That |
69 | unused objectid, and by incrementing it we use it. See below | 71 | * is to say, the first entry on the objectid map is the first |
70 | where we check to see if we eliminated a sequence of unused | 72 | * unused objectid, and by incrementing it we use it. See below |
71 | objectids.... */ | 73 | * where we check to see if we eliminated a sequence of unused |
74 | * objectids.... | ||
75 | */ | ||
72 | map[1] = cpu_to_le32(unused_objectid + 1); | 76 | map[1] = cpu_to_le32(unused_objectid + 1); |
73 | 77 | ||
74 | /* Now we check to see if we eliminated the last remaining member of | 78 | /* |
75 | the first even sequence (and can eliminate the sequence by | 79 | * Now we check to see if we eliminated the last remaining member of |
76 | eliminating its last objectid from oids), and can collapse the | 80 | * the first even sequence (and can eliminate the sequence by |
77 | first two odd sequences into one sequence. If so, then the net | 81 | * eliminating its last objectid from oids), and can collapse the |
78 | result is to eliminate a pair of objectids from oids. We do this | 82 | * first two odd sequences into one sequence. If so, then the net |
79 | by shifting the entire map to the left. */ | 83 | * result is to eliminate a pair of objectids from oids. We do this |
84 | * by shifting the entire map to the left. | ||
85 | */ | ||
80 | if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { | 86 | if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) { |
81 | memmove(map + 1, map + 3, | 87 | memmove(map + 1, map + 3, |
82 | (sb_oid_cursize(rs) - 3) * sizeof(__u32)); | 88 | (sb_oid_cursize(rs) - 3) * sizeof(__u32)); |
@@ -97,30 +103,33 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, | |||
97 | int i = 0; | 103 | int i = 0; |
98 | 104 | ||
99 | BUG_ON(!th->t_trans_id); | 105 | BUG_ON(!th->t_trans_id); |
100 | //return; | 106 | /*return; */ |
101 | check_objectid_map(s, map); | 107 | check_objectid_map(s, map); |
102 | 108 | ||
103 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); | 109 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); |
104 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); | 110 | journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s)); |
105 | 111 | ||
106 | /* start at the beginning of the objectid map (i = 0) and go to | 112 | /* |
107 | the end of it (i = disk_sb->s_oid_cursize). Linear search is | 113 | * start at the beginning of the objectid map (i = 0) and go to |
108 | what we use, though it is possible that binary search would be | 114 | * the end of it (i = disk_sb->s_oid_cursize). Linear search is |
109 | more efficient after performing lots of deletions (which is | 115 | * what we use, though it is possible that binary search would be |
110 | when oids is large.) We only check even i's. */ | 116 | * more efficient after performing lots of deletions (which is |
117 | * when oids is large.) We only check even i's. | ||
118 | */ | ||
111 | while (i < sb_oid_cursize(rs)) { | 119 | while (i < sb_oid_cursize(rs)) { |
112 | if (objectid_to_release == le32_to_cpu(map[i])) { | 120 | if (objectid_to_release == le32_to_cpu(map[i])) { |
113 | /* This incrementation unallocates the objectid. */ | 121 | /* This incrementation unallocates the objectid. */ |
114 | //map[i]++; | ||
115 | le32_add_cpu(&map[i], 1); | 122 | le32_add_cpu(&map[i], 1); |
116 | 123 | ||
117 | /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ | 124 | /* |
125 | * Did we unallocate the last member of an | ||
126 | * odd sequence, and can shrink oids? | ||
127 | */ | ||
118 | if (map[i] == map[i + 1]) { | 128 | if (map[i] == map[i + 1]) { |
119 | /* shrink objectid map */ | 129 | /* shrink objectid map */ |
120 | memmove(map + i, map + i + 2, | 130 | memmove(map + i, map + i + 2, |
121 | (sb_oid_cursize(rs) - i - | 131 | (sb_oid_cursize(rs) - i - |
122 | 2) * sizeof(__u32)); | 132 | 2) * sizeof(__u32)); |
123 | //disk_sb->s_oid_cursize -= 2; | ||
124 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); | 133 | set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2); |
125 | 134 | ||
126 | RFALSE(sb_oid_cursize(rs) < 2 || | 135 | RFALSE(sb_oid_cursize(rs) < 2 || |
@@ -135,14 +144,19 @@ void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, | |||
135 | objectid_to_release < le32_to_cpu(map[i + 1])) { | 144 | objectid_to_release < le32_to_cpu(map[i + 1])) { |
136 | /* size of objectid map is not changed */ | 145 | /* size of objectid map is not changed */ |
137 | if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { | 146 | if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) { |
138 | //objectid_map[i+1]--; | ||
139 | le32_add_cpu(&map[i + 1], -1); | 147 | le32_add_cpu(&map[i + 1], -1); |
140 | return; | 148 | return; |
141 | } | 149 | } |
142 | 150 | ||
143 | /* JDM comparing two little-endian values for equality -- safe */ | 151 | /* |
152 | * JDM comparing two little-endian values for | ||
153 | * equality -- safe | ||
154 | */ | ||
155 | /* | ||
156 | * objectid map must be expanded, but | ||
157 | * there is no space | ||
158 | */ | ||
144 | if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { | 159 | if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) { |
145 | /* objectid map must be expanded, but there is no space */ | ||
146 | PROC_INFO_INC(s, leaked_oid); | 160 | PROC_INFO_INC(s, leaked_oid); |
147 | return; | 161 | return; |
148 | } | 162 | } |
@@ -178,8 +192,9 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s) | |||
178 | new_objectid_map = (__le32 *) (disk_sb + 1); | 192 | new_objectid_map = (__le32 *) (disk_sb + 1); |
179 | 193 | ||
180 | if (cur_size > new_size) { | 194 | if (cur_size > new_size) { |
181 | /* mark everyone used that was listed as free at the end of the objectid | 195 | /* |
182 | ** map | 196 | * mark everyone used that was listed as free at |
197 | * the end of the objectid map | ||
183 | */ | 198 | */ |
184 | objectid_map[new_size - 1] = objectid_map[cur_size - 1]; | 199 | objectid_map[new_size - 1] = objectid_map[cur_size - 1]; |
185 | set_sb_oid_cursize(disk_sb, new_size); | 200 | set_sb_oid_cursize(disk_sb, new_size); |
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 41f788148d44..c7425fdf19f9 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c | |||
@@ -172,18 +172,19 @@ static char *is_there_reiserfs_struct(char *fmt, int *what) | |||
172 | return k; | 172 | return k; |
173 | } | 173 | } |
174 | 174 | ||
175 | /* debugging reiserfs we used to print out a lot of different | 175 | /* |
176 | variables, like keys, item headers, buffer heads etc. Values of | 176 | * debugging reiserfs we used to print out a lot of different |
177 | most fields matter. So it took a long time just to write | 177 | * variables, like keys, item headers, buffer heads etc. Values of |
178 | appropriative printk. With this reiserfs_warning you can use format | 178 | * most fields matter. So it took a long time just to write |
179 | specification for complex structures like you used to do with | 179 | * appropriative printk. With this reiserfs_warning you can use format |
180 | printfs for integers, doubles and pointers. For instance, to print | 180 | * specification for complex structures like you used to do with |
181 | out key structure you have to write just: | 181 | * printfs for integers, doubles and pointers. For instance, to print |
182 | reiserfs_warning ("bad key %k", key); | 182 | * out key structure you have to write just: |
183 | instead of | 183 | * reiserfs_warning ("bad key %k", key); |
184 | printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, | 184 | * instead of |
185 | key->k_offset, key->k_uniqueness); | 185 | * printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, |
186 | */ | 186 | * key->k_offset, key->k_uniqueness); |
187 | */ | ||
187 | static DEFINE_SPINLOCK(error_lock); | 188 | static DEFINE_SPINLOCK(error_lock); |
188 | static void prepare_error_buf(const char *fmt, va_list args) | 189 | static void prepare_error_buf(const char *fmt, va_list args) |
189 | { | 190 | { |
@@ -243,15 +244,16 @@ static void prepare_error_buf(const char *fmt, va_list args) | |||
243 | 244 | ||
244 | } | 245 | } |
245 | 246 | ||
246 | /* in addition to usual conversion specifiers this accepts reiserfs | 247 | /* |
247 | specific conversion specifiers: | 248 | * in addition to usual conversion specifiers this accepts reiserfs |
248 | %k to print little endian key, | 249 | * specific conversion specifiers: |
249 | %K to print cpu key, | 250 | * %k to print little endian key, |
250 | %h to print item_head, | 251 | * %K to print cpu key, |
251 | %t to print directory entry | 252 | * %h to print item_head, |
252 | %z to print block head (arg must be struct buffer_head * | 253 | * %t to print directory entry |
253 | %b to print buffer_head | 254 | * %z to print block head (arg must be struct buffer_head * |
254 | */ | 255 | * %b to print buffer_head |
256 | */ | ||
255 | 257 | ||
256 | #define do_reiserfs_warning(fmt)\ | 258 | #define do_reiserfs_warning(fmt)\ |
257 | {\ | 259 | {\ |
@@ -304,50 +306,52 @@ void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...) | |||
304 | #endif | 306 | #endif |
305 | } | 307 | } |
306 | 308 | ||
307 | /* The format: | 309 | /* |
308 | 310 | * The format: | |
309 | maintainer-errorid: [function-name:] message | 311 | * |
310 | 312 | * maintainer-errorid: [function-name:] message | |
311 | where errorid is unique to the maintainer and function-name is | 313 | * |
312 | optional, is recommended, so that anyone can easily find the bug | 314 | * where errorid is unique to the maintainer and function-name is |
313 | with a simple grep for the short to type string | 315 | * optional, is recommended, so that anyone can easily find the bug |
314 | maintainer-errorid. Don't bother with reusing errorids, there are | 316 | * with a simple grep for the short to type string |
315 | lots of numbers out there. | 317 | * maintainer-errorid. Don't bother with reusing errorids, there are |
316 | 318 | * lots of numbers out there. | |
317 | Example: | 319 | * |
318 | 320 | * Example: | |
319 | reiserfs_panic( | 321 | * |
320 | p_sb, "reiser-29: reiserfs_new_blocknrs: " | 322 | * reiserfs_panic( |
321 | "one of search_start or rn(%d) is equal to MAX_B_NUM," | 323 | * p_sb, "reiser-29: reiserfs_new_blocknrs: " |
322 | "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", | 324 | * "one of search_start or rn(%d) is equal to MAX_B_NUM," |
323 | rn, bh | 325 | * "which means that we are optimizing location based on the " |
324 | ); | 326 | * "bogus location of a temp buffer (%p).", |
325 | 327 | * rn, bh | |
326 | Regular panic()s sometimes clear the screen before the message can | 328 | * ); |
327 | be read, thus the need for the while loop. | 329 | * |
328 | 330 | * Regular panic()s sometimes clear the screen before the message can | |
329 | Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it | 331 | * be read, thus the need for the while loop. |
330 | pointless complexity): | 332 | * |
331 | 333 | * Numbering scheme for panic used by Vladimir and Anatoly( Hans completely | |
332 | panics in reiserfs.h have numbers from 1000 to 1999 | 334 | * ignores this scheme, and considers it pointless complexity): |
333 | super.c 2000 to 2999 | 335 | * |
334 | preserve.c (unused) 3000 to 3999 | 336 | * panics in reiserfs_fs.h have numbers from 1000 to 1999 |
335 | bitmap.c 4000 to 4999 | 337 | * super.c 2000 to 2999 |
336 | stree.c 5000 to 5999 | 338 | * preserve.c (unused) 3000 to 3999 |
337 | prints.c 6000 to 6999 | 339 | * bitmap.c 4000 to 4999 |
338 | namei.c 7000 to 7999 | 340 | * stree.c 5000 to 5999 |
339 | fix_nodes.c 8000 to 8999 | 341 | * prints.c 6000 to 6999 |
340 | dir.c 9000 to 9999 | 342 | * namei.c 7000 to 7999 |
341 | lbalance.c 10000 to 10999 | 343 | * fix_nodes.c 8000 to 8999 |
342 | ibalance.c 11000 to 11999 not ready | 344 | * dir.c 9000 to 9999 |
343 | do_balan.c 12000 to 12999 | 345 | * lbalance.c 10000 to 10999 |
344 | inode.c 13000 to 13999 | 346 | * ibalance.c 11000 to 11999 not ready |
345 | file.c 14000 to 14999 | 347 | * do_balan.c 12000 to 12999 |
346 | objectid.c 15000 - 15999 | 348 | * inode.c 13000 to 13999 |
347 | buffer.c 16000 - 16999 | 349 | * file.c 14000 to 14999 |
348 | symlink.c 17000 - 17999 | 350 | * objectid.c 15000 - 15999 |
349 | 351 | * buffer.c 16000 - 16999 | |
350 | . */ | 352 | * symlink.c 17000 - 17999 |
353 | * | ||
354 | * . */ | ||
351 | 355 | ||
352 | void __reiserfs_panic(struct super_block *sb, const char *id, | 356 | void __reiserfs_panic(struct super_block *sb, const char *id, |
353 | const char *function, const char *fmt, ...) | 357 | const char *function, const char *fmt, ...) |
@@ -411,9 +415,11 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) | |||
411 | reiserfs_abort_journal(sb, errno); | 415 | reiserfs_abort_journal(sb, errno); |
412 | } | 416 | } |
413 | 417 | ||
414 | /* this prints internal nodes (4 keys/items in line) (dc_number, | 418 | /* |
415 | dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, | 419 | * this prints internal nodes (4 keys/items in line) (dc_number, |
416 | dc_size)...*/ | 420 | * dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, |
421 | * dc_size)... | ||
422 | */ | ||
417 | static int print_internal(struct buffer_head *bh, int first, int last) | 423 | static int print_internal(struct buffer_head *bh, int first, int last) |
418 | { | 424 | { |
419 | struct reiserfs_key *key; | 425 | struct reiserfs_key *key; |
@@ -543,9 +549,11 @@ static int print_super_block(struct buffer_head *bh) | |||
543 | printk("Block count %u\n", sb_block_count(rs)); | 549 | printk("Block count %u\n", sb_block_count(rs)); |
544 | printk("Blocksize %d\n", sb_blocksize(rs)); | 550 | printk("Blocksize %d\n", sb_blocksize(rs)); |
545 | printk("Free blocks %u\n", sb_free_blocks(rs)); | 551 | printk("Free blocks %u\n", sb_free_blocks(rs)); |
546 | // FIXME: this would be confusing if | 552 | /* |
547 | // someone stores reiserfs super block in some data block ;) | 553 | * FIXME: this would be confusing if |
554 | * someone stores reiserfs super block in some data block ;) | ||
548 | // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); | 555 | // skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); |
556 | */ | ||
549 | skipped = bh->b_blocknr; | 557 | skipped = bh->b_blocknr; |
550 | data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - | 558 | data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - |
551 | (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + | 559 | (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + |
@@ -581,8 +589,8 @@ static int print_desc_block(struct buffer_head *bh) | |||
581 | 589 | ||
582 | return 0; | 590 | return 0; |
583 | } | 591 | } |
584 | 592 | /* ..., int print_mode, int first, int last) */ | |
585 | void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last) | 593 | void print_block(struct buffer_head *bh, ...) |
586 | { | 594 | { |
587 | va_list args; | 595 | va_list args; |
588 | int mode, first, last; | 596 | int mode, first, last; |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 35bfde10ca0f..2195e7f2297f 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details | 2 | * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for |
3 | * licensing and copyright details | ||
3 | */ | 4 | */ |
4 | 5 | ||
5 | #include <linux/reiserfs_fs.h> | 6 | #include <linux/reiserfs_fs.h> |
@@ -23,52 +24,73 @@ | |||
23 | 24 | ||
24 | struct reiserfs_journal_list; | 25 | struct reiserfs_journal_list; |
25 | 26 | ||
26 | /** bitmasks for i_flags field in reiserfs-specific part of inode */ | 27 | /* bitmasks for i_flags field in reiserfs-specific part of inode */ |
27 | typedef enum { | 28 | typedef enum { |
28 | /** this says what format of key do all items (but stat data) of | 29 | /* |
29 | an object have. If this is set, that format is 3.6 otherwise | 30 | * this says what format of key do all items (but stat data) of |
30 | - 3.5 */ | 31 | * an object have. If this is set, that format is 3.6 otherwise - 3.5 |
32 | */ | ||
31 | i_item_key_version_mask = 0x0001, | 33 | i_item_key_version_mask = 0x0001, |
32 | /** If this is unset, object has 3.5 stat data, otherwise, it has | 34 | |
33 | 3.6 stat data with 64bit size, 32bit nlink etc. */ | 35 | /* |
36 | * If this is unset, object has 3.5 stat data, otherwise, | ||
37 | * it has 3.6 stat data with 64bit size, 32bit nlink etc. | ||
38 | */ | ||
34 | i_stat_data_version_mask = 0x0002, | 39 | i_stat_data_version_mask = 0x0002, |
35 | /** file might need tail packing on close */ | 40 | |
41 | /* file might need tail packing on close */ | ||
36 | i_pack_on_close_mask = 0x0004, | 42 | i_pack_on_close_mask = 0x0004, |
37 | /** don't pack tail of file */ | 43 | |
44 | /* don't pack tail of file */ | ||
38 | i_nopack_mask = 0x0008, | 45 | i_nopack_mask = 0x0008, |
39 | /** If those is set, "safe link" was created for this file during | 46 | |
40 | truncate or unlink. Safe link is used to avoid leakage of disk | 47 | /* |
41 | space on crash with some files open, but unlinked. */ | 48 | * If either of these are set, "safe link" was created for this |
49 | * file during truncate or unlink. Safe link is used to avoid | ||
50 | * leakage of disk space on crash with some files open, but unlinked. | ||
51 | */ | ||
42 | i_link_saved_unlink_mask = 0x0010, | 52 | i_link_saved_unlink_mask = 0x0010, |
43 | i_link_saved_truncate_mask = 0x0020, | 53 | i_link_saved_truncate_mask = 0x0020, |
54 | |||
44 | i_has_xattr_dir = 0x0040, | 55 | i_has_xattr_dir = 0x0040, |
45 | i_data_log = 0x0080, | 56 | i_data_log = 0x0080, |
46 | } reiserfs_inode_flags; | 57 | } reiserfs_inode_flags; |
47 | 58 | ||
48 | struct reiserfs_inode_info { | 59 | struct reiserfs_inode_info { |
49 | __u32 i_key[4]; /* key is still 4 32 bit integers */ | 60 | __u32 i_key[4]; /* key is still 4 32 bit integers */ |
50 | /** transient inode flags that are never stored on disk. Bitmasks | 61 | |
51 | for this field are defined above. */ | 62 | /* |
63 | * transient inode flags that are never stored on disk. Bitmasks | ||
64 | * for this field are defined above. | ||
65 | */ | ||
52 | __u32 i_flags; | 66 | __u32 i_flags; |
53 | 67 | ||
54 | __u32 i_first_direct_byte; // offset of first byte stored in direct item. | 68 | /* offset of first byte stored in direct item. */ |
69 | __u32 i_first_direct_byte; | ||
55 | 70 | ||
56 | /* copy of persistent inode flags read from sd_attrs. */ | 71 | /* copy of persistent inode flags read from sd_attrs. */ |
57 | __u32 i_attrs; | 72 | __u32 i_attrs; |
58 | 73 | ||
59 | int i_prealloc_block; /* first unused block of a sequence of unused blocks */ | 74 | /* first unused block of a sequence of unused blocks */ |
75 | int i_prealloc_block; | ||
60 | int i_prealloc_count; /* length of that sequence */ | 76 | int i_prealloc_count; /* length of that sequence */ |
61 | struct list_head i_prealloc_list; /* per-transaction list of inodes which | ||
62 | * have preallocated blocks */ | ||
63 | 77 | ||
64 | unsigned new_packing_locality:1; /* new_packig_locality is created; new blocks | 78 | /* per-transaction list of inodes which have preallocated blocks */ |
65 | * for the contents of this directory should be | 79 | struct list_head i_prealloc_list; |
66 | * displaced */ | 80 | |
81 | /* | ||
82 | * new_packing_locality is created; new blocks for the contents | ||
83 | * of this directory should be displaced | ||
84 | */ | ||
85 | unsigned new_packing_locality:1; | ||
67 | 86 | ||
68 | /* we use these for fsync or O_SYNC to decide which transaction | 87 | /* |
69 | ** needs to be committed in order for this inode to be properly | 88 | * we use these for fsync or O_SYNC to decide which transaction |
70 | ** flushed */ | 89 | * needs to be committed in order for this inode to be properly |
90 | * flushed | ||
91 | */ | ||
71 | unsigned int i_trans_id; | 92 | unsigned int i_trans_id; |
93 | |||
72 | struct reiserfs_journal_list *i_jl; | 94 | struct reiserfs_journal_list *i_jl; |
73 | atomic_t openers; | 95 | atomic_t openers; |
74 | struct mutex tailpack; | 96 | struct mutex tailpack; |
@@ -82,9 +104,10 @@ typedef enum { | |||
82 | reiserfs_attrs_cleared = 0x00000001, | 104 | reiserfs_attrs_cleared = 0x00000001, |
83 | } reiserfs_super_block_flags; | 105 | } reiserfs_super_block_flags; |
84 | 106 | ||
85 | /* struct reiserfs_super_block accessors/mutators | 107 | /* |
86 | * since this is a disk structure, it will always be in | 108 | * struct reiserfs_super_block accessors/mutators since this is a disk |
87 | * little endian format. */ | 109 | * structure, it will always be in little endian format. |
110 | */ | ||
88 | #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) | 111 | #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) |
89 | #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) | 112 | #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) |
90 | #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) | 113 | #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) |
@@ -152,48 +175,61 @@ typedef enum { | |||
152 | 175 | ||
153 | /* LOGGING -- */ | 176 | /* LOGGING -- */ |
154 | 177 | ||
155 | /* These all interelate for performance. | 178 | /* |
156 | ** | 179 | * These all interelate for performance. |
157 | ** If the journal block count is smaller than n transactions, you lose speed. | 180 | * |
158 | ** I don't know what n is yet, I'm guessing 8-16. | 181 | * If the journal block count is smaller than n transactions, you lose speed. |
159 | ** | 182 | * I don't know what n is yet, I'm guessing 8-16. |
160 | ** typical transaction size depends on the application, how often fsync is | 183 | * |
161 | ** called, and how many metadata blocks you dirty in a 30 second period. | 184 | * typical transaction size depends on the application, how often fsync is |
162 | ** The more small files (<16k) you use, the larger your transactions will | 185 | * called, and how many metadata blocks you dirty in a 30 second period. |
163 | ** be. | 186 | * The more small files (<16k) you use, the larger your transactions will |
164 | ** | 187 | * be. |
165 | ** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal | 188 | * |
166 | ** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough | 189 | * If your journal fills faster than dirty buffers get flushed to disk, it |
167 | ** to prevent wrapping before dirty meta blocks get to disk. | 190 | * must flush them before allowing the journal to wrap, which slows things |
168 | ** | 191 | * down. If you need high speed meta data updates, the journal should be |
169 | ** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal | 192 | * big enough to prevent wrapping before dirty meta blocks get to disk. |
170 | ** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. | 193 | * |
171 | ** | 194 | * If the batch max is smaller than the transaction max, you'll waste space |
172 | ** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. | 195 | * at the end of the journal because journal_end sets the next transaction |
173 | ** | 196 | * to start at 0 if the next transaction has any chance of wrapping. |
174 | */ | 197 | * |
198 | * The large the batch max age, the better the speed, and the more meta | ||
199 | * data changes you'll lose after a crash. | ||
200 | */ | ||
175 | 201 | ||
176 | /* don't mess with these for a while */ | 202 | /* don't mess with these for a while */ |
177 | /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ | 203 | /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ |
178 | #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ | 204 | #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ |
179 | #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ | 205 | #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ |
180 | #define JOURNAL_HASH_SIZE 8192 | 206 | #define JOURNAL_HASH_SIZE 8192 |
181 | #define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ | 207 | |
182 | 208 | /* number of copies of the bitmaps to have floating. Must be >= 2 */ | |
183 | /* One of these for every block in every transaction | 209 | #define JOURNAL_NUM_BITMAPS 5 |
184 | ** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a | 210 | |
185 | ** hash of all the in memory transactions. | 211 | /* |
186 | ** next and prev are used by the current transaction (journal_hash). | 212 | * One of these for every block in every transaction |
187 | ** hnext and hprev are used by journal_list_hash. If a block is in more than one transaction, the journal_list_hash | 213 | * Each one is in two hash tables. First, a hash of the current transaction, |
188 | ** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging | 214 | * and after journal_end, a hash of all the in memory transactions. |
189 | ** to a given transaction. | 215 | * next and prev are used by the current transaction (journal_hash). |
190 | */ | 216 | * hnext and hprev are used by journal_list_hash. If a block is in more |
217 | * than one transaction, the journal_list_hash links it in multiple times. | ||
218 | * This allows flush_journal_list to remove just the cnode belonging to a | ||
219 | * given transaction. | ||
220 | */ | ||
191 | struct reiserfs_journal_cnode { | 221 | struct reiserfs_journal_cnode { |
192 | struct buffer_head *bh; /* real buffer head */ | 222 | struct buffer_head *bh; /* real buffer head */ |
193 | struct super_block *sb; /* dev of real buffer head */ | 223 | struct super_block *sb; /* dev of real buffer head */ |
194 | __u32 blocknr; /* block number of real buffer head, == 0 when buffer on disk */ | 224 | |
225 | /* block number of real buffer head, == 0 when buffer on disk */ | ||
226 | __u32 blocknr; | ||
227 | |||
195 | unsigned long state; | 228 | unsigned long state; |
196 | struct reiserfs_journal_list *jlist; /* journal list this cnode lives in */ | 229 | |
230 | /* journal list this cnode lives in */ | ||
231 | struct reiserfs_journal_list *jlist; | ||
232 | |||
197 | struct reiserfs_journal_cnode *next; /* next in transaction list */ | 233 | struct reiserfs_journal_cnode *next; /* next in transaction list */ |
198 | struct reiserfs_journal_cnode *prev; /* prev in transaction list */ | 234 | struct reiserfs_journal_cnode *prev; /* prev in transaction list */ |
199 | struct reiserfs_journal_cnode *hprev; /* prev in hash list */ | 235 | struct reiserfs_journal_cnode *hprev; /* prev in hash list */ |
@@ -212,18 +248,22 @@ struct reiserfs_list_bitmap { | |||
212 | }; | 248 | }; |
213 | 249 | ||
214 | /* | 250 | /* |
215 | ** one of these for each transaction. The most important part here is the j_realblock. | 251 | * one of these for each transaction. The most important part here is the |
216 | ** this list of cnodes is used to hash all the blocks in all the commits, to mark all the | 252 | * j_realblock. this list of cnodes is used to hash all the blocks in all |
217 | ** real buffer heads dirty once all the commits hit the disk, | 253 | * the commits, to mark all the real buffer heads dirty once all the commits |
218 | ** and to make sure every real block in a transaction is on disk before allowing the log area | 254 | * hit the disk, and to make sure every real block in a transaction is on |
219 | ** to be overwritten */ | 255 | * disk before allowing the log area to be overwritten |
256 | */ | ||
220 | struct reiserfs_journal_list { | 257 | struct reiserfs_journal_list { |
221 | unsigned long j_start; | 258 | unsigned long j_start; |
222 | unsigned long j_state; | 259 | unsigned long j_state; |
223 | unsigned long j_len; | 260 | unsigned long j_len; |
224 | atomic_t j_nonzerolen; | 261 | atomic_t j_nonzerolen; |
225 | atomic_t j_commit_left; | 262 | atomic_t j_commit_left; |
226 | atomic_t j_older_commits_done; /* all commits older than this on disk */ | 263 | |
264 | /* all commits older than this on disk */ | ||
265 | atomic_t j_older_commits_done; | ||
266 | |||
227 | struct mutex j_commit_mutex; | 267 | struct mutex j_commit_mutex; |
228 | unsigned int j_trans_id; | 268 | unsigned int j_trans_id; |
229 | time_t j_timestamp; | 269 | time_t j_timestamp; |
@@ -234,11 +274,15 @@ struct reiserfs_journal_list { | |||
234 | /* time ordered list of all active transactions */ | 274 | /* time ordered list of all active transactions */ |
235 | struct list_head j_list; | 275 | struct list_head j_list; |
236 | 276 | ||
237 | /* time ordered list of all transactions we haven't tried to flush yet */ | 277 | /* |
278 | * time ordered list of all transactions we haven't tried | ||
279 | * to flush yet | ||
280 | */ | ||
238 | struct list_head j_working_list; | 281 | struct list_head j_working_list; |
239 | 282 | ||
240 | /* list of tail conversion targets in need of flush before commit */ | 283 | /* list of tail conversion targets in need of flush before commit */ |
241 | struct list_head j_tail_bh_list; | 284 | struct list_head j_tail_bh_list; |
285 | |||
242 | /* list of data=ordered buffers in need of flush before commit */ | 286 | /* list of data=ordered buffers in need of flush before commit */ |
243 | struct list_head j_bh_list; | 287 | struct list_head j_bh_list; |
244 | int j_refcount; | 288 | int j_refcount; |
@@ -246,46 +290,83 @@ struct reiserfs_journal_list { | |||
246 | 290 | ||
247 | struct reiserfs_journal { | 291 | struct reiserfs_journal { |
248 | struct buffer_head **j_ap_blocks; /* journal blocks on disk */ | 292 | struct buffer_head **j_ap_blocks; /* journal blocks on disk */ |
249 | struct reiserfs_journal_cnode *j_last; /* newest journal block */ | 293 | /* newest journal block */ |
250 | struct reiserfs_journal_cnode *j_first; /* oldest journal block. start here for traverse */ | 294 | struct reiserfs_journal_cnode *j_last; |
295 | |||
296 | /* oldest journal block. start here for traverse */ | ||
297 | struct reiserfs_journal_cnode *j_first; | ||
251 | 298 | ||
252 | struct block_device *j_dev_bd; | 299 | struct block_device *j_dev_bd; |
253 | fmode_t j_dev_mode; | 300 | fmode_t j_dev_mode; |
254 | int j_1st_reserved_block; /* first block on s_dev of reserved area journal */ | 301 | |
302 | /* first block on s_dev of reserved area journal */ | ||
303 | int j_1st_reserved_block; | ||
255 | 304 | ||
256 | unsigned long j_state; | 305 | unsigned long j_state; |
257 | unsigned int j_trans_id; | 306 | unsigned int j_trans_id; |
258 | unsigned long j_mount_id; | 307 | unsigned long j_mount_id; |
259 | unsigned long j_start; /* start of current waiting commit (index into j_ap_blocks) */ | 308 | |
309 | /* start of current waiting commit (index into j_ap_blocks) */ | ||
310 | unsigned long j_start; | ||
260 | unsigned long j_len; /* length of current waiting commit */ | 311 | unsigned long j_len; /* length of current waiting commit */ |
261 | unsigned long j_len_alloc; /* number of buffers requested by journal_begin() */ | 312 | |
313 | /* number of buffers requested by journal_begin() */ | ||
314 | unsigned long j_len_alloc; | ||
315 | |||
262 | atomic_t j_wcount; /* count of writers for current commit */ | 316 | atomic_t j_wcount; /* count of writers for current commit */ |
263 | unsigned long j_bcount; /* batch count. allows turning X transactions into 1 */ | 317 | |
264 | unsigned long j_first_unflushed_offset; /* first unflushed transactions offset */ | 318 | /* batch count. allows turning X transactions into 1 */ |
265 | unsigned j_last_flush_trans_id; /* last fully flushed journal timestamp */ | 319 | unsigned long j_bcount; |
320 | |||
321 | /* first unflushed transactions offset */ | ||
322 | unsigned long j_first_unflushed_offset; | ||
323 | |||
324 | /* last fully flushed journal timestamp */ | ||
325 | unsigned j_last_flush_trans_id; | ||
326 | |||
266 | struct buffer_head *j_header_bh; | 327 | struct buffer_head *j_header_bh; |
267 | 328 | ||
268 | time_t j_trans_start_time; /* time this transaction started */ | 329 | time_t j_trans_start_time; /* time this transaction started */ |
269 | struct mutex j_mutex; | 330 | struct mutex j_mutex; |
270 | struct mutex j_flush_mutex; | 331 | struct mutex j_flush_mutex; |
271 | wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */ | 332 | |
272 | atomic_t j_jlock; /* lock for j_join_wait */ | 333 | /* wait for current transaction to finish before starting new one */ |
334 | wait_queue_head_t j_join_wait; | ||
335 | |||
336 | atomic_t j_jlock; /* lock for j_join_wait */ | ||
273 | int j_list_bitmap_index; /* number of next list bitmap to use */ | 337 | int j_list_bitmap_index; /* number of next list bitmap to use */ |
274 | int j_must_wait; /* no more journal begins allowed. MUST sleep on j_join_wait */ | 338 | |
275 | int j_next_full_flush; /* next journal_end will flush all journal list */ | 339 | /* no more journal begins allowed. MUST sleep on j_join_wait */ |
276 | int j_next_async_flush; /* next journal_end will flush all async commits */ | 340 | int j_must_wait; |
341 | |||
342 | /* next journal_end will flush all journal list */ | ||
343 | int j_next_full_flush; | ||
344 | |||
345 | /* next journal_end will flush all async commits */ | ||
346 | int j_next_async_flush; | ||
277 | 347 | ||
278 | int j_cnode_used; /* number of cnodes on the used list */ | 348 | int j_cnode_used; /* number of cnodes on the used list */ |
279 | int j_cnode_free; /* number of cnodes on the free list */ | 349 | int j_cnode_free; /* number of cnodes on the free list */ |
280 | 350 | ||
281 | unsigned int j_trans_max; /* max number of blocks in a transaction. */ | 351 | /* max number of blocks in a transaction. */ |
282 | unsigned int j_max_batch; /* max number of blocks to batch into a trans */ | 352 | unsigned int j_trans_max; |
283 | unsigned int j_max_commit_age; /* in seconds, how old can an async commit be */ | 353 | |
284 | unsigned int j_max_trans_age; /* in seconds, how old can a transaction be */ | 354 | /* max number of blocks to batch into a trans */ |
285 | unsigned int j_default_max_commit_age; /* the default for the max commit age */ | 355 | unsigned int j_max_batch; |
356 | |||
357 | /* in seconds, how old can an async commit be */ | ||
358 | unsigned int j_max_commit_age; | ||
359 | |||
360 | /* in seconds, how old can a transaction be */ | ||
361 | unsigned int j_max_trans_age; | ||
362 | |||
363 | /* the default for the max commit age */ | ||
364 | unsigned int j_default_max_commit_age; | ||
286 | 365 | ||
287 | struct reiserfs_journal_cnode *j_cnode_free_list; | 366 | struct reiserfs_journal_cnode *j_cnode_free_list; |
288 | struct reiserfs_journal_cnode *j_cnode_free_orig; /* orig pointer returned from vmalloc */ | 367 | |
368 | /* orig pointer returned from vmalloc */ | ||
369 | struct reiserfs_journal_cnode *j_cnode_free_orig; | ||
289 | 370 | ||
290 | struct reiserfs_journal_list *j_current_jl; | 371 | struct reiserfs_journal_list *j_current_jl; |
291 | int j_free_bitmap_nodes; | 372 | int j_free_bitmap_nodes; |
@@ -306,14 +387,21 @@ struct reiserfs_journal { | |||
306 | 387 | ||
307 | /* list of all active transactions */ | 388 | /* list of all active transactions */ |
308 | struct list_head j_journal_list; | 389 | struct list_head j_journal_list; |
390 | |||
309 | /* lists that haven't been touched by writeback attempts */ | 391 | /* lists that haven't been touched by writeback attempts */ |
310 | struct list_head j_working_list; | 392 | struct list_head j_working_list; |
311 | 393 | ||
312 | struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* array of bitmaps to record the deleted blocks */ | 394 | /* hash table for real buffer heads in current trans */ |
313 | struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for real buffer heads in current trans */ | 395 | struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; |
314 | struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all | 396 | |
315 | the transactions */ | 397 | /* hash table for all the real buffer heads in all the transactions */ |
316 | struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */ | 398 | struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; |
399 | |||
400 | /* array of bitmaps to record the deleted blocks */ | ||
401 | struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; | ||
402 | |||
403 | /* list of inodes which have preallocated blocks */ | ||
404 | struct list_head j_prealloc_list; | ||
317 | int j_persistent_trans; | 405 | int j_persistent_trans; |
318 | unsigned long j_max_trans_size; | 406 | unsigned long j_max_trans_size; |
319 | unsigned long j_max_batch_size; | 407 | unsigned long j_max_batch_size; |
@@ -328,11 +416,12 @@ struct reiserfs_journal { | |||
328 | 416 | ||
329 | enum journal_state_bits { | 417 | enum journal_state_bits { |
330 | J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ | 418 | J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ |
331 | J_WRITERS_QUEUED, /* set when log is full due to too many writers */ | 419 | J_WRITERS_QUEUED, /* set when log is full due to too many writers */ |
332 | J_ABORTED, /* set when log is aborted */ | 420 | J_ABORTED, /* set when log is aborted */ |
333 | }; | 421 | }; |
334 | 422 | ||
335 | #define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ | 423 | /* ick. magic string to find desc blocks in the journal */ |
424 | #define JOURNAL_DESC_MAGIC "ReIsErLB" | ||
336 | 425 | ||
337 | typedef __u32(*hashf_t) (const signed char *, int); | 426 | typedef __u32(*hashf_t) (const signed char *, int); |
338 | 427 | ||
@@ -364,7 +453,10 @@ typedef struct reiserfs_proc_info_data { | |||
364 | stat_cnt_t leaked_oid; | 453 | stat_cnt_t leaked_oid; |
365 | stat_cnt_t leaves_removable; | 454 | stat_cnt_t leaves_removable; |
366 | 455 | ||
367 | /* balances per level. Use explicit 5 as MAX_HEIGHT is not visible yet. */ | 456 | /* |
457 | * balances per level. | ||
458 | * Use explicit 5 as MAX_HEIGHT is not visible yet. | ||
459 | */ | ||
368 | stat_cnt_t balance_at[5]; /* XXX */ | 460 | stat_cnt_t balance_at[5]; /* XXX */ |
369 | /* sbk == search_by_key */ | 461 | /* sbk == search_by_key */ |
370 | stat_cnt_t sbk_read_at[5]; /* XXX */ | 462 | stat_cnt_t sbk_read_at[5]; /* XXX */ |
@@ -416,18 +508,24 @@ typedef struct reiserfs_proc_info_data { | |||
416 | 508 | ||
417 | /* reiserfs union of in-core super block data */ | 509 | /* reiserfs union of in-core super block data */ |
418 | struct reiserfs_sb_info { | 510 | struct reiserfs_sb_info { |
419 | struct buffer_head *s_sbh; /* Buffer containing the super block */ | 511 | /* Buffer containing the super block */ |
420 | /* both the comment and the choice of | 512 | struct buffer_head *s_sbh; |
421 | name are unclear for s_rs -Hans */ | 513 | |
422 | struct reiserfs_super_block *s_rs; /* Pointer to the super block in the buffer */ | 514 | /* Pointer to the on-disk super block in the buffer */ |
515 | struct reiserfs_super_block *s_rs; | ||
423 | struct reiserfs_bitmap_info *s_ap_bitmap; | 516 | struct reiserfs_bitmap_info *s_ap_bitmap; |
424 | struct reiserfs_journal *s_journal; /* pointer to journal information */ | 517 | |
518 | /* pointer to journal information */ | ||
519 | struct reiserfs_journal *s_journal; | ||
520 | |||
425 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ | 521 | unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ |
426 | 522 | ||
427 | /* Serialize writers access, replace the old bkl */ | 523 | /* Serialize writers access, replace the old bkl */ |
428 | struct mutex lock; | 524 | struct mutex lock; |
525 | |||
429 | /* Owner of the lock (can be recursive) */ | 526 | /* Owner of the lock (can be recursive) */ |
430 | struct task_struct *lock_owner; | 527 | struct task_struct *lock_owner; |
528 | |||
431 | /* Depth of the lock, start from -1 like the bkl */ | 529 | /* Depth of the lock, start from -1 like the bkl */ |
432 | int lock_depth; | 530 | int lock_depth; |
433 | 531 | ||
@@ -435,30 +533,50 @@ struct reiserfs_sb_info { | |||
435 | 533 | ||
436 | /* Comment? -Hans */ | 534 | /* Comment? -Hans */ |
437 | void (*end_io_handler) (struct buffer_head *, int); | 535 | void (*end_io_handler) (struct buffer_head *, int); |
438 | hashf_t s_hash_function; /* pointer to function which is used | 536 | |
439 | to sort names in directory. Set on | 537 | /* |
440 | mount */ | 538 | * pointer to function which is used to sort names in directory. |
441 | unsigned long s_mount_opt; /* reiserfs's mount options are set | 539 | * Set on mount |
442 | here (currently - NOTAIL, NOLOG, | 540 | */ |
443 | REPLAYONLY) */ | 541 | hashf_t s_hash_function; |
444 | 542 | ||
445 | struct { /* This is a structure that describes block allocator options */ | 543 | /* reiserfs's mount options are set here */ |
446 | unsigned long bits; /* Bitfield for enable/disable kind of options */ | 544 | unsigned long s_mount_opt; |
447 | unsigned long large_file_size; /* size started from which we consider file to be a large one(in blocks) */ | 545 | |
546 | /* This is a structure that describes block allocator options */ | ||
547 | struct { | ||
548 | /* Bitfield for enable/disable kind of options */ | ||
549 | unsigned long bits; | ||
550 | |||
551 | /* | ||
552 | * size started from which we consider file | ||
553 | * to be a large one (in blocks) | ||
554 | */ | ||
555 | unsigned long large_file_size; | ||
556 | |||
448 | int border; /* percentage of disk, border takes */ | 557 | int border; /* percentage of disk, border takes */ |
449 | int preallocmin; /* Minimal file size (in blocks) starting from which we do preallocations */ | 558 | |
450 | int preallocsize; /* Number of blocks we try to prealloc when file | 559 | /* |
451 | reaches preallocmin size (in blocks) or | 560 | * Minimal file size (in blocks) starting |
452 | prealloc_list is empty. */ | 561 | * from which we do preallocations |
562 | */ | ||
563 | int preallocmin; | ||
564 | |||
565 | /* | ||
566 | * Number of blocks we try to prealloc when file | ||
567 | * reaches preallocmin size (in blocks) or prealloc_list | ||
568 | is empty. | ||
569 | */ | ||
570 | int preallocsize; | ||
453 | } s_alloc_options; | 571 | } s_alloc_options; |
454 | 572 | ||
455 | /* Comment? -Hans */ | 573 | /* Comment? -Hans */ |
456 | wait_queue_head_t s_wait; | 574 | wait_queue_head_t s_wait; |
457 | /* To be obsoleted soon by per buffer seals.. -Hans */ | 575 | /* increased by one every time the tree gets re-balanced */ |
458 | atomic_t s_generation_counter; // increased by one every time the | 576 | atomic_t s_generation_counter; |
459 | // tree gets re-balanced | 577 | |
460 | unsigned long s_properties; /* File system properties. Currently holds | 578 | /* File system properties. Currently holds on-disk FS format */ |
461 | on-disk FS format */ | 579 | unsigned long s_properties; |
462 | 580 | ||
463 | /* session statistics */ | 581 | /* session statistics */ |
464 | int s_disk_reads; | 582 | int s_disk_reads; |
@@ -471,14 +589,23 @@ struct reiserfs_sb_info { | |||
471 | int s_bmaps_without_search; | 589 | int s_bmaps_without_search; |
472 | int s_direct2indirect; | 590 | int s_direct2indirect; |
473 | int s_indirect2direct; | 591 | int s_indirect2direct; |
474 | /* set up when it's ok for reiserfs_read_inode2() to read from | 592 | |
475 | disk inode with nlink==0. Currently this is only used during | 593 | /* |
476 | finish_unfinished() processing at mount time */ | 594 | * set up when it's ok for reiserfs_read_inode2() to read from |
595 | * disk inode with nlink==0. Currently this is only used during | ||
596 | * finish_unfinished() processing at mount time | ||
597 | */ | ||
477 | int s_is_unlinked_ok; | 598 | int s_is_unlinked_ok; |
599 | |||
478 | reiserfs_proc_info_data_t s_proc_info_data; | 600 | reiserfs_proc_info_data_t s_proc_info_data; |
479 | struct proc_dir_entry *procdir; | 601 | struct proc_dir_entry *procdir; |
480 | int reserved_blocks; /* amount of blocks reserved for further allocations */ | 602 | |
481 | spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ | 603 | /* amount of blocks reserved for further allocations */ |
604 | int reserved_blocks; | ||
605 | |||
606 | |||
607 | /* this lock on now only used to protect reserved_blocks variable */ | ||
608 | spinlock_t bitmap_lock; | ||
482 | struct dentry *priv_root; /* root of /.reiserfs_priv */ | 609 | struct dentry *priv_root; /* root of /.reiserfs_priv */ |
483 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ | 610 | struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ |
484 | int j_errno; | 611 | int j_errno; |
@@ -494,14 +621,13 @@ struct reiserfs_sb_info { | |||
494 | char *s_jdev; /* Stored jdev for mount option showing */ | 621 | char *s_jdev; /* Stored jdev for mount option showing */ |
495 | #ifdef CONFIG_REISERFS_CHECK | 622 | #ifdef CONFIG_REISERFS_CHECK |
496 | 623 | ||
497 | struct tree_balance *cur_tb; /* | 624 | /* |
498 | * Detects whether more than one | 625 | * Detects whether more than one copy of tb exists per superblock |
499 | * copy of tb exists per superblock | 626 | * as a means of checking whether do_balance is executing |
500 | * as a means of checking whether | 627 | * concurrently against another tree reader/writer on a same |
501 | * do_balance is executing concurrently | 628 | * mount point. |
502 | * against another tree reader/writer | 629 | */ |
503 | * on a same mount point. | 630 | struct tree_balance *cur_tb; |
504 | */ | ||
505 | #endif | 631 | #endif |
506 | }; | 632 | }; |
507 | 633 | ||
@@ -510,25 +636,36 @@ struct reiserfs_sb_info { | |||
510 | #define REISERFS_3_6 1 | 636 | #define REISERFS_3_6 1 |
511 | #define REISERFS_OLD_FORMAT 2 | 637 | #define REISERFS_OLD_FORMAT 2 |
512 | 638 | ||
513 | enum reiserfs_mount_options { | ||
514 | /* Mount options */ | 639 | /* Mount options */ |
515 | REISERFS_LARGETAIL, /* large tails will be created in a session */ | 640 | enum reiserfs_mount_options { |
516 | REISERFS_SMALLTAIL, /* small (for files less than block size) tails will be created in a session */ | 641 | /* large tails will be created in a session */ |
517 | REPLAYONLY, /* replay journal and return 0. Use by fsck */ | 642 | REISERFS_LARGETAIL, |
518 | REISERFS_CONVERT, /* -o conv: causes conversion of old | 643 | /* |
519 | format super block to the new | 644 | * small (for files less than block size) tails will |
520 | format. If not specified - old | 645 | * be created in a session |
521 | partition will be dealt with in a | 646 | */ |
522 | manner of 3.5.x */ | 647 | REISERFS_SMALLTAIL, |
523 | 648 | ||
524 | /* -o hash={tea, rupasov, r5, detect} is meant for properly mounting | 649 | /* replay journal and return 0. Use by fsck */ |
525 | ** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option | 650 | REPLAYONLY, |
526 | ** is not required. If the normal autodection code can't determine which | 651 | |
527 | ** hash to use (because both hashes had the same value for a file) | 652 | /* |
528 | ** use this option to force a specific hash. It won't allow you to override | 653 | * -o conv: causes conversion of old format super block to the |
529 | ** the existing hash on the FS, so if you have a tea hash disk, and mount | 654 | * new format. If not specified - old partition will be dealt |
530 | ** with -o hash=rupasov, the mount will fail. | 655 | * with in a manner of 3.5.x |
531 | */ | 656 | */ |
657 | REISERFS_CONVERT, | ||
658 | |||
659 | /* | ||
660 | * -o hash={tea, rupasov, r5, detect} is meant for properly mounting | ||
661 | * reiserfs disks from 3.5.19 or earlier. 99% of the time, this | ||
662 | * option is not required. If the normal autodection code can't | ||
663 | * determine which hash to use (because both hashes had the same | ||
664 | * value for a file) use this option to force a specific hash. | ||
665 | * It won't allow you to override the existing hash on the FS, so | ||
666 | * if you have a tea hash disk, and mount with -o hash=rupasov, | ||
667 | * the mount will fail. | ||
668 | */ | ||
532 | FORCE_TEA_HASH, /* try to force tea hash on mount */ | 669 | FORCE_TEA_HASH, /* try to force tea hash on mount */ |
533 | FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ | 670 | FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ |
534 | FORCE_R5_HASH, /* try to force rupasov hash on mount */ | 671 | FORCE_R5_HASH, /* try to force rupasov hash on mount */ |
@@ -538,9 +675,11 @@ enum reiserfs_mount_options { | |||
538 | REISERFS_DATA_ORDERED, | 675 | REISERFS_DATA_ORDERED, |
539 | REISERFS_DATA_WRITEBACK, | 676 | REISERFS_DATA_WRITEBACK, |
540 | 677 | ||
541 | /* used for testing experimental features, makes benchmarking new | 678 | /* |
542 | features with and without more convenient, should never be used by | 679 | * used for testing experimental features, makes benchmarking new |
543 | users in any code shipped to users (ideally) */ | 680 | * features with and without more convenient, should never be used by |
681 | * users in any code shipped to users (ideally) | ||
682 | */ | ||
544 | 683 | ||
545 | REISERFS_NO_BORDER, | 684 | REISERFS_NO_BORDER, |
546 | REISERFS_NO_UNHASHED_RELOCATION, | 685 | REISERFS_NO_UNHASHED_RELOCATION, |
@@ -707,28 +846,28 @@ static inline void reiserfs_cond_resched(struct super_block *s) | |||
707 | 846 | ||
708 | struct fid; | 847 | struct fid; |
709 | 848 | ||
710 | /* in reading the #defines, it may help to understand that they employ | 849 | /* |
711 | the following abbreviations: | 850 | * in reading the #defines, it may help to understand that they employ |
712 | 851 | * the following abbreviations: | |
713 | B = Buffer | 852 | * |
714 | I = Item header | 853 | * B = Buffer |
715 | H = Height within the tree (should be changed to LEV) | 854 | * I = Item header |
716 | N = Number of the item in the node | 855 | * H = Height within the tree (should be changed to LEV) |
717 | STAT = stat data | 856 | * N = Number of the item in the node |
718 | DEH = Directory Entry Header | 857 | * STAT = stat data |
719 | EC = Entry Count | 858 | * DEH = Directory Entry Header |
720 | E = Entry number | 859 | * EC = Entry Count |
721 | UL = Unsigned Long | 860 | * E = Entry number |
722 | BLKH = BLocK Header | 861 | * UL = Unsigned Long |
723 | UNFM = UNForMatted node | 862 | * BLKH = BLocK Header |
724 | DC = Disk Child | 863 | * UNFM = UNForMatted node |
725 | P = Path | 864 | * DC = Disk Child |
726 | 865 | * P = Path | |
727 | These #defines are named by concatenating these abbreviations, | 866 | * |
728 | where first comes the arguments, and last comes the return value, | 867 | * These #defines are named by concatenating these abbreviations, |
729 | of the macro. | 868 | * where first comes the arguments, and last comes the return value, |
730 | 869 | * of the macro. | |
731 | */ | 870 | */ |
732 | 871 | ||
733 | #define USE_INODE_GENERATION_COUNTER | 872 | #define USE_INODE_GENERATION_COUNTER |
734 | 873 | ||
@@ -739,14 +878,17 @@ struct fid; | |||
739 | /* n must be power of 2 */ | 878 | /* n must be power of 2 */ |
740 | #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) | 879 | #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) |
741 | 880 | ||
742 | // to be ok for alpha and others we have to align structures to 8 byte | 881 | /* |
743 | // boundary. | 882 | * to be ok for alpha and others we have to align structures to 8 byte |
744 | // FIXME: do not change 4 by anything else: there is code which relies on that | 883 | * boundary. |
884 | * FIXME: do not change 4 by anything else: there is code which relies on that | ||
885 | */ | ||
745 | #define ROUND_UP(x) _ROUND_UP(x,8LL) | 886 | #define ROUND_UP(x) _ROUND_UP(x,8LL) |
746 | 887 | ||
747 | /* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug | 888 | /* |
748 | ** messages. | 889 | * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug |
749 | */ | 890 | * messages. |
891 | */ | ||
750 | #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ | 892 | #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ |
751 | 893 | ||
752 | void __reiserfs_warning(struct super_block *s, const char *id, | 894 | void __reiserfs_warning(struct super_block *s, const char *id, |
@@ -755,7 +897,7 @@ void __reiserfs_warning(struct super_block *s, const char *id, | |||
755 | __reiserfs_warning(s, id, __func__, fmt, ##args) | 897 | __reiserfs_warning(s, id, __func__, fmt, ##args) |
756 | /* assertions handling */ | 898 | /* assertions handling */ |
757 | 899 | ||
758 | /** always check a condition and panic if it's false. */ | 900 | /* always check a condition and panic if it's false. */ |
759 | #define __RASSERT(cond, scond, format, args...) \ | 901 | #define __RASSERT(cond, scond, format, args...) \ |
760 | do { \ | 902 | do { \ |
761 | if (!(cond)) \ | 903 | if (!(cond)) \ |
@@ -778,35 +920,48 @@ do { \ | |||
778 | * Disk Data Structures | 920 | * Disk Data Structures |
779 | */ | 921 | */ |
780 | 922 | ||
781 | /***************************************************************************/ | 923 | /*************************************************************************** |
782 | /* SUPER BLOCK */ | 924 | * SUPER BLOCK * |
783 | /***************************************************************************/ | 925 | ***************************************************************************/ |
784 | 926 | ||
785 | /* | 927 | /* |
786 | * Structure of super block on disk, a version of which in RAM is often accessed as REISERFS_SB(s)->s_rs | 928 | * Structure of super block on disk, a version of which in RAM is often |
787 | * the version in RAM is part of a larger structure containing fields never written to disk. | 929 | * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger |
930 | * structure containing fields never written to disk. | ||
788 | */ | 931 | */ |
789 | #define UNSET_HASH 0 // read_super will guess about, what hash names | 932 | #define UNSET_HASH 0 /* Detect hash on disk */ |
790 | // in directories were sorted with | ||
791 | #define TEA_HASH 1 | 933 | #define TEA_HASH 1 |
792 | #define YURA_HASH 2 | 934 | #define YURA_HASH 2 |
793 | #define R5_HASH 3 | 935 | #define R5_HASH 3 |
794 | #define DEFAULT_HASH R5_HASH | 936 | #define DEFAULT_HASH R5_HASH |
795 | 937 | ||
796 | struct journal_params { | 938 | struct journal_params { |
797 | __le32 jp_journal_1st_block; /* where does journal start from on its | 939 | /* where does journal start from on its device */ |
798 | * device */ | 940 | __le32 jp_journal_1st_block; |
799 | __le32 jp_journal_dev; /* journal device st_rdev */ | 941 | |
800 | __le32 jp_journal_size; /* size of the journal */ | 942 | /* journal device st_rdev */ |
801 | __le32 jp_journal_trans_max; /* max number of blocks in a transaction. */ | 943 | __le32 jp_journal_dev; |
802 | __le32 jp_journal_magic; /* random value made on fs creation (this | 944 | |
803 | * was sb_journal_block_count) */ | 945 | /* size of the journal */ |
804 | __le32 jp_journal_max_batch; /* max number of blocks to batch into a | 946 | __le32 jp_journal_size; |
805 | * trans */ | 947 | |
806 | __le32 jp_journal_max_commit_age; /* in seconds, how old can an async | 948 | /* max number of blocks in a transaction. */ |
807 | * commit be */ | 949 | __le32 jp_journal_trans_max; |
808 | __le32 jp_journal_max_trans_age; /* in seconds, how old can a transaction | 950 | |
809 | * be */ | 951 | /* |
952 | * random value made on fs creation | ||
953 | * (this was sb_journal_block_count) | ||
954 | */ | ||
955 | __le32 jp_journal_magic; | ||
956 | |||
957 | /* max number of blocks to batch into a trans */ | ||
958 | __le32 jp_journal_max_batch; | ||
959 | |||
960 | /* in seconds, how old can an async commit be */ | ||
961 | __le32 jp_journal_max_commit_age; | ||
962 | |||
963 | /* in seconds, how old can a transaction be */ | ||
964 | __le32 jp_journal_max_trans_age; | ||
810 | }; | 965 | }; |
811 | 966 | ||
812 | /* this is the super from 3.5.X, where X >= 10 */ | 967 | /* this is the super from 3.5.X, where X >= 10 */ |
@@ -816,26 +971,48 @@ struct reiserfs_super_block_v1 { | |||
816 | __le32 s_root_block; /* root block number */ | 971 | __le32 s_root_block; /* root block number */ |
817 | struct journal_params s_journal; | 972 | struct journal_params s_journal; |
818 | __le16 s_blocksize; /* block size */ | 973 | __le16 s_blocksize; /* block size */ |
819 | __le16 s_oid_maxsize; /* max size of object id array, see | 974 | |
820 | * get_objectid() commentary */ | 975 | /* max size of object id array, see get_objectid() commentary */ |
976 | __le16 s_oid_maxsize; | ||
821 | __le16 s_oid_cursize; /* current size of object id array */ | 977 | __le16 s_oid_cursize; /* current size of object id array */ |
822 | __le16 s_umount_state; /* this is set to 1 when filesystem was | 978 | |
823 | * umounted, to 2 - when not */ | 979 | /* this is set to 1 when filesystem was umounted, to 2 - when not */ |
824 | char s_magic[10]; /* reiserfs magic string indicates that | 980 | __le16 s_umount_state; |
825 | * file system is reiserfs: | 981 | |
826 | * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ | 982 | /* |
827 | __le16 s_fs_state; /* it is set to used by fsck to mark which | 983 | * reiserfs magic string indicates that file system is reiserfs: |
828 | * phase of rebuilding is done */ | 984 | * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" |
829 | __le32 s_hash_function_code; /* indicate, what hash function is being use | 985 | */ |
830 | * to sort names in a directory*/ | 986 | char s_magic[10]; |
987 | |||
988 | /* | ||
989 | * it is set to used by fsck to mark which | ||
990 | * phase of rebuilding is done | ||
991 | */ | ||
992 | __le16 s_fs_state; | ||
993 | /* | ||
994 | * indicate, what hash function is being use | ||
995 | * to sort names in a directory | ||
996 | */ | ||
997 | __le32 s_hash_function_code; | ||
831 | __le16 s_tree_height; /* height of disk tree */ | 998 | __le16 s_tree_height; /* height of disk tree */ |
832 | __le16 s_bmap_nr; /* amount of bitmap blocks needed to address | 999 | |
833 | * each block of file system */ | 1000 | /* |
834 | __le16 s_version; /* this field is only reliable on filesystem | 1001 | * amount of bitmap blocks needed to address |
835 | * with non-standard journal */ | 1002 | * each block of file system |
836 | __le16 s_reserved_for_journal; /* size in blocks of journal area on main | 1003 | */ |
837 | * device, we need to keep after | 1004 | __le16 s_bmap_nr; |
838 | * making fs with non-standard journal */ | 1005 | |
1006 | /* | ||
1007 | * this field is only reliable on filesystem with non-standard journal | ||
1008 | */ | ||
1009 | __le16 s_version; | ||
1010 | |||
1011 | /* | ||
1012 | * size in blocks of journal area on main device, we need to | ||
1013 | * keep after making fs with non-standard journal | ||
1014 | */ | ||
1015 | __le16 s_reserved_for_journal; | ||
839 | } __attribute__ ((__packed__)); | 1016 | } __attribute__ ((__packed__)); |
840 | 1017 | ||
841 | #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) | 1018 | #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) |
@@ -844,17 +1021,21 @@ struct reiserfs_super_block_v1 { | |||
844 | struct reiserfs_super_block { | 1021 | struct reiserfs_super_block { |
845 | struct reiserfs_super_block_v1 s_v1; | 1022 | struct reiserfs_super_block_v1 s_v1; |
846 | __le32 s_inode_generation; | 1023 | __le32 s_inode_generation; |
847 | __le32 s_flags; /* Right now used only by inode-attributes, if enabled */ | 1024 | |
1025 | /* Right now used only by inode-attributes, if enabled */ | ||
1026 | __le32 s_flags; | ||
1027 | |||
848 | unsigned char s_uuid[16]; /* filesystem unique identifier */ | 1028 | unsigned char s_uuid[16]; /* filesystem unique identifier */ |
849 | unsigned char s_label[16]; /* filesystem volume label */ | 1029 | unsigned char s_label[16]; /* filesystem volume label */ |
850 | __le16 s_mnt_count; /* Count of mounts since last fsck */ | 1030 | __le16 s_mnt_count; /* Count of mounts since last fsck */ |
851 | __le16 s_max_mnt_count; /* Maximum mounts before check */ | 1031 | __le16 s_max_mnt_count; /* Maximum mounts before check */ |
852 | __le32 s_lastcheck; /* Timestamp of last fsck */ | 1032 | __le32 s_lastcheck; /* Timestamp of last fsck */ |
853 | __le32 s_check_interval; /* Interval between checks */ | 1033 | __le32 s_check_interval; /* Interval between checks */ |
854 | char s_unused[76]; /* zero filled by mkreiserfs and | 1034 | |
855 | * reiserfs_convert_objectid_map_v1() | 1035 | /* |
856 | * so any additions must be updated | 1036 | * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1() |
857 | * there as well. */ | 1037 | * so any additions must be updated there as well. */ |
1038 | char s_unused[76]; | ||
858 | } __attribute__ ((__packed__)); | 1039 | } __attribute__ ((__packed__)); |
859 | 1040 | ||
860 | #define SB_SIZE (sizeof(struct reiserfs_super_block)) | 1041 | #define SB_SIZE (sizeof(struct reiserfs_super_block)) |
@@ -862,7 +1043,7 @@ struct reiserfs_super_block { | |||
862 | #define REISERFS_VERSION_1 0 | 1043 | #define REISERFS_VERSION_1 0 |
863 | #define REISERFS_VERSION_2 2 | 1044 | #define REISERFS_VERSION_2 2 |
864 | 1045 | ||
865 | // on-disk super block fields converted to cpu form | 1046 | /* on-disk super block fields converted to cpu form */ |
866 | #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) | 1047 | #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) |
867 | #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) | 1048 | #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) |
868 | #define SB_BLOCKSIZE(s) \ | 1049 | #define SB_BLOCKSIZE(s) \ |
@@ -917,11 +1098,13 @@ int is_reiserfs_3_5(struct reiserfs_super_block *rs); | |||
917 | int is_reiserfs_3_6(struct reiserfs_super_block *rs); | 1098 | int is_reiserfs_3_6(struct reiserfs_super_block *rs); |
918 | int is_reiserfs_jr(struct reiserfs_super_block *rs); | 1099 | int is_reiserfs_jr(struct reiserfs_super_block *rs); |
919 | 1100 | ||
920 | /* ReiserFS leaves the first 64k unused, so that partition labels have | 1101 | /* |
921 | enough space. If someone wants to write a fancy bootloader that | 1102 | * ReiserFS leaves the first 64k unused, so that partition labels have |
922 | needs more than 64k, let us know, and this will be increased in size. | 1103 | * enough space. If someone wants to write a fancy bootloader that |
923 | This number must be larger than the largest block size on any | 1104 | * needs more than 64k, let us know, and this will be increased in size. |
924 | platform, or code will break. -Hans */ | 1105 | * This number must be larger than the largest block size on any |
1106 | * platform, or code will break. -Hans | ||
1107 | */ | ||
925 | #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) | 1108 | #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) |
926 | #define REISERFS_FIRST_BLOCK unused_define | 1109 | #define REISERFS_FIRST_BLOCK unused_define |
927 | #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES | 1110 | #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES |
@@ -946,8 +1129,7 @@ struct unfm_nodeinfo { | |||
946 | unsigned short unfm_freespace; | 1129 | unsigned short unfm_freespace; |
947 | }; | 1130 | }; |
948 | 1131 | ||
949 | /* there are two formats of keys: 3.5 and 3.6 | 1132 | /* there are two formats of keys: 3.5 and 3.6 */ |
950 | */ | ||
951 | #define KEY_FORMAT_3_5 0 | 1133 | #define KEY_FORMAT_3_5 0 |
952 | #define KEY_FORMAT_3_6 1 | 1134 | #define KEY_FORMAT_3_6 1 |
953 | 1135 | ||
@@ -965,8 +1147,10 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) | |||
965 | return sb->s_fs_info; | 1147 | return sb->s_fs_info; |
966 | } | 1148 | } |
967 | 1149 | ||
968 | /* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 | 1150 | /* |
969 | * which overflows on large file systems. */ | 1151 | * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 |
1152 | * which overflows on large file systems. | ||
1153 | */ | ||
970 | static inline __u32 reiserfs_bmap_count(struct super_block *sb) | 1154 | static inline __u32 reiserfs_bmap_count(struct super_block *sb) |
971 | { | 1155 | { |
972 | return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; | 1156 | return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; |
@@ -977,8 +1161,10 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
977 | return bmap_nr > ((1LL << 16) - 1); | 1161 | return bmap_nr > ((1LL << 16) - 1); |
978 | } | 1162 | } |
979 | 1163 | ||
980 | /** this says about version of key of all items (but stat data) the | 1164 | /* |
981 | object consists of */ | 1165 | * this says about version of key of all items (but stat data) the |
1166 | * object consists of | ||
1167 | */ | ||
982 | #define get_inode_item_key_version( inode ) \ | 1168 | #define get_inode_item_key_version( inode ) \ |
983 | ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) | 1169 | ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) |
984 | 1170 | ||
@@ -997,16 +1183,18 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
997 | else \ | 1183 | else \ |
998 | REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) | 1184 | REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) |
999 | 1185 | ||
1000 | /* This is an aggressive tail suppression policy, I am hoping it | 1186 | /* |
1001 | improves our benchmarks. The principle behind it is that percentage | 1187 | * This is an aggressive tail suppression policy, I am hoping it |
1002 | space saving is what matters, not absolute space saving. This is | 1188 | * improves our benchmarks. The principle behind it is that percentage |
1003 | non-intuitive, but it helps to understand it if you consider that the | 1189 | * space saving is what matters, not absolute space saving. This is |
1004 | cost to access 4 blocks is not much more than the cost to access 1 | 1190 | * non-intuitive, but it helps to understand it if you consider that the |
1005 | block, if you have to do a seek and rotate. A tail risks a | 1191 | * cost to access 4 blocks is not much more than the cost to access 1 |
1006 | non-linear disk access that is significant as a percentage of total | 1192 | * block, if you have to do a seek and rotate. A tail risks a |
1007 | time cost for a 4 block file and saves an amount of space that is | 1193 | * non-linear disk access that is significant as a percentage of total |
1008 | less significant as a percentage of space, or so goes the hypothesis. | 1194 | * time cost for a 4 block file and saves an amount of space that is |
1009 | -Hans */ | 1195 | * less significant as a percentage of space, or so goes the hypothesis. |
1196 | * -Hans | ||
1197 | */ | ||
1010 | #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ | 1198 | #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ |
1011 | (\ | 1199 | (\ |
1012 | (!(n_tail_size)) || \ | 1200 | (!(n_tail_size)) || \ |
@@ -1020,10 +1208,11 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
1020 | ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ | 1208 | ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ |
1021 | ) | 1209 | ) |
1022 | 1210 | ||
1023 | /* Another strategy for tails, this one means only create a tail if all the | 1211 | /* |
1024 | file would fit into one DIRECT item. | 1212 | * Another strategy for tails, this one means only create a tail if all the |
1025 | Primary intention for this one is to increase performance by decreasing | 1213 | * file would fit into one DIRECT item. |
1026 | seeking. | 1214 | * Primary intention for this one is to increase performance by decreasing |
1215 | * seeking. | ||
1027 | */ | 1216 | */ |
1028 | #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ | 1217 | #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ |
1029 | (\ | 1218 | (\ |
@@ -1037,23 +1226,21 @@ static inline int bmap_would_wrap(unsigned bmap_nr) | |||
1037 | #define REISERFS_VALID_FS 1 | 1226 | #define REISERFS_VALID_FS 1 |
1038 | #define REISERFS_ERROR_FS 2 | 1227 | #define REISERFS_ERROR_FS 2 |
1039 | 1228 | ||
1040 | // | 1229 | /* |
1041 | // there are 5 item types currently | 1230 | * there are 5 item types currently |
1042 | // | 1231 | */ |
1043 | #define TYPE_STAT_DATA 0 | 1232 | #define TYPE_STAT_DATA 0 |
1044 | #define TYPE_INDIRECT 1 | 1233 | #define TYPE_INDIRECT 1 |
1045 | #define TYPE_DIRECT 2 | 1234 | #define TYPE_DIRECT 2 |
1046 | #define TYPE_DIRENTRY 3 | 1235 | #define TYPE_DIRENTRY 3 |
1047 | #define TYPE_MAXTYPE 3 | 1236 | #define TYPE_MAXTYPE 3 |
1048 | #define TYPE_ANY 15 // FIXME: comment is required | 1237 | #define TYPE_ANY 15 /* FIXME: comment is required */ |
1049 | 1238 | ||
1050 | /***************************************************************************/ | 1239 | /*************************************************************************** |
1051 | /* KEY & ITEM HEAD */ | 1240 | * KEY & ITEM HEAD * |
1052 | /***************************************************************************/ | 1241 | ***************************************************************************/ |
1053 | 1242 | ||
1054 | // | 1243 | /* directories use this key as well as old files */ |
1055 | // directories use this key as well as old files | ||
1056 | // | ||
1057 | struct offset_v1 { | 1244 | struct offset_v1 { |
1058 | __le32 k_offset; | 1245 | __le32 k_offset; |
1059 | __le32 k_uniqueness; | 1246 | __le32 k_uniqueness; |
@@ -1086,11 +1273,14 @@ static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) | |||
1086 | v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); | 1273 | v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); |
1087 | } | 1274 | } |
1088 | 1275 | ||
1089 | /* Key of an item determines its location in the S+tree, and | 1276 | /* |
1090 | is composed of 4 components */ | 1277 | * Key of an item determines its location in the S+tree, and |
1278 | * is composed of 4 components | ||
1279 | */ | ||
1091 | struct reiserfs_key { | 1280 | struct reiserfs_key { |
1092 | __le32 k_dir_id; /* packing locality: by default parent | 1281 | /* packing locality: by default parent directory object id */ |
1093 | directory object id */ | 1282 | __le32 k_dir_id; |
1283 | |||
1094 | __le32 k_objectid; /* object identifier */ | 1284 | __le32 k_objectid; /* object identifier */ |
1095 | union { | 1285 | union { |
1096 | struct offset_v1 k_offset_v1; | 1286 | struct offset_v1 k_offset_v1; |
@@ -1099,8 +1289,8 @@ struct reiserfs_key { | |||
1099 | } __attribute__ ((__packed__)); | 1289 | } __attribute__ ((__packed__)); |
1100 | 1290 | ||
1101 | struct in_core_key { | 1291 | struct in_core_key { |
1102 | __u32 k_dir_id; /* packing locality: by default parent | 1292 | /* packing locality: by default parent directory object id */ |
1103 | directory object id */ | 1293 | __u32 k_dir_id; |
1104 | __u32 k_objectid; /* object identifier */ | 1294 | __u32 k_objectid; /* object identifier */ |
1105 | __u64 k_offset; | 1295 | __u64 k_offset; |
1106 | __u8 k_type; | 1296 | __u8 k_type; |
@@ -1109,14 +1299,16 @@ struct in_core_key { | |||
1109 | struct cpu_key { | 1299 | struct cpu_key { |
1110 | struct in_core_key on_disk_key; | 1300 | struct in_core_key on_disk_key; |
1111 | int version; | 1301 | int version; |
1112 | int key_length; /* 3 in all cases but direct2indirect and | 1302 | /* 3 in all cases but direct2indirect and indirect2direct conversion */ |
1113 | indirect2direct conversion */ | 1303 | int key_length; |
1114 | }; | 1304 | }; |
1115 | 1305 | ||
1116 | /* Our function for comparing keys can compare keys of different | 1306 | /* |
1117 | lengths. It takes as a parameter the length of the keys it is to | 1307 | * Our function for comparing keys can compare keys of different |
1118 | compare. These defines are used in determining what is to be passed | 1308 | * lengths. It takes as a parameter the length of the keys it is to |
1119 | to it as that parameter. */ | 1309 | * compare. These defines are used in determining what is to be passed |
1310 | * to it as that parameter. | ||
1311 | */ | ||
1120 | #define REISERFS_FULL_KEY_LEN 4 | 1312 | #define REISERFS_FULL_KEY_LEN 4 |
1121 | #define REISERFS_SHORT_KEY_LEN 2 | 1313 | #define REISERFS_SHORT_KEY_LEN 2 |
1122 | 1314 | ||
@@ -1145,40 +1337,52 @@ struct cpu_key { | |||
1145 | #define POSITION_FOUND 1 | 1337 | #define POSITION_FOUND 1 |
1146 | #define POSITION_NOT_FOUND 0 | 1338 | #define POSITION_NOT_FOUND 0 |
1147 | 1339 | ||
1148 | // return values for reiserfs_find_entry and search_by_entry_key | 1340 | /* return values for reiserfs_find_entry and search_by_entry_key */ |
1149 | #define NAME_FOUND 1 | 1341 | #define NAME_FOUND 1 |
1150 | #define NAME_NOT_FOUND 0 | 1342 | #define NAME_NOT_FOUND 0 |
1151 | #define GOTO_PREVIOUS_ITEM 2 | 1343 | #define GOTO_PREVIOUS_ITEM 2 |
1152 | #define NAME_FOUND_INVISIBLE 3 | 1344 | #define NAME_FOUND_INVISIBLE 3 |
1153 | 1345 | ||
1154 | /* Everything in the filesystem is stored as a set of items. The | 1346 | /* |
1155 | item head contains the key of the item, its free space (for | 1347 | * Everything in the filesystem is stored as a set of items. The |
1156 | indirect items) and specifies the location of the item itself | 1348 | * item head contains the key of the item, its free space (for |
1157 | within the block. */ | 1349 | * indirect items) and specifies the location of the item itself |
1350 | * within the block. | ||
1351 | */ | ||
1158 | 1352 | ||
1159 | struct item_head { | 1353 | struct item_head { |
1160 | /* Everything in the tree is found by searching for it based on | 1354 | /* |
1161 | * its key.*/ | 1355 | * Everything in the tree is found by searching for it based on |
1356 | * its key. | ||
1357 | */ | ||
1162 | struct reiserfs_key ih_key; | 1358 | struct reiserfs_key ih_key; |
1163 | union { | 1359 | union { |
1164 | /* The free space in the last unformatted node of an | 1360 | /* |
1165 | indirect item if this is an indirect item. This | 1361 | * The free space in the last unformatted node of an |
1166 | equals 0xFFFF iff this is a direct item or stat data | 1362 | * indirect item if this is an indirect item. This |
1167 | item. Note that the key, not this field, is used to | 1363 | * equals 0xFFFF iff this is a direct item or stat data |
1168 | determine the item type, and thus which field this | 1364 | * item. Note that the key, not this field, is used to |
1169 | union contains. */ | 1365 | * determine the item type, and thus which field this |
1366 | * union contains. | ||
1367 | */ | ||
1170 | __le16 ih_free_space_reserved; | 1368 | __le16 ih_free_space_reserved; |
1171 | /* Iff this is a directory item, this field equals the | 1369 | |
1172 | number of directory entries in the directory item. */ | 1370 | /* |
1371 | * Iff this is a directory item, this field equals the | ||
1372 | * number of directory entries in the directory item. | ||
1373 | */ | ||
1173 | __le16 ih_entry_count; | 1374 | __le16 ih_entry_count; |
1174 | } __attribute__ ((__packed__)) u; | 1375 | } __attribute__ ((__packed__)) u; |
1175 | __le16 ih_item_len; /* total size of the item body */ | 1376 | __le16 ih_item_len; /* total size of the item body */ |
1176 | __le16 ih_item_location; /* an offset to the item body | 1377 | |
1177 | * within the block */ | 1378 | /* an offset to the item body within the block */ |
1178 | __le16 ih_version; /* 0 for all old items, 2 for new | 1379 | __le16 ih_item_location; |
1179 | ones. Highest bit is set by fsck | 1380 | |
1180 | temporary, cleaned after all | 1381 | /* |
1181 | done */ | 1382 | * 0 for all old items, 2 for new ones. Highest bit is set by fsck |
1383 | * temporary, cleaned after all done | ||
1384 | */ | ||
1385 | __le16 ih_version; | ||
1182 | } __attribute__ ((__packed__)); | 1386 | } __attribute__ ((__packed__)); |
1183 | /* size of item header */ | 1387 | /* size of item header */ |
1184 | #define IH_SIZE (sizeof(struct item_head)) | 1388 | #define IH_SIZE (sizeof(struct item_head)) |
@@ -1200,27 +1404,24 @@ struct item_head { | |||
1200 | #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) | 1404 | #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) |
1201 | #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) | 1405 | #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) |
1202 | 1406 | ||
1203 | /* these operate on indirect items, where you've got an array of ints | 1407 | /* |
1204 | ** at a possibly unaligned location. These are a noop on ia32 | 1408 | * these operate on indirect items, where you've got an array of ints |
1205 | ** | 1409 | * at a possibly unaligned location. These are a noop on ia32 |
1206 | ** p is the array of __u32, i is the index into the array, v is the value | 1410 | * |
1207 | ** to store there. | 1411 | * p is the array of __u32, i is the index into the array, v is the value |
1208 | */ | 1412 | * to store there. |
1413 | */ | ||
1209 | #define get_block_num(p, i) get_unaligned_le32((p) + (i)) | 1414 | #define get_block_num(p, i) get_unaligned_le32((p) + (i)) |
1210 | #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) | 1415 | #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) |
1211 | 1416 | ||
1212 | // | 1417 | /* in old version uniqueness field shows key type */ |
1213 | // in old version uniqueness field shows key type | ||
1214 | // | ||
1215 | #define V1_SD_UNIQUENESS 0 | 1418 | #define V1_SD_UNIQUENESS 0 |
1216 | #define V1_INDIRECT_UNIQUENESS 0xfffffffe | 1419 | #define V1_INDIRECT_UNIQUENESS 0xfffffffe |
1217 | #define V1_DIRECT_UNIQUENESS 0xffffffff | 1420 | #define V1_DIRECT_UNIQUENESS 0xffffffff |
1218 | #define V1_DIRENTRY_UNIQUENESS 500 | 1421 | #define V1_DIRENTRY_UNIQUENESS 500 |
1219 | #define V1_ANY_UNIQUENESS 555 // FIXME: comment is required | 1422 | #define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */ |
1220 | 1423 | ||
1221 | // | 1424 | /* here are conversion routines */ |
1222 | // here are conversion routines | ||
1223 | // | ||
1224 | static inline int uniqueness2type(__u32 uniqueness) CONSTF; | 1425 | static inline int uniqueness2type(__u32 uniqueness) CONSTF; |
1225 | static inline int uniqueness2type(__u32 uniqueness) | 1426 | static inline int uniqueness2type(__u32 uniqueness) |
1226 | { | 1427 | { |
@@ -1257,11 +1458,11 @@ static inline __u32 type2uniqueness(int type) | |||
1257 | } | 1458 | } |
1258 | } | 1459 | } |
1259 | 1460 | ||
1260 | // | 1461 | /* |
1261 | // key is pointer to on disk key which is stored in le, result is cpu, | 1462 | * key is pointer to on disk key which is stored in le, result is cpu, |
1262 | // there is no way to get version of object from key, so, provide | 1463 | * there is no way to get version of object from key, so, provide |
1263 | // version to these defines | 1464 | * version to these defines |
1264 | // | 1465 | */ |
1265 | static inline loff_t le_key_k_offset(int version, | 1466 | static inline loff_t le_key_k_offset(int version, |
1266 | const struct reiserfs_key *key) | 1467 | const struct reiserfs_key *key) |
1267 | { | 1468 | { |
@@ -1350,9 +1551,7 @@ static inline int is_statdata_le_key(int version, struct reiserfs_key *key) | |||
1350 | return le_key_k_type(version, key) == TYPE_STAT_DATA; | 1551 | return le_key_k_type(version, key) == TYPE_STAT_DATA; |
1351 | } | 1552 | } |
1352 | 1553 | ||
1353 | // | 1554 | /* item header has version. */ |
1354 | // item header has version. | ||
1355 | // | ||
1356 | static inline int is_direntry_le_ih(struct item_head *ih) | 1555 | static inline int is_direntry_le_ih(struct item_head *ih) |
1357 | { | 1556 | { |
1358 | return is_direntry_le_key(ih_version(ih), &ih->ih_key); | 1557 | return is_direntry_le_key(ih_version(ih), &ih->ih_key); |
@@ -1373,9 +1572,7 @@ static inline int is_statdata_le_ih(struct item_head *ih) | |||
1373 | return is_statdata_le_key(ih_version(ih), &ih->ih_key); | 1572 | return is_statdata_le_key(ih_version(ih), &ih->ih_key); |
1374 | } | 1573 | } |
1375 | 1574 | ||
1376 | // | 1575 | /* key is pointer to cpu key, result is cpu */ |
1377 | // key is pointer to cpu key, result is cpu | ||
1378 | // | ||
1379 | static inline loff_t cpu_key_k_offset(const struct cpu_key *key) | 1576 | static inline loff_t cpu_key_k_offset(const struct cpu_key *key) |
1380 | { | 1577 | { |
1381 | return key->on_disk_key.k_offset; | 1578 | return key->on_disk_key.k_offset; |
@@ -1426,7 +1623,7 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key) | |||
1426 | 1623 | ||
1427 | extern struct reiserfs_key root_key; | 1624 | extern struct reiserfs_key root_key; |
1428 | 1625 | ||
1429 | /* | 1626 | /* |
1430 | * Picture represents a leaf of the S+tree | 1627 | * Picture represents a leaf of the S+tree |
1431 | * ______________________________________________________ | 1628 | * ______________________________________________________ |
1432 | * | | Array of | | | | 1629 | * | | Array of | | | |
@@ -1435,15 +1632,19 @@ extern struct reiserfs_key root_key; | |||
1435 | * |______|_______________|___________________|___________| | 1632 | * |______|_______________|___________________|___________| |
1436 | */ | 1633 | */ |
1437 | 1634 | ||
1438 | /* Header of a disk block. More precisely, header of a formatted leaf | 1635 | /* |
1439 | or internal node, and not the header of an unformatted node. */ | 1636 | * Header of a disk block. More precisely, header of a formatted leaf |
1637 | * or internal node, and not the header of an unformatted node. | ||
1638 | */ | ||
1440 | struct block_head { | 1639 | struct block_head { |
1441 | __le16 blk_level; /* Level of a block in the tree. */ | 1640 | __le16 blk_level; /* Level of a block in the tree. */ |
1442 | __le16 blk_nr_item; /* Number of keys/items in a block. */ | 1641 | __le16 blk_nr_item; /* Number of keys/items in a block. */ |
1443 | __le16 blk_free_space; /* Block free space in bytes. */ | 1642 | __le16 blk_free_space; /* Block free space in bytes. */ |
1444 | __le16 blk_reserved; | 1643 | __le16 blk_reserved; |
1445 | /* dump this in v4/planA */ | 1644 | /* dump this in v4/planA */ |
1446 | struct reiserfs_key blk_right_delim_key; /* kept only for compatibility */ | 1645 | |
1646 | /* kept only for compatibility */ | ||
1647 | struct reiserfs_key blk_right_delim_key; | ||
1447 | }; | 1648 | }; |
1448 | 1649 | ||
1449 | #define BLKH_SIZE (sizeof(struct block_head)) | 1650 | #define BLKH_SIZE (sizeof(struct block_head)) |
@@ -1458,18 +1659,20 @@ struct block_head { | |||
1458 | #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) | 1659 | #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) |
1459 | #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) | 1660 | #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) |
1460 | 1661 | ||
1662 | /* values for blk_level field of the struct block_head */ | ||
1663 | |||
1461 | /* | 1664 | /* |
1462 | * values for blk_level field of the struct block_head | 1665 | * When node gets removed from the tree its blk_level is set to FREE_LEVEL. |
1666 | * It is then used to see whether the node is still in the tree | ||
1463 | */ | 1667 | */ |
1464 | 1668 | #define FREE_LEVEL 0 | |
1465 | #define FREE_LEVEL 0 /* when node gets removed from the tree its | ||
1466 | blk_level is set to FREE_LEVEL. It is then | ||
1467 | used to see whether the node is still in the | ||
1468 | tree */ | ||
1469 | 1669 | ||
1470 | #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ | 1670 | #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ |
1471 | 1671 | ||
1472 | /* Given the buffer head of a formatted node, resolve to the block head of that node. */ | 1672 | /* |
1673 | * Given the buffer head of a formatted node, resolve to the | ||
1674 | * block head of that node. | ||
1675 | */ | ||
1473 | #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) | 1676 | #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) |
1474 | /* Number of items that are in buffer. */ | 1677 | /* Number of items that are in buffer. */ |
1475 | #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) | 1678 | #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) |
@@ -1490,14 +1693,14 @@ struct block_head { | |||
1490 | #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ | 1693 | #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ |
1491 | && B_LEVEL(bh) <= MAX_HEIGHT) | 1694 | && B_LEVEL(bh) <= MAX_HEIGHT) |
1492 | 1695 | ||
1493 | /***************************************************************************/ | 1696 | /*************************************************************************** |
1494 | /* STAT DATA */ | 1697 | * STAT DATA * |
1495 | /***************************************************************************/ | 1698 | ***************************************************************************/ |
1496 | 1699 | ||
1497 | // | 1700 | /* |
1498 | // old stat data is 32 bytes long. We are going to distinguish new one by | 1701 | * old stat data is 32 bytes long. We are going to distinguish new one by |
1499 | // different size | 1702 | * different size |
1500 | // | 1703 | */ |
1501 | struct stat_data_v1 { | 1704 | struct stat_data_v1 { |
1502 | __le16 sd_mode; /* file type, permissions */ | 1705 | __le16 sd_mode; /* file type, permissions */ |
1503 | __le16 sd_nlink; /* number of hard links */ | 1706 | __le16 sd_nlink; /* number of hard links */ |
@@ -1506,20 +1709,25 @@ struct stat_data_v1 { | |||
1506 | __le32 sd_size; /* file size */ | 1709 | __le32 sd_size; /* file size */ |
1507 | __le32 sd_atime; /* time of last access */ | 1710 | __le32 sd_atime; /* time of last access */ |
1508 | __le32 sd_mtime; /* time file was last modified */ | 1711 | __le32 sd_mtime; /* time file was last modified */ |
1509 | __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ | 1712 | |
1713 | /* | ||
1714 | * time inode (stat data) was last changed | ||
1715 | * (except changes to sd_atime and sd_mtime) | ||
1716 | */ | ||
1717 | __le32 sd_ctime; | ||
1510 | union { | 1718 | union { |
1511 | __le32 sd_rdev; | 1719 | __le32 sd_rdev; |
1512 | __le32 sd_blocks; /* number of blocks file uses */ | 1720 | __le32 sd_blocks; /* number of blocks file uses */ |
1513 | } __attribute__ ((__packed__)) u; | 1721 | } __attribute__ ((__packed__)) u; |
1514 | __le32 sd_first_direct_byte; /* first byte of file which is stored | 1722 | |
1515 | in a direct item: except that if it | 1723 | /* |
1516 | equals 1 it is a symlink and if it | 1724 | * first byte of file which is stored in a direct item: except that if |
1517 | equals ~(__u32)0 there is no | 1725 | * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no |
1518 | direct item. The existence of this | 1726 | * direct item. The existence of this field really grates on me. |
1519 | field really grates on me. Let's | 1727 | * Let's replace it with a macro based on sd_size and our tail |
1520 | replace it with a macro based on | 1728 | * suppression policy. Someday. -Hans |
1521 | sd_size and our tail suppression | 1729 | */ |
1522 | policy. Someday. -Hans */ | 1730 | __le32 sd_first_direct_byte; |
1523 | } __attribute__ ((__packed__)); | 1731 | } __attribute__ ((__packed__)); |
1524 | 1732 | ||
1525 | #define SD_V1_SIZE (sizeof(struct stat_data_v1)) | 1733 | #define SD_V1_SIZE (sizeof(struct stat_data_v1)) |
@@ -1551,8 +1759,10 @@ struct stat_data_v1 { | |||
1551 | 1759 | ||
1552 | /* inode flags stored in sd_attrs (nee sd_reserved) */ | 1760 | /* inode flags stored in sd_attrs (nee sd_reserved) */ |
1553 | 1761 | ||
1554 | /* we want common flags to have the same values as in ext2, | 1762 | /* |
1555 | so chattr(1) will work without problems */ | 1763 | * we want common flags to have the same values as in ext2, |
1764 | * so chattr(1) will work without problems | ||
1765 | */ | ||
1556 | #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL | 1766 | #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL |
1557 | #define REISERFS_APPEND_FL FS_APPEND_FL | 1767 | #define REISERFS_APPEND_FL FS_APPEND_FL |
1558 | #define REISERFS_SYNC_FL FS_SYNC_FL | 1768 | #define REISERFS_SYNC_FL FS_SYNC_FL |
@@ -1572,8 +1782,10 @@ struct stat_data_v1 { | |||
1572 | REISERFS_COMPR_FL | \ | 1782 | REISERFS_COMPR_FL | \ |
1573 | REISERFS_NOTAIL_FL ) | 1783 | REISERFS_NOTAIL_FL ) |
1574 | 1784 | ||
1575 | /* Stat Data on disk (reiserfs version of UFS disk inode minus the | 1785 | /* |
1576 | address blocks) */ | 1786 | * Stat Data on disk (reiserfs version of UFS disk inode minus the |
1787 | * address blocks) | ||
1788 | */ | ||
1577 | struct stat_data { | 1789 | struct stat_data { |
1578 | __le16 sd_mode; /* file type, permissions */ | 1790 | __le16 sd_mode; /* file type, permissions */ |
1579 | __le16 sd_attrs; /* persistent inode flags */ | 1791 | __le16 sd_attrs; /* persistent inode flags */ |
@@ -1583,25 +1795,20 @@ struct stat_data { | |||
1583 | __le32 sd_gid; /* group */ | 1795 | __le32 sd_gid; /* group */ |
1584 | __le32 sd_atime; /* time of last access */ | 1796 | __le32 sd_atime; /* time of last access */ |
1585 | __le32 sd_mtime; /* time file was last modified */ | 1797 | __le32 sd_mtime; /* time file was last modified */ |
1586 | __le32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ | 1798 | |
1799 | /* | ||
1800 | * time inode (stat data) was last changed | ||
1801 | * (except changes to sd_atime and sd_mtime) | ||
1802 | */ | ||
1803 | __le32 sd_ctime; | ||
1587 | __le32 sd_blocks; | 1804 | __le32 sd_blocks; |
1588 | union { | 1805 | union { |
1589 | __le32 sd_rdev; | 1806 | __le32 sd_rdev; |
1590 | __le32 sd_generation; | 1807 | __le32 sd_generation; |
1591 | //__le32 sd_first_direct_byte; | ||
1592 | /* first byte of file which is stored in a | ||
1593 | direct item: except that if it equals 1 | ||
1594 | it is a symlink and if it equals | ||
1595 | ~(__u32)0 there is no direct item. The | ||
1596 | existence of this field really grates | ||
1597 | on me. Let's replace it with a macro | ||
1598 | based on sd_size and our tail | ||
1599 | suppression policy? */ | ||
1600 | } __attribute__ ((__packed__)) u; | 1808 | } __attribute__ ((__packed__)) u; |
1601 | } __attribute__ ((__packed__)); | 1809 | } __attribute__ ((__packed__)); |
1602 | // | 1810 | |
1603 | // this is 44 bytes long | 1811 | /* this is 44 bytes long */ |
1604 | // | ||
1605 | #define SD_SIZE (sizeof(struct stat_data)) | 1812 | #define SD_SIZE (sizeof(struct stat_data)) |
1606 | #define SD_V2_SIZE SD_SIZE | 1813 | #define SD_V2_SIZE SD_SIZE |
1607 | #define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) | 1814 | #define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) |
@@ -1632,48 +1839,61 @@ struct stat_data { | |||
1632 | #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) | 1839 | #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) |
1633 | #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) | 1840 | #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) |
1634 | 1841 | ||
1635 | /***************************************************************************/ | 1842 | /*************************************************************************** |
1636 | /* DIRECTORY STRUCTURE */ | 1843 | * DIRECTORY STRUCTURE * |
1637 | /***************************************************************************/ | 1844 | ***************************************************************************/ |
1638 | /* | 1845 | /* |
1639 | Picture represents the structure of directory items | 1846 | * Picture represents the structure of directory items |
1640 | ________________________________________________ | 1847 | * ________________________________________________ |
1641 | | Array of | | | | | | | 1848 | * | Array of | | | | | | |
1642 | | directory |N-1| N-2 | .... | 1st |0th| | 1849 | * | directory |N-1| N-2 | .... | 1st |0th| |
1643 | | entry headers | | | | | | | 1850 | * | entry headers | | | | | | |
1644 | |_______________|___|_____|________|_______|___| | 1851 | * |_______________|___|_____|________|_______|___| |
1645 | <---- directory entries ------> | 1852 | * <---- directory entries ------> |
1646 | 1853 | * | |
1647 | First directory item has k_offset component 1. We store "." and ".." | 1854 | * First directory item has k_offset component 1. We store "." and ".." |
1648 | in one item, always, we never split "." and ".." into differing | 1855 | * in one item, always, we never split "." and ".." into differing |
1649 | items. This makes, among other things, the code for removing | 1856 | * items. This makes, among other things, the code for removing |
1650 | directories simpler. */ | 1857 | * directories simpler. |
1858 | */ | ||
1651 | #define SD_OFFSET 0 | 1859 | #define SD_OFFSET 0 |
1652 | #define SD_UNIQUENESS 0 | 1860 | #define SD_UNIQUENESS 0 |
1653 | #define DOT_OFFSET 1 | 1861 | #define DOT_OFFSET 1 |
1654 | #define DOT_DOT_OFFSET 2 | 1862 | #define DOT_DOT_OFFSET 2 |
1655 | #define DIRENTRY_UNIQUENESS 500 | 1863 | #define DIRENTRY_UNIQUENESS 500 |
1656 | 1864 | ||
1657 | /* */ | ||
1658 | #define FIRST_ITEM_OFFSET 1 | 1865 | #define FIRST_ITEM_OFFSET 1 |
1659 | 1866 | ||
1660 | /* | 1867 | /* |
1661 | Q: How to get key of object pointed to by entry from entry? | 1868 | * Q: How to get key of object pointed to by entry from entry? |
1662 | 1869 | * | |
1663 | A: Each directory entry has its header. This header has deh_dir_id and deh_objectid fields, those are key | 1870 | * A: Each directory entry has its header. This header has deh_dir_id |
1664 | of object, entry points to */ | 1871 | * and deh_objectid fields, those are key of object, entry points to |
1872 | */ | ||
1665 | 1873 | ||
1666 | /* NOT IMPLEMENTED: | 1874 | /* |
1667 | Directory will someday contain stat data of object */ | 1875 | * NOT IMPLEMENTED: |
1876 | * Directory will someday contain stat data of object | ||
1877 | */ | ||
1668 | 1878 | ||
1669 | struct reiserfs_de_head { | 1879 | struct reiserfs_de_head { |
1670 | __le32 deh_offset; /* third component of the directory entry key */ | 1880 | __le32 deh_offset; /* third component of the directory entry key */ |
1671 | __le32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced | 1881 | |
1672 | by directory entry */ | 1882 | /* |
1673 | __le32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ | 1883 | * objectid of the parent directory of the object, that is referenced |
1884 | * by directory entry | ||
1885 | */ | ||
1886 | __le32 deh_dir_id; | ||
1887 | |||
1888 | /* objectid of the object, that is referenced by directory entry */ | ||
1889 | __le32 deh_objectid; | ||
1674 | __le16 deh_location; /* offset of name in the whole item */ | 1890 | __le16 deh_location; /* offset of name in the whole item */ |
1675 | __le16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether | 1891 | |
1676 | entry is hidden (unlinked) */ | 1892 | /* |
1893 | * whether 1) entry contains stat data (for future), and | ||
1894 | * 2) whether entry is hidden (unlinked) | ||
1895 | */ | ||
1896 | __le16 deh_state; | ||
1677 | } __attribute__ ((__packed__)); | 1897 | } __attribute__ ((__packed__)); |
1678 | #define DEH_SIZE sizeof(struct reiserfs_de_head) | 1898 | #define DEH_SIZE sizeof(struct reiserfs_de_head) |
1679 | #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) | 1899 | #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) |
@@ -1703,9 +1923,11 @@ struct reiserfs_de_head { | |||
1703 | # define ADDR_UNALIGNED_BITS (3) | 1923 | # define ADDR_UNALIGNED_BITS (3) |
1704 | #endif | 1924 | #endif |
1705 | 1925 | ||
1706 | /* These are only used to manipulate deh_state. | 1926 | /* |
1927 | * These are only used to manipulate deh_state. | ||
1707 | * Because of this, we'll use the ext2_ bit routines, | 1928 | * Because of this, we'll use the ext2_ bit routines, |
1708 | * since they are little endian */ | 1929 | * since they are little endian |
1930 | */ | ||
1709 | #ifdef ADDR_UNALIGNED_BITS | 1931 | #ifdef ADDR_UNALIGNED_BITS |
1710 | 1932 | ||
1711 | # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) | 1933 | # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) |
@@ -1740,13 +1962,16 @@ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, | |||
1740 | extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, | 1962 | extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, |
1741 | __le32 par_dirid, __le32 par_objid); | 1963 | __le32 par_dirid, __le32 par_objid); |
1742 | 1964 | ||
1743 | // two entries per block (at least) | 1965 | /* two entries per block (at least) */ |
1744 | #define REISERFS_MAX_NAME(block_size) 255 | 1966 | #define REISERFS_MAX_NAME(block_size) 255 |
1745 | 1967 | ||
1746 | /* this structure is used for operations on directory entries. It is | 1968 | /* |
1747 | not a disk structure. */ | 1969 | * this structure is used for operations on directory entries. It is |
1748 | /* When reiserfs_find_entry or search_by_entry_key find directory | 1970 | * not a disk structure. |
1749 | entry, they return filled reiserfs_dir_entry structure */ | 1971 | * |
1972 | * When reiserfs_find_entry or search_by_entry_key find directory | ||
1973 | * entry, they return filled reiserfs_dir_entry structure | ||
1974 | */ | ||
1750 | struct reiserfs_dir_entry { | 1975 | struct reiserfs_dir_entry { |
1751 | struct buffer_head *de_bh; | 1976 | struct buffer_head *de_bh; |
1752 | int de_item_num; | 1977 | int de_item_num; |
@@ -1764,7 +1989,10 @@ struct reiserfs_dir_entry { | |||
1764 | struct cpu_key de_entry_key; | 1989 | struct cpu_key de_entry_key; |
1765 | }; | 1990 | }; |
1766 | 1991 | ||
1767 | /* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ | 1992 | /* |
1993 | * these defines are useful when a particular member of | ||
1994 | * a reiserfs_dir_entry is needed | ||
1995 | */ | ||
1768 | 1996 | ||
1769 | /* pointer to file name, stored in entry */ | 1997 | /* pointer to file name, stored in entry */ |
1770 | #define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ | 1998 | #define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ |
@@ -1791,11 +2019,13 @@ struct reiserfs_dir_entry { | |||
1791 | * |______|_______________|___________________|___________| | 2019 | * |______|_______________|___________________|___________| |
1792 | */ | 2020 | */ |
1793 | 2021 | ||
1794 | /***************************************************************************/ | 2022 | /*************************************************************************** |
1795 | /* DISK CHILD */ | 2023 | * DISK CHILD * |
1796 | /***************************************************************************/ | 2024 | ***************************************************************************/ |
1797 | /* Disk child pointer: The pointer from an internal node of the tree | 2025 | /* |
1798 | to a node that is on disk. */ | 2026 | * Disk child pointer: |
2027 | * The pointer from an internal node of the tree to a node that is on disk. | ||
2028 | */ | ||
1799 | struct disk_child { | 2029 | struct disk_child { |
1800 | __le32 dc_block_number; /* Disk child's block number. */ | 2030 | __le32 dc_block_number; /* Disk child's block number. */ |
1801 | __le16 dc_size; /* Disk child's used space. */ | 2031 | __le16 dc_size; /* Disk child's used space. */ |
@@ -1828,47 +2058,66 @@ struct disk_child { | |||
1828 | #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) | 2058 | #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) |
1829 | #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) | 2059 | #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) |
1830 | 2060 | ||
1831 | /***************************************************************************/ | 2061 | /*************************************************************************** |
1832 | /* PATH STRUCTURES AND DEFINES */ | 2062 | * PATH STRUCTURES AND DEFINES * |
1833 | /***************************************************************************/ | 2063 | ***************************************************************************/ |
1834 | 2064 | ||
1835 | /* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the | 2065 | /* |
1836 | key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it | 2066 | * search_by_key fills up the path from the root to the leaf as it descends |
1837 | does not find them in the cache it reads them from disk. For each node search_by_key finds using | 2067 | * the tree looking for the key. It uses reiserfs_bread to try to find |
1838 | reiserfs_bread it then uses bin_search to look through that node. bin_search will find the | 2068 | * buffers in the cache given their block number. If it does not find |
1839 | position of the block_number of the next node if it is looking through an internal node. If it | 2069 | * them in the cache it reads them from disk. For each node search_by_key |
1840 | is looking through a leaf node bin_search will find the position of the item which has key either | 2070 | * finds using reiserfs_bread it then uses bin_search to look through that |
1841 | equal to given key, or which is the maximal key less than the given key. */ | 2071 | * node. bin_search will find the position of the block_number of the next |
2072 | * node if it is looking through an internal node. If it is looking through | ||
2073 | * a leaf node bin_search will find the position of the item which has key | ||
2074 | * either equal to given key, or which is the maximal key less than the | ||
2075 | * given key. | ||
2076 | */ | ||
1842 | 2077 | ||
1843 | struct path_element { | 2078 | struct path_element { |
1844 | struct buffer_head *pe_buffer; /* Pointer to the buffer at the path in the tree. */ | 2079 | /* Pointer to the buffer at the path in the tree. */ |
1845 | int pe_position; /* Position in the tree node which is placed in the */ | 2080 | struct buffer_head *pe_buffer; |
1846 | /* buffer above. */ | 2081 | /* Position in the tree node which is placed in the buffer above. */ |
2082 | int pe_position; | ||
1847 | }; | 2083 | }; |
1848 | 2084 | ||
1849 | #define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ | 2085 | /* |
1850 | #define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ | 2086 | * maximal height of a tree. don't change this without |
1851 | #define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ | 2087 | * changing JOURNAL_PER_BALANCE_CNT |
1852 | 2088 | */ | |
1853 | #define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ | 2089 | #define MAX_HEIGHT 5 |
1854 | #define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ | 2090 | |
1855 | 2091 | /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ | |
1856 | /* We need to keep track of who the ancestors of nodes are. When we | 2092 | #define EXTENDED_MAX_HEIGHT 7 |
1857 | perform a search we record which nodes were visited while | 2093 | |
1858 | descending the tree looking for the node we searched for. This list | 2094 | /* Must be equal to at least 2. */ |
1859 | of nodes is called the path. This information is used while | 2095 | #define FIRST_PATH_ELEMENT_OFFSET 2 |
1860 | performing balancing. Note that this path information may become | 2096 | |
1861 | invalid, and this means we must check it when using it to see if it | 2097 | /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ |
1862 | is still valid. You'll need to read search_by_key and the comments | 2098 | #define ILLEGAL_PATH_ELEMENT_OFFSET 1 |
1863 | in it, especially about decrement_counters_in_path(), to understand | 2099 | |
1864 | this structure. | 2100 | /* this MUST be MAX_HEIGHT + 1. See about FEB below */ |
1865 | 2101 | #define MAX_FEB_SIZE 6 | |
1866 | Paths make the code so much harder to work with and debug.... An | 2102 | |
1867 | enormous number of bugs are due to them, and trying to write or modify | 2103 | /* |
1868 | code that uses them just makes my head hurt. They are based on an | 2104 | * We need to keep track of who the ancestors of nodes are. When we |
1869 | excessive effort to avoid disturbing the precious VFS code.:-( The | 2105 | * perform a search we record which nodes were visited while |
1870 | gods only know how we are going to SMP the code that uses them. | 2106 | * descending the tree looking for the node we searched for. This list |
1871 | znodes are the way! */ | 2107 | * of nodes is called the path. This information is used while |
2108 | * performing balancing. Note that this path information may become | ||
2109 | * invalid, and this means we must check it when using it to see if it | ||
2110 | * is still valid. You'll need to read search_by_key and the comments | ||
2111 | * in it, especially about decrement_counters_in_path(), to understand | ||
2112 | * this structure. | ||
2113 | * | ||
2114 | * Paths make the code so much harder to work with and debug.... An | ||
2115 | * enormous number of bugs are due to them, and trying to write or modify | ||
2116 | * code that uses them just makes my head hurt. They are based on an | ||
2117 | * excessive effort to avoid disturbing the precious VFS code.:-( The | ||
2118 | * gods only know how we are going to SMP the code that uses them. | ||
2119 | * znodes are the way! | ||
2120 | */ | ||
1872 | 2121 | ||
1873 | #define PATH_READA 0x1 /* do read ahead */ | 2122 | #define PATH_READA 0x1 /* do read ahead */ |
1874 | #define PATH_READA_BACK 0x2 /* read backwards */ | 2123 | #define PATH_READA_BACK 0x2 /* read backwards */ |
@@ -1876,7 +2125,8 @@ znodes are the way! */ | |||
1876 | struct treepath { | 2125 | struct treepath { |
1877 | int path_length; /* Length of the array above. */ | 2126 | int path_length; /* Length of the array above. */ |
1878 | int reada; | 2127 | int reada; |
1879 | struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ | 2128 | /* Array of the path elements. */ |
2129 | struct path_element path_elements[EXTENDED_MAX_HEIGHT]; | ||
1880 | int pos_in_item; | 2130 | int pos_in_item; |
1881 | }; | 2131 | }; |
1882 | 2132 | ||
@@ -1895,20 +2145,31 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} | |||
1895 | #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) | 2145 | #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) |
1896 | 2146 | ||
1897 | #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) | 2147 | #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) |
1898 | /* you know, to the person who didn't | 2148 | |
1899 | write this the macro name does not | 2149 | /* |
1900 | at first suggest what it does. | 2150 | * you know, to the person who didn't write this the macro name does not |
1901 | Maybe POSITION_FROM_PATH_END? Or | 2151 | * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or |
1902 | maybe we should just focus on | 2152 | * maybe we should just focus on dumping paths... -Hans |
1903 | dumping paths... -Hans */ | 2153 | */ |
1904 | #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) | 2154 | #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) |
1905 | 2155 | ||
1906 | /* in do_balance leaf has h == 0 in contrast with path structure, | 2156 | /* |
1907 | where root has level == 0. That is why we need these defines */ | 2157 | * in do_balance leaf has h == 0 in contrast with path structure, |
1908 | #define PATH_H_PBUFFER(path, h) PATH_OFFSET_PBUFFER (path, path->path_length - (h)) /* tb->S[h] */ | 2158 | * where root has level == 0. That is why we need these defines |
1909 | #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ | 2159 | */ |
1910 | #define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) | 2160 | |
1911 | #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ | 2161 | /* tb->S[h] */ |
2162 | #define PATH_H_PBUFFER(path, h) \ | ||
2163 | PATH_OFFSET_PBUFFER(path, path->path_length - (h)) | ||
2164 | |||
2165 | /* tb->F[h] or tb->S[0]->b_parent */ | ||
2166 | #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1) | ||
2167 | |||
2168 | #define PATH_H_POSITION(path, h) \ | ||
2169 | PATH_OFFSET_POSITION(path, path->path_length - (h)) | ||
2170 | |||
2171 | /* tb->S[h]->b_item_order */ | ||
2172 | #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) | ||
1912 | 2173 | ||
1913 | #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) | 2174 | #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) |
1914 | 2175 | ||
@@ -1973,16 +2234,14 @@ static inline void *tp_item_body(const struct treepath *path) | |||
1973 | /* get item body */ | 2234 | /* get item body */ |
1974 | #define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) | 2235 | #define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) |
1975 | 2236 | ||
1976 | /* length of the directory entry in directory item. This define | ||
1977 | calculates length of i-th directory entry using directory entry | ||
1978 | locations from dir entry head. When it calculates length of 0-th | ||
1979 | directory entry, it uses length of whole item in place of entry | ||
1980 | location of the non-existent following entry in the calculation. | ||
1981 | See picture above.*/ | ||
1982 | /* | 2237 | /* |
1983 | #define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ | 2238 | * length of the directory entry in directory item. This define |
1984 | ((i) ? (deh_location((deh)-1) - deh_location((deh))) : (ih_item_len((ih)) - deh_location((deh)))) | 2239 | * calculates length of i-th directory entry using directory entry |
1985 | */ | 2240 | * locations from dir entry head. When it calculates length of 0-th |
2241 | * directory entry, it uses length of whole item in place of entry | ||
2242 | * location of the non-existent following entry in the calculation. | ||
2243 | * See picture above. | ||
2244 | */ | ||
1986 | static inline int entry_length(const struct buffer_head *bh, | 2245 | static inline int entry_length(const struct buffer_head *bh, |
1987 | const struct item_head *ih, int pos_in_item) | 2246 | const struct item_head *ih, int pos_in_item) |
1988 | { | 2247 | { |
@@ -1995,15 +2254,15 @@ static inline int entry_length(const struct buffer_head *bh, | |||
1995 | return ih_item_len(ih) - deh_location(deh); | 2254 | return ih_item_len(ih) - deh_location(deh); |
1996 | } | 2255 | } |
1997 | 2256 | ||
1998 | /***************************************************************************/ | 2257 | /*************************************************************************** |
1999 | /* MISC */ | 2258 | * MISC * |
2000 | /***************************************************************************/ | 2259 | ***************************************************************************/ |
2001 | 2260 | ||
2002 | /* Size of pointer to the unformatted node. */ | 2261 | /* Size of pointer to the unformatted node. */ |
2003 | #define UNFM_P_SIZE (sizeof(unp_t)) | 2262 | #define UNFM_P_SIZE (sizeof(unp_t)) |
2004 | #define UNFM_P_SHIFT 2 | 2263 | #define UNFM_P_SHIFT 2 |
2005 | 2264 | ||
2006 | // in in-core inode key is stored on le form | 2265 | /* in in-core inode key is stored on le form */ |
2007 | #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) | 2266 | #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) |
2008 | 2267 | ||
2009 | #define MAX_UL_INT 0xffffffff | 2268 | #define MAX_UL_INT 0xffffffff |
@@ -2019,7 +2278,6 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
2019 | return (loff_t) ((~(__u64) 0) >> 4); | 2278 | return (loff_t) ((~(__u64) 0) >> 4); |
2020 | } | 2279 | } |
2021 | 2280 | ||
2022 | /*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ | ||
2023 | #define MAX_KEY_OBJECTID MAX_UL_INT | 2281 | #define MAX_KEY_OBJECTID MAX_UL_INT |
2024 | 2282 | ||
2025 | #define MAX_B_NUM MAX_UL_INT | 2283 | #define MAX_B_NUM MAX_UL_INT |
@@ -2028,9 +2286,12 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
2028 | /* the purpose is to detect overflow of an unsigned short */ | 2286 | /* the purpose is to detect overflow of an unsigned short */ |
2029 | #define REISERFS_LINK_MAX (MAX_US_INT - 1000) | 2287 | #define REISERFS_LINK_MAX (MAX_US_INT - 1000) |
2030 | 2288 | ||
2031 | /* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ | 2289 | /* |
2032 | #define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ | 2290 | * The following defines are used in reiserfs_insert_item |
2033 | #define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ | 2291 | * and reiserfs_append_item |
2292 | */ | ||
2293 | #define REISERFS_KERNEL_MEM 0 /* kernel memory mode */ | ||
2294 | #define REISERFS_USER_MEM 1 /* user memory mode */ | ||
2034 | 2295 | ||
2035 | #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) | 2296 | #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) |
2036 | #define get_generation(s) atomic_read (&fs_generation(s)) | 2297 | #define get_generation(s) atomic_read (&fs_generation(s)) |
@@ -2042,46 +2303,65 @@ static inline loff_t max_reiserfs_offset(struct inode *inode) | |||
2042 | __fs_changed(gen, s); \ | 2303 | __fs_changed(gen, s); \ |
2043 | }) | 2304 | }) |
2044 | 2305 | ||
2045 | /***************************************************************************/ | 2306 | /*************************************************************************** |
2046 | /* FIXATE NODES */ | 2307 | * FIXATE NODES * |
2047 | /***************************************************************************/ | 2308 | ***************************************************************************/ |
2048 | 2309 | ||
2049 | #define VI_TYPE_LEFT_MERGEABLE 1 | 2310 | #define VI_TYPE_LEFT_MERGEABLE 1 |
2050 | #define VI_TYPE_RIGHT_MERGEABLE 2 | 2311 | #define VI_TYPE_RIGHT_MERGEABLE 2 |
2051 | 2312 | ||
2052 | /* To make any changes in the tree we always first find node, that | 2313 | /* |
2053 | contains item to be changed/deleted or place to insert a new | 2314 | * To make any changes in the tree we always first find node, that |
2054 | item. We call this node S. To do balancing we need to decide what | 2315 | * contains item to be changed/deleted or place to insert a new |
2055 | we will shift to left/right neighbor, or to a new node, where new | 2316 | * item. We call this node S. To do balancing we need to decide what |
2056 | item will be etc. To make this analysis simpler we build virtual | 2317 | * we will shift to left/right neighbor, or to a new node, where new |
2057 | node. Virtual node is an array of items, that will replace items of | 2318 | * item will be etc. To make this analysis simpler we build virtual |
2058 | node S. (For instance if we are going to delete an item, virtual | 2319 | * node. Virtual node is an array of items, that will replace items of |
2059 | node does not contain it). Virtual node keeps information about | 2320 | * node S. (For instance if we are going to delete an item, virtual |
2060 | item sizes and types, mergeability of first and last items, sizes | 2321 | * node does not contain it). Virtual node keeps information about |
2061 | of all entries in directory item. We use this array of items when | 2322 | * item sizes and types, mergeability of first and last items, sizes |
2062 | calculating what we can shift to neighbors and how many nodes we | 2323 | * of all entries in directory item. We use this array of items when |
2063 | have to have if we do not any shiftings, if we shift to left/right | 2324 | * calculating what we can shift to neighbors and how many nodes we |
2064 | neighbor or to both. */ | 2325 | * have to have if we do not any shiftings, if we shift to left/right |
2326 | * neighbor or to both. | ||
2327 | */ | ||
2065 | struct virtual_item { | 2328 | struct virtual_item { |
2066 | int vi_index; // index in the array of item operations | 2329 | int vi_index; /* index in the array of item operations */ |
2067 | unsigned short vi_type; // left/right mergeability | 2330 | unsigned short vi_type; /* left/right mergeability */ |
2068 | unsigned short vi_item_len; /* length of item that it will have after balancing */ | 2331 | |
2332 | /* length of item that it will have after balancing */ | ||
2333 | unsigned short vi_item_len; | ||
2334 | |||
2069 | struct item_head *vi_ih; | 2335 | struct item_head *vi_ih; |
2070 | const char *vi_item; // body of item (old or new) | 2336 | const char *vi_item; /* body of item (old or new) */ |
2071 | const void *vi_new_data; // 0 always but paste mode | 2337 | const void *vi_new_data; /* 0 always but paste mode */ |
2072 | void *vi_uarea; // item specific area | 2338 | void *vi_uarea; /* item specific area */ |
2073 | }; | 2339 | }; |
2074 | 2340 | ||
2075 | struct virtual_node { | 2341 | struct virtual_node { |
2076 | char *vn_free_ptr; /* this is a pointer to the free space in the buffer */ | 2342 | /* this is a pointer to the free space in the buffer */ |
2343 | char *vn_free_ptr; | ||
2344 | |||
2077 | unsigned short vn_nr_item; /* number of items in virtual node */ | 2345 | unsigned short vn_nr_item; /* number of items in virtual node */ |
2078 | short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ | 2346 | |
2079 | short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ | 2347 | /* |
2348 | * size of node , that node would have if it has | ||
2349 | * unlimited size and no balancing is performed | ||
2350 | */ | ||
2351 | short vn_size; | ||
2352 | |||
2353 | /* mode of balancing (paste, insert, delete, cut) */ | ||
2354 | short vn_mode; | ||
2355 | |||
2080 | short vn_affected_item_num; | 2356 | short vn_affected_item_num; |
2081 | short vn_pos_in_item; | 2357 | short vn_pos_in_item; |
2082 | struct item_head *vn_ins_ih; /* item header of inserted item, 0 for other modes */ | 2358 | |
2359 | /* item header of inserted item, 0 for other modes */ | ||
2360 | struct item_head *vn_ins_ih; | ||
2083 | const void *vn_data; | 2361 | const void *vn_data; |
2084 | struct virtual_item *vn_vi; /* array of items (including a new one, excluding item to be deleted) */ | 2362 | |
2363 | /* array of items (including a new one, excluding item to be deleted) */ | ||
2364 | struct virtual_item *vn_vi; | ||
2085 | }; | 2365 | }; |
2086 | 2366 | ||
2087 | /* used by directory items when creating virtual nodes */ | 2367 | /* used by directory items when creating virtual nodes */ |
@@ -2091,22 +2371,25 @@ struct direntry_uarea { | |||
2091 | __u16 entry_sizes[1]; | 2371 | __u16 entry_sizes[1]; |
2092 | } __attribute__ ((__packed__)); | 2372 | } __attribute__ ((__packed__)); |
2093 | 2373 | ||
2094 | /***************************************************************************/ | 2374 | /*************************************************************************** |
2095 | /* TREE BALANCE */ | 2375 | * TREE BALANCE * |
2096 | /***************************************************************************/ | 2376 | ***************************************************************************/ |
2097 | 2377 | ||
2098 | /* This temporary structure is used in tree balance algorithms, and | 2378 | /* |
2099 | constructed as we go to the extent that its various parts are | 2379 | * This temporary structure is used in tree balance algorithms, and |
2100 | needed. It contains arrays of nodes that can potentially be | 2380 | * constructed as we go to the extent that its various parts are |
2101 | involved in the balancing of node S, and parameters that define how | 2381 | * needed. It contains arrays of nodes that can potentially be |
2102 | each of the nodes must be balanced. Note that in these algorithms | 2382 | * involved in the balancing of node S, and parameters that define how |
2103 | for balancing the worst case is to need to balance the current node | 2383 | * each of the nodes must be balanced. Note that in these algorithms |
2104 | S and the left and right neighbors and all of their parents plus | 2384 | * for balancing the worst case is to need to balance the current node |
2105 | create a new node. We implement S1 balancing for the leaf nodes | 2385 | * S and the left and right neighbors and all of their parents plus |
2106 | and S0 balancing for the internal nodes (S1 and S0 are defined in | 2386 | * create a new node. We implement S1 balancing for the leaf nodes |
2107 | our papers.)*/ | 2387 | * and S0 balancing for the internal nodes (S1 and S0 are defined in |
2388 | * our papers.) | ||
2389 | */ | ||
2108 | 2390 | ||
2109 | #define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ | 2391 | /* size of the array of buffers to free at end of do_balance */ |
2392 | #define MAX_FREE_BLOCK 7 | ||
2110 | 2393 | ||
2111 | /* maximum number of FEB blocknrs on a single level */ | 2394 | /* maximum number of FEB blocknrs on a single level */ |
2112 | #define MAX_AMOUNT_NEEDED 2 | 2395 | #define MAX_AMOUNT_NEEDED 2 |
@@ -2118,64 +2401,132 @@ struct tree_balance { | |||
2118 | struct super_block *tb_sb; | 2401 | struct super_block *tb_sb; |
2119 | struct reiserfs_transaction_handle *transaction_handle; | 2402 | struct reiserfs_transaction_handle *transaction_handle; |
2120 | struct treepath *tb_path; | 2403 | struct treepath *tb_path; |
2121 | struct buffer_head *L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ | 2404 | |
2122 | struct buffer_head *R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ | 2405 | /* array of left neighbors of nodes in the path */ |
2123 | struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ | 2406 | struct buffer_head *L[MAX_HEIGHT]; |
2124 | struct buffer_head *FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ | 2407 | |
2125 | struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ | 2408 | /* array of right neighbors of nodes in the path */ |
2126 | struct buffer_head *CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ | 2409 | struct buffer_head *R[MAX_HEIGHT]; |
2127 | 2410 | ||
2128 | struct buffer_head *FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals | 2411 | /* array of fathers of the left neighbors */ |
2129 | cur_blknum. */ | 2412 | struct buffer_head *FL[MAX_HEIGHT]; |
2413 | |||
2414 | /* array of fathers of the right neighbors */ | ||
2415 | struct buffer_head *FR[MAX_HEIGHT]; | ||
2416 | /* array of common parents of center node and its left neighbor */ | ||
2417 | struct buffer_head *CFL[MAX_HEIGHT]; | ||
2418 | |||
2419 | /* array of common parents of center node and its right neighbor */ | ||
2420 | struct buffer_head *CFR[MAX_HEIGHT]; | ||
2421 | |||
2422 | /* | ||
2423 | * array of empty buffers. Number of buffers in array equals | ||
2424 | * cur_blknum. | ||
2425 | */ | ||
2426 | struct buffer_head *FEB[MAX_FEB_SIZE]; | ||
2130 | struct buffer_head *used[MAX_FEB_SIZE]; | 2427 | struct buffer_head *used[MAX_FEB_SIZE]; |
2131 | struct buffer_head *thrown[MAX_FEB_SIZE]; | 2428 | struct buffer_head *thrown[MAX_FEB_SIZE]; |
2132 | int lnum[MAX_HEIGHT]; /* array of number of items which must be | 2429 | |
2133 | shifted to the left in order to balance the | 2430 | /* |
2134 | current node; for leaves includes item that | 2431 | * array of number of items which must be shifted to the left in |
2135 | will be partially shifted; for internal | 2432 | * order to balance the current node; for leaves includes item that |
2136 | nodes, it is the number of child pointers | 2433 | * will be partially shifted; for internal nodes, it is the number |
2137 | rather than items. It includes the new item | 2434 | * of child pointers rather than items. It includes the new item |
2138 | being created. The code sometimes subtracts | 2435 | * being created. The code sometimes subtracts one to get the |
2139 | one to get the number of wholly shifted | 2436 | * number of wholly shifted items for other purposes. |
2140 | items for other purposes. */ | 2437 | */ |
2141 | int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ | 2438 | int lnum[MAX_HEIGHT]; |
2142 | int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and | 2439 | |
2143 | S[h] to its item number within the node CFL[h] */ | 2440 | /* substitute right for left in comment above */ |
2144 | int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ | 2441 | int rnum[MAX_HEIGHT]; |
2145 | int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from | 2442 | |
2146 | S[h]. A negative value means removing. */ | 2443 | /* |
2147 | int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after | 2444 | * array indexed by height h mapping the key delimiting L[h] and |
2148 | balancing on the level h of the tree. If 0 then S is | 2445 | * S[h] to its item number within the node CFL[h] |
2149 | being deleted, if 1 then S is remaining and no new nodes | 2446 | */ |
2150 | are being created, if 2 or 3 then 1 or 2 new nodes is | 2447 | int lkey[MAX_HEIGHT]; |
2151 | being created */ | 2448 | |
2449 | /* substitute r for l in comment above */ | ||
2450 | int rkey[MAX_HEIGHT]; | ||
2451 | |||
2452 | /* | ||
2453 | * the number of bytes by we are trying to add or remove from | ||
2454 | * S[h]. A negative value means removing. | ||
2455 | */ | ||
2456 | int insert_size[MAX_HEIGHT]; | ||
2457 | |||
2458 | /* | ||
2459 | * number of nodes that will replace node S[h] after balancing | ||
2460 | * on the level h of the tree. If 0 then S is being deleted, | ||
2461 | * if 1 then S is remaining and no new nodes are being created, | ||
2462 | * if 2 or 3 then 1 or 2 new nodes is being created | ||
2463 | */ | ||
2464 | int blknum[MAX_HEIGHT]; | ||
2152 | 2465 | ||
2153 | /* fields that are used only for balancing leaves of the tree */ | 2466 | /* fields that are used only for balancing leaves of the tree */ |
2154 | int cur_blknum; /* number of empty blocks having been already allocated */ | 2467 | |
2155 | int s0num; /* number of items that fall into left most node when S[0] splits */ | 2468 | /* number of empty blocks having been already allocated */ |
2156 | int s1num; /* number of items that fall into first new node when S[0] splits */ | 2469 | int cur_blknum; |
2157 | int s2num; /* number of items that fall into second new node when S[0] splits */ | 2470 | |
2158 | int lbytes; /* number of bytes which can flow to the left neighbor from the left */ | 2471 | /* number of items that fall into left most node when S[0] splits */ |
2159 | /* most liquid item that cannot be shifted from S[0] entirely */ | 2472 | int s0num; |
2160 | /* if -1 then nothing will be partially shifted */ | 2473 | |
2161 | int rbytes; /* number of bytes which will flow to the right neighbor from the right */ | 2474 | /* number of items that fall into first new node when S[0] splits */ |
2162 | /* most liquid item that cannot be shifted from S[0] entirely */ | 2475 | int s1num; |
2163 | /* if -1 then nothing will be partially shifted */ | 2476 | |
2164 | int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ | 2477 | /* number of items that fall into second new node when S[0] splits */ |
2165 | /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ | 2478 | int s2num; |
2479 | |||
2480 | /* | ||
2481 | * number of bytes which can flow to the left neighbor from the left | ||
2482 | * most liquid item that cannot be shifted from S[0] entirely | ||
2483 | * if -1 then nothing will be partially shifted | ||
2484 | */ | ||
2485 | int lbytes; | ||
2486 | |||
2487 | /* | ||
2488 | * number of bytes which will flow to the right neighbor from the right | ||
2489 | * most liquid item that cannot be shifted from S[0] entirely | ||
2490 | * if -1 then nothing will be partially shifted | ||
2491 | */ | ||
2492 | int rbytes; | ||
2493 | |||
2494 | /* | ||
2495 | * number of bytes which flow to the first new node when S[0] splits | ||
2496 | * note: if S[0] splits into 3 nodes, then items do not need to be cut | ||
2497 | */ | ||
2498 | int s1bytes; | ||
2166 | int s2bytes; | 2499 | int s2bytes; |
2167 | struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ | 2500 | |
2168 | char *vn_buf; /* kmalloced memory. Used to create | 2501 | /* |
2169 | virtual node and keep map of | 2502 | * buffers which are to be freed after do_balance finishes |
2170 | dirtied bitmap blocks */ | 2503 | * by unfix_nodes |
2504 | */ | ||
2505 | struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; | ||
2506 | |||
2507 | /* | ||
2508 | * kmalloced memory. Used to create virtual node and keep | ||
2509 | * map of dirtied bitmap blocks | ||
2510 | */ | ||
2511 | char *vn_buf; | ||
2512 | |||
2171 | int vn_buf_size; /* size of the vn_buf */ | 2513 | int vn_buf_size; /* size of the vn_buf */ |
2172 | struct virtual_node *tb_vn; /* VN starts after bitmap of bitmap blocks */ | ||
2173 | 2514 | ||
2174 | int fs_gen; /* saved value of `reiserfs_generation' counter | 2515 | /* VN starts after bitmap of bitmap blocks */ |
2175 | see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ | 2516 | struct virtual_node *tb_vn; |
2517 | |||
2518 | /* | ||
2519 | * saved value of `reiserfs_generation' counter see | ||
2520 | * FILESYSTEM_CHANGED() macro in reiserfs_fs.h | ||
2521 | */ | ||
2522 | int fs_gen; | ||
2523 | |||
2176 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 2524 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
2177 | struct in_core_key key; /* key pointer, to pass to block allocator or | 2525 | /* |
2178 | another low-level subsystem */ | 2526 | * key pointer, to pass to block allocator or |
2527 | * another low-level subsystem | ||
2528 | */ | ||
2529 | struct in_core_key key; | ||
2179 | #endif | 2530 | #endif |
2180 | }; | 2531 | }; |
2181 | 2532 | ||
@@ -2183,20 +2534,24 @@ struct tree_balance { | |||
2183 | 2534 | ||
2184 | /* When inserting an item. */ | 2535 | /* When inserting an item. */ |
2185 | #define M_INSERT 'i' | 2536 | #define M_INSERT 'i' |
2186 | /* When inserting into (directories only) or appending onto an already | 2537 | /* |
2187 | existent item. */ | 2538 | * When inserting into (directories only) or appending onto an already |
2539 | * existent item. | ||
2540 | */ | ||
2188 | #define M_PASTE 'p' | 2541 | #define M_PASTE 'p' |
2189 | /* When deleting an item. */ | 2542 | /* When deleting an item. */ |
2190 | #define M_DELETE 'd' | 2543 | #define M_DELETE 'd' |
2191 | /* When truncating an item or removing an entry from a (directory) item. */ | 2544 | /* When truncating an item or removing an entry from a (directory) item. */ |
2192 | #define M_CUT 'c' | 2545 | #define M_CUT 'c' |
2193 | 2546 | ||
2194 | /* used when balancing on leaf level skipped (in reiserfsck) */ | 2547 | /* used when balancing on leaf level skipped (in reiserfsck) */ |
2195 | #define M_INTERNAL 'n' | 2548 | #define M_INTERNAL 'n' |
2196 | 2549 | ||
2197 | /* When further balancing is not needed, then do_balance does not need | 2550 | /* |
2198 | to be called. */ | 2551 | * When further balancing is not needed, then do_balance does not need |
2199 | #define M_SKIP_BALANCING 's' | 2552 | * to be called. |
2553 | */ | ||
2554 | #define M_SKIP_BALANCING 's' | ||
2200 | #define M_CONVERT 'v' | 2555 | #define M_CONVERT 'v' |
2201 | 2556 | ||
2202 | /* modes of leaf_move_items */ | 2557 | /* modes of leaf_move_items */ |
@@ -2209,8 +2564,10 @@ struct tree_balance { | |||
2209 | #define FIRST_TO_LAST 0 | 2564 | #define FIRST_TO_LAST 0 |
2210 | #define LAST_TO_FIRST 1 | 2565 | #define LAST_TO_FIRST 1 |
2211 | 2566 | ||
2212 | /* used in do_balance for passing parent of node information that has | 2567 | /* |
2213 | been gotten from tb struct */ | 2568 | * used in do_balance for passing parent of node information that has |
2569 | * been gotten from tb struct | ||
2570 | */ | ||
2214 | struct buffer_info { | 2571 | struct buffer_info { |
2215 | struct tree_balance *tb; | 2572 | struct tree_balance *tb; |
2216 | struct buffer_head *bi_bh; | 2573 | struct buffer_head *bi_bh; |
@@ -2228,20 +2585,24 @@ static inline struct super_block *sb_from_bi(struct buffer_info *bi) | |||
2228 | return bi ? sb_from_tb(bi->tb) : NULL; | 2585 | return bi ? sb_from_tb(bi->tb) : NULL; |
2229 | } | 2586 | } |
2230 | 2587 | ||
2231 | /* there are 4 types of items: stat data, directory item, indirect, direct. | 2588 | /* |
2232 | +-------------------+------------+--------------+------------+ | 2589 | * there are 4 types of items: stat data, directory item, indirect, direct. |
2233 | | | k_offset | k_uniqueness | mergeable? | | 2590 | * +-------------------+------------+--------------+------------+ |
2234 | +-------------------+------------+--------------+------------+ | 2591 | * | | k_offset | k_uniqueness | mergeable? | |
2235 | | stat data | 0 | 0 | no | | 2592 | * +-------------------+------------+--------------+------------+ |
2236 | +-------------------+------------+--------------+------------+ | 2593 | * | stat data | 0 | 0 | no | |
2237 | | 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | | 2594 | * +-------------------+------------+--------------+------------+ |
2238 | | non 1st directory | hash value | | yes | | 2595 | * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no | |
2239 | | item | | | | | 2596 | * | non 1st directory | hash value | UNIQUENESS | yes | |
2240 | +-------------------+------------+--------------+------------+ | 2597 | * | item | | | | |
2241 | | indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object | 2598 | * +-------------------+------------+--------------+------------+ |
2242 | +-------------------+------------+--------------+------------+ | 2599 | * | indirect item | offset + 1 |TYPE_INDIRECT | [1] | |
2243 | | direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object | 2600 | * +-------------------+------------+--------------+------------+ |
2244 | +-------------------+------------+--------------+------------+ | 2601 | * | direct item | offset + 1 |TYPE_DIRECT | [2] | |
2602 | * +-------------------+------------+--------------+------------+ | ||
2603 | * | ||
2604 | * [1] if this is not the first indirect item of the object | ||
2605 | * [2] if this is not the first direct item of the object | ||
2245 | */ | 2606 | */ |
2246 | 2607 | ||
2247 | struct item_operations { | 2608 | struct item_operations { |
@@ -2280,22 +2641,30 @@ extern struct item_operations *item_ops[TYPE_ANY + 1]; | |||
2280 | /* number of blocks pointed to by the indirect item */ | 2641 | /* number of blocks pointed to by the indirect item */ |
2281 | #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) | 2642 | #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) |
2282 | 2643 | ||
2283 | /* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ | 2644 | /* |
2645 | * the used space within the unformatted node corresponding | ||
2646 | * to pos within the item pointed to by ih | ||
2647 | */ | ||
2284 | #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) | 2648 | #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) |
2285 | 2649 | ||
2286 | /* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ | 2650 | /* |
2651 | * number of bytes contained by the direct item or the | ||
2652 | * unformatted nodes the indirect item points to | ||
2653 | */ | ||
2287 | 2654 | ||
2288 | /* following defines use reiserfs buffer header and item header */ | 2655 | /* following defines use reiserfs buffer header and item header */ |
2289 | 2656 | ||
2290 | /* get stat-data */ | 2657 | /* get stat-data */ |
2291 | #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) | 2658 | #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) |
2292 | 2659 | ||
2293 | // this is 3976 for size==4096 | 2660 | /* this is 3976 for size==4096 */ |
2294 | #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) | 2661 | #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) |
2295 | 2662 | ||
2296 | /* indirect items consist of entries which contain blocknrs, pos | 2663 | /* |
2297 | indicates which entry, and B_I_POS_UNFM_POINTER resolves to the | 2664 | * indirect items consist of entries which contain blocknrs, pos |
2298 | blocknr contained by the entry pos points to */ | 2665 | * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the |
2666 | * blocknr contained by the entry pos points to | ||
2667 | */ | ||
2299 | #define B_I_POS_UNFM_POINTER(bh, ih, pos) \ | 2668 | #define B_I_POS_UNFM_POINTER(bh, ih, pos) \ |
2300 | le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) | 2669 | le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) |
2301 | #define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ | 2670 | #define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ |
@@ -2306,9 +2675,9 @@ struct reiserfs_iget_args { | |||
2306 | __u32 dirid; | 2675 | __u32 dirid; |
2307 | }; | 2676 | }; |
2308 | 2677 | ||
2309 | /***************************************************************************/ | 2678 | /*************************************************************************** |
2310 | /* FUNCTION DECLARATIONS */ | 2679 | * FUNCTION DECLARATIONS * |
2311 | /***************************************************************************/ | 2680 | ***************************************************************************/ |
2312 | 2681 | ||
2313 | #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) | 2682 | #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) |
2314 | 2683 | ||
@@ -2320,7 +2689,10 @@ struct reiserfs_iget_args { | |||
2320 | /* first block written in a commit. */ | 2689 | /* first block written in a commit. */ |
2321 | struct reiserfs_journal_desc { | 2690 | struct reiserfs_journal_desc { |
2322 | __le32 j_trans_id; /* id of commit */ | 2691 | __le32 j_trans_id; /* id of commit */ |
2323 | __le32 j_len; /* length of commit. len +1 is the commit block */ | 2692 | |
2693 | /* length of commit. len +1 is the commit block */ | ||
2694 | __le32 j_len; | ||
2695 | |||
2324 | __le32 j_mount_id; /* mount id of this trans */ | 2696 | __le32 j_mount_id; /* mount id of this trans */ |
2325 | __le32 j_realblock[1]; /* real locations for each block */ | 2697 | __le32 j_realblock[1]; /* real locations for each block */ |
2326 | }; | 2698 | }; |
@@ -2347,22 +2719,35 @@ struct reiserfs_journal_commit { | |||
2347 | #define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) | 2719 | #define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) |
2348 | #define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) | 2720 | #define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) |
2349 | 2721 | ||
2350 | /* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the | 2722 | /* |
2351 | ** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, | 2723 | * this header block gets written whenever a transaction is considered |
2352 | ** and this transaction does not need to be replayed. | 2724 | * fully flushed, and is more recent than the last fully flushed transaction. |
2353 | */ | 2725 | * fully flushed means all the log blocks and all the real blocks are on |
2726 | * disk, and this transaction does not need to be replayed. | ||
2727 | */ | ||
2354 | struct reiserfs_journal_header { | 2728 | struct reiserfs_journal_header { |
2355 | __le32 j_last_flush_trans_id; /* id of last fully flushed transaction */ | 2729 | /* id of last fully flushed transaction */ |
2356 | __le32 j_first_unflushed_offset; /* offset in the log of where to start replay after a crash */ | 2730 | __le32 j_last_flush_trans_id; |
2731 | |||
2732 | /* offset in the log of where to start replay after a crash */ | ||
2733 | __le32 j_first_unflushed_offset; | ||
2734 | |||
2357 | __le32 j_mount_id; | 2735 | __le32 j_mount_id; |
2358 | /* 12 */ struct journal_params jh_journal; | 2736 | /* 12 */ struct journal_params jh_journal; |
2359 | }; | 2737 | }; |
2360 | 2738 | ||
2361 | /* biggest tunable defines are right here */ | 2739 | /* biggest tunable defines are right here */ |
2362 | #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ | 2740 | #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ |
2363 | #define JOURNAL_TRANS_MAX_DEFAULT 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ | 2741 | |
2742 | /* biggest possible single transaction, don't change for now (8/3/99) */ | ||
2743 | #define JOURNAL_TRANS_MAX_DEFAULT 1024 | ||
2364 | #define JOURNAL_TRANS_MIN_DEFAULT 256 | 2744 | #define JOURNAL_TRANS_MIN_DEFAULT 256 |
2365 | #define JOURNAL_MAX_BATCH_DEFAULT 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ | 2745 | |
2746 | /* | ||
2747 | * max blocks to batch into one transaction, | ||
2748 | * don't make this any bigger than 900 | ||
2749 | */ | ||
2750 | #define JOURNAL_MAX_BATCH_DEFAULT 900 | ||
2366 | #define JOURNAL_MIN_RATIO 2 | 2751 | #define JOURNAL_MIN_RATIO 2 |
2367 | #define JOURNAL_MAX_COMMIT_AGE 30 | 2752 | #define JOURNAL_MAX_COMMIT_AGE 30 |
2368 | #define JOURNAL_MAX_TRANS_AGE 30 | 2753 | #define JOURNAL_MAX_TRANS_AGE 30 |
@@ -2387,16 +2772,18 @@ struct reiserfs_journal_header { | |||
2387 | #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 | 2772 | #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 |
2388 | #endif | 2773 | #endif |
2389 | 2774 | ||
2390 | /* both of these can be as low as 1, or as high as you want. The min is the | 2775 | /* |
2391 | ** number of 4k bitmap nodes preallocated on mount. New nodes are allocated | 2776 | * both of these can be as low as 1, or as high as you want. The min is the |
2392 | ** as needed, and released when transactions are committed. On release, if | 2777 | * number of 4k bitmap nodes preallocated on mount. New nodes are allocated |
2393 | ** the current number of nodes is > max, the node is freed, otherwise, | 2778 | * as needed, and released when transactions are committed. On release, if |
2394 | ** it is put on a free list for faster use later. | 2779 | * the current number of nodes is > max, the node is freed, otherwise, |
2780 | * it is put on a free list for faster use later. | ||
2395 | */ | 2781 | */ |
2396 | #define REISERFS_MIN_BITMAP_NODES 10 | 2782 | #define REISERFS_MIN_BITMAP_NODES 10 |
2397 | #define REISERFS_MAX_BITMAP_NODES 100 | 2783 | #define REISERFS_MAX_BITMAP_NODES 100 |
2398 | 2784 | ||
2399 | #define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ | 2785 | /* these are based on journal hash size of 8192 */ |
2786 | #define JBH_HASH_SHIFT 13 | ||
2400 | #define JBH_HASH_MASK 8191 | 2787 | #define JBH_HASH_MASK 8191 |
2401 | 2788 | ||
2402 | #define _jhashfn(sb,block) \ | 2789 | #define _jhashfn(sb,block) \ |
@@ -2404,7 +2791,7 @@ struct reiserfs_journal_header { | |||
2404 | (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) | 2791 | (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) |
2405 | #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) | 2792 | #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) |
2406 | 2793 | ||
2407 | // We need these to make journal.c code more readable | 2794 | /* We need these to make journal.c code more readable */ |
2408 | #define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2795 | #define journal_find_get_block(s, block) __find_get_block(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
2409 | #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2796 | #define journal_getblk(s, block) __getblk(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
2410 | #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) | 2797 | #define journal_bread(s, block) __bread(SB_JOURNAL(s)->j_dev_bd, block, s->s_blocksize) |
@@ -2412,12 +2799,14 @@ struct reiserfs_journal_header { | |||
2412 | enum reiserfs_bh_state_bits { | 2799 | enum reiserfs_bh_state_bits { |
2413 | BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ | 2800 | BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ |
2414 | BH_JDirty_wait, | 2801 | BH_JDirty_wait, |
2415 | BH_JNew, /* disk block was taken off free list before | 2802 | /* |
2416 | * being in a finished transaction, or | 2803 | * disk block was taken off free list before being in a |
2417 | * written to disk. Can be reused immed. */ | 2804 | * finished transaction, or written to disk. Can be reused immed. |
2805 | */ | ||
2806 | BH_JNew, | ||
2418 | BH_JPrepared, | 2807 | BH_JPrepared, |
2419 | BH_JRestore_dirty, | 2808 | BH_JRestore_dirty, |
2420 | BH_JTest, // debugging only will go away | 2809 | BH_JTest, /* debugging only will go away */ |
2421 | }; | 2810 | }; |
2422 | 2811 | ||
2423 | BUFFER_FNS(JDirty, journaled); | 2812 | BUFFER_FNS(JDirty, journaled); |
@@ -2433,27 +2822,36 @@ TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); | |||
2433 | BUFFER_FNS(JTest, journal_test); | 2822 | BUFFER_FNS(JTest, journal_test); |
2434 | TAS_BUFFER_FNS(JTest, journal_test); | 2823 | TAS_BUFFER_FNS(JTest, journal_test); |
2435 | 2824 | ||
2436 | /* | 2825 | /* transaction handle which is passed around for all journal calls */ |
2437 | ** transaction handle which is passed around for all journal calls | ||
2438 | */ | ||
2439 | struct reiserfs_transaction_handle { | 2826 | struct reiserfs_transaction_handle { |
2440 | struct super_block *t_super; /* super for this FS when journal_begin was | 2827 | /* |
2441 | called. saves calls to reiserfs_get_super | 2828 | * super for this FS when journal_begin was called. saves calls to |
2442 | also used by nested transactions to make | 2829 | * reiserfs_get_super also used by nested transactions to make |
2443 | sure they are nesting on the right FS | 2830 | * sure they are nesting on the right FS _must_ be first |
2444 | _must_ be first in the handle | 2831 | * in the handle |
2445 | */ | 2832 | */ |
2833 | struct super_block *t_super; | ||
2834 | |||
2446 | int t_refcount; | 2835 | int t_refcount; |
2447 | int t_blocks_logged; /* number of blocks this writer has logged */ | 2836 | int t_blocks_logged; /* number of blocks this writer has logged */ |
2448 | int t_blocks_allocated; /* number of blocks this writer allocated */ | 2837 | int t_blocks_allocated; /* number of blocks this writer allocated */ |
2449 | unsigned int t_trans_id; /* sanity check, equals the current trans id */ | 2838 | |
2839 | /* sanity check, equals the current trans id */ | ||
2840 | unsigned int t_trans_id; | ||
2841 | |||
2450 | void *t_handle_save; /* save existing current->journal_info */ | 2842 | void *t_handle_save; /* save existing current->journal_info */ |
2451 | unsigned displace_new_blocks:1; /* if new block allocation occurres, that block | 2843 | |
2452 | should be displaced from others */ | 2844 | /* |
2845 | * if new block allocation occurres, that block | ||
2846 | * should be displaced from others | ||
2847 | */ | ||
2848 | unsigned displace_new_blocks:1; | ||
2849 | |||
2453 | struct list_head t_list; | 2850 | struct list_head t_list; |
2454 | }; | 2851 | }; |
2455 | 2852 | ||
2456 | /* used to keep track of ordered and tail writes, attached to the buffer | 2853 | /* |
2854 | * used to keep track of ordered and tail writes, attached to the buffer | ||
2457 | * head through b_journal_head. | 2855 | * head through b_journal_head. |
2458 | */ | 2856 | */ |
2459 | struct reiserfs_jh { | 2857 | struct reiserfs_jh { |
@@ -2550,20 +2948,18 @@ int B_IS_IN_TREE(const struct buffer_head *); | |||
2550 | extern void copy_item_head(struct item_head *to, | 2948 | extern void copy_item_head(struct item_head *to, |
2551 | const struct item_head *from); | 2949 | const struct item_head *from); |
2552 | 2950 | ||
2553 | // first key is in cpu form, second - le | 2951 | /* first key is in cpu form, second - le */ |
2554 | extern int comp_short_keys(const struct reiserfs_key *le_key, | 2952 | extern int comp_short_keys(const struct reiserfs_key *le_key, |
2555 | const struct cpu_key *cpu_key); | 2953 | const struct cpu_key *cpu_key); |
2556 | extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); | 2954 | extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); |
2557 | 2955 | ||
2558 | // both are in le form | 2956 | /* both are in le form */ |
2559 | extern int comp_le_keys(const struct reiserfs_key *, | 2957 | extern int comp_le_keys(const struct reiserfs_key *, |
2560 | const struct reiserfs_key *); | 2958 | const struct reiserfs_key *); |
2561 | extern int comp_short_le_keys(const struct reiserfs_key *, | 2959 | extern int comp_short_le_keys(const struct reiserfs_key *, |
2562 | const struct reiserfs_key *); | 2960 | const struct reiserfs_key *); |
2563 | 2961 | ||
2564 | // | 2962 | /* * get key version from on disk key - kludge */ |
2565 | // get key version from on disk key - kludge | ||
2566 | // | ||
2567 | static inline int le_key_version(const struct reiserfs_key *key) | 2963 | static inline int le_key_version(const struct reiserfs_key *key) |
2568 | { | 2964 | { |
2569 | int type; | 2965 | int type; |
@@ -2640,12 +3036,12 @@ void padd_item(char *item, int total_length, int length); | |||
2640 | 3036 | ||
2641 | /* inode.c */ | 3037 | /* inode.c */ |
2642 | /* args for the create parameter of reiserfs_get_block */ | 3038 | /* args for the create parameter of reiserfs_get_block */ |
2643 | #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ | 3039 | #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ |
2644 | #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ | 3040 | #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ |
2645 | #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ | 3041 | #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ |
2646 | #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ | 3042 | #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ |
2647 | #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ | 3043 | #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ |
2648 | #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ | 3044 | #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ |
2649 | 3045 | ||
2650 | void reiserfs_read_locked_inode(struct inode *inode, | 3046 | void reiserfs_read_locked_inode(struct inode *inode, |
2651 | struct reiserfs_iget_args *args); | 3047 | struct reiserfs_iget_args *args); |
@@ -2844,25 +3240,49 @@ struct buffer_head *get_FEB(struct tree_balance *); | |||
2844 | 3240 | ||
2845 | /* bitmap.c */ | 3241 | /* bitmap.c */ |
2846 | 3242 | ||
2847 | /* structure contains hints for block allocator, and it is a container for | 3243 | /* |
2848 | * arguments, such as node, search path, transaction_handle, etc. */ | 3244 | * structure contains hints for block allocator, and it is a container for |
3245 | * arguments, such as node, search path, transaction_handle, etc. | ||
3246 | */ | ||
2849 | struct __reiserfs_blocknr_hint { | 3247 | struct __reiserfs_blocknr_hint { |
2850 | struct inode *inode; /* inode passed to allocator, if we allocate unf. nodes */ | 3248 | /* inode passed to allocator, if we allocate unf. nodes */ |
3249 | struct inode *inode; | ||
3250 | |||
2851 | sector_t block; /* file offset, in blocks */ | 3251 | sector_t block; /* file offset, in blocks */ |
2852 | struct in_core_key key; | 3252 | struct in_core_key key; |
2853 | struct treepath *path; /* search path, used by allocator to deternine search_start by | 3253 | |
2854 | * various ways */ | 3254 | /* |
2855 | struct reiserfs_transaction_handle *th; /* transaction handle is needed to log super blocks and | 3255 | * search path, used by allocator to deternine search_start by |
2856 | * bitmap blocks changes */ | 3256 | * various ways |
3257 | */ | ||
3258 | struct treepath *path; | ||
3259 | |||
3260 | /* | ||
3261 | * transaction handle is needed to log super blocks | ||
3262 | * and bitmap blocks changes | ||
3263 | */ | ||
3264 | struct reiserfs_transaction_handle *th; | ||
3265 | |||
2857 | b_blocknr_t beg, end; | 3266 | b_blocknr_t beg, end; |
2858 | b_blocknr_t search_start; /* a field used to transfer search start value (block number) | 3267 | |
2859 | * between different block allocator procedures | 3268 | /* |
2860 | * (determine_search_start() and others) */ | 3269 | * a field used to transfer search start value (block number) |
2861 | int prealloc_size; /* is set in determine_prealloc_size() function, used by underlayed | 3270 | * between different block allocator procedures |
2862 | * function that do actual allocation */ | 3271 | * (determine_search_start() and others) |
2863 | 3272 | */ | |
2864 | unsigned formatted_node:1; /* the allocator uses different polices for getting disk space for | 3273 | b_blocknr_t search_start; |
2865 | * formatted/unformatted blocks with/without preallocation */ | 3274 | |
3275 | /* | ||
3276 | * is set in determine_prealloc_size() function, | ||
3277 | * used by underlayed function that do actual allocation | ||
3278 | */ | ||
3279 | int prealloc_size; | ||
3280 | |||
3281 | /* | ||
3282 | * the allocator uses different polices for getting disk | ||
3283 | * space for formatted/unformatted blocks with/without preallocation | ||
3284 | */ | ||
3285 | unsigned formatted_node:1; | ||
2866 | unsigned preallocate:1; | 3286 | unsigned preallocate:1; |
2867 | }; | 3287 | }; |
2868 | 3288 | ||
@@ -2956,13 +3376,15 @@ __u32 r5_hash(const signed char *msg, int len); | |||
2956 | #define reiserfs_test_le_bit test_bit_le | 3376 | #define reiserfs_test_le_bit test_bit_le |
2957 | #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le | 3377 | #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le |
2958 | 3378 | ||
2959 | /* sometimes reiserfs_truncate may require to allocate few new blocks | 3379 | /* |
2960 | to perform indirect2direct conversion. People probably used to | 3380 | * sometimes reiserfs_truncate may require to allocate few new blocks |
2961 | think, that truncate should work without problems on a filesystem | 3381 | * to perform indirect2direct conversion. People probably used to |
2962 | without free disk space. They may complain that they can not | 3382 | * think, that truncate should work without problems on a filesystem |
2963 | truncate due to lack of free disk space. This spare space allows us | 3383 | * without free disk space. They may complain that they can not |
2964 | to not worry about it. 500 is probably too much, but it should be | 3384 | * truncate due to lack of free disk space. This spare space allows us |
2965 | absolutely safe */ | 3385 | * to not worry about it. 500 is probably too much, but it should be |
3386 | * absolutely safe | ||
3387 | */ | ||
2966 | #define SPARE_SPACE 500 | 3388 | #define SPARE_SPACE 500 |
2967 | 3389 | ||
2968 | /* prototypes from ioctl.c */ | 3390 | /* prototypes from ioctl.c */ |
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index a4ef5cd606eb..037b00c40f1f 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c | |||
@@ -53,8 +53,10 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
53 | } | 53 | } |
54 | bforget(bh); | 54 | bforget(bh); |
55 | 55 | ||
56 | /* old disk layout detection; those partitions can be mounted, but | 56 | /* |
57 | * cannot be resized */ | 57 | * old disk layout detection; those partitions can be mounted, but |
58 | * cannot be resized | ||
59 | */ | ||
58 | if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size | 60 | if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size |
59 | != REISERFS_DISK_OFFSET_IN_BYTES) { | 61 | != REISERFS_DISK_OFFSET_IN_BYTES) { |
60 | printk | 62 | printk |
@@ -86,12 +88,14 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
86 | ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); | 88 | ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); |
87 | return -ENOMEM; | 89 | return -ENOMEM; |
88 | } | 90 | } |
89 | /* the new journal bitmaps are zero filled, now we copy in the bitmap | 91 | /* |
90 | ** node pointers from the old journal bitmap structs, and then | 92 | * the new journal bitmaps are zero filled, now we copy i |
91 | ** transfer the new data structures into the journal struct. | 93 | * the bitmap node pointers from the old journal bitmap |
92 | ** | 94 | * structs, and then transfer the new data structures |
93 | ** using the copy_size var below allows this code to work for | 95 | * into the journal struct. |
94 | ** both shrinking and expanding the FS. | 96 | * |
97 | * using the copy_size var below allows this code to work for | ||
98 | * both shrinking and expanding the FS. | ||
95 | */ | 99 | */ |
96 | copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; | 100 | copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr; |
97 | copy_size = | 101 | copy_size = |
@@ -101,36 +105,45 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
101 | jb = SB_JOURNAL(s)->j_list_bitmap + i; | 105 | jb = SB_JOURNAL(s)->j_list_bitmap + i; |
102 | memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); | 106 | memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size); |
103 | 107 | ||
104 | /* just in case vfree schedules on us, copy the new | 108 | /* |
105 | ** pointer into the journal struct before freeing the | 109 | * just in case vfree schedules on us, copy the new |
106 | ** old one | 110 | * pointer into the journal struct before freeing the |
111 | * old one | ||
107 | */ | 112 | */ |
108 | node_tmp = jb->bitmaps; | 113 | node_tmp = jb->bitmaps; |
109 | jb->bitmaps = jbitmap[i].bitmaps; | 114 | jb->bitmaps = jbitmap[i].bitmaps; |
110 | vfree(node_tmp); | 115 | vfree(node_tmp); |
111 | } | 116 | } |
112 | 117 | ||
113 | /* allocate additional bitmap blocks, reallocate array of bitmap | 118 | /* |
114 | * block pointers */ | 119 | * allocate additional bitmap blocks, reallocate |
120 | * array of bitmap block pointers | ||
121 | */ | ||
115 | bitmap = | 122 | bitmap = |
116 | vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); | 123 | vzalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); |
117 | if (!bitmap) { | 124 | if (!bitmap) { |
118 | /* Journal bitmaps are still supersized, but the memory isn't | 125 | /* |
119 | * leaked, so I guess it's ok */ | 126 | * Journal bitmaps are still supersized, but the |
127 | * memory isn't leaked, so I guess it's ok | ||
128 | */ | ||
120 | printk("reiserfs_resize: unable to allocate memory.\n"); | 129 | printk("reiserfs_resize: unable to allocate memory.\n"); |
121 | return -ENOMEM; | 130 | return -ENOMEM; |
122 | } | 131 | } |
123 | for (i = 0; i < bmap_nr; i++) | 132 | for (i = 0; i < bmap_nr; i++) |
124 | bitmap[i] = old_bitmap[i]; | 133 | bitmap[i] = old_bitmap[i]; |
125 | 134 | ||
126 | /* This doesn't go through the journal, but it doesn't have to. | 135 | /* |
127 | * The changes are still atomic: We're synced up when the journal | 136 | * This doesn't go through the journal, but it doesn't have to. |
128 | * transaction begins, and the new bitmaps don't matter if the | 137 | * The changes are still atomic: We're synced up when the |
129 | * transaction fails. */ | 138 | * journal transaction begins, and the new bitmaps don't |
139 | * matter if the transaction fails. | ||
140 | */ | ||
130 | for (i = bmap_nr; i < bmap_nr_new; i++) { | 141 | for (i = bmap_nr; i < bmap_nr_new; i++) { |
131 | int depth; | 142 | int depth; |
132 | /* don't use read_bitmap_block since it will cache | 143 | /* |
133 | * the uninitialized bitmap */ | 144 | * don't use read_bitmap_block since it will cache |
145 | * the uninitialized bitmap | ||
146 | */ | ||
134 | depth = reiserfs_write_unlock_nested(s); | 147 | depth = reiserfs_write_unlock_nested(s); |
135 | bh = sb_bread(s, i * s->s_blocksize * 8); | 148 | bh = sb_bread(s, i * s->s_blocksize * 8); |
136 | reiserfs_write_lock_nested(s, depth); | 149 | reiserfs_write_lock_nested(s, depth); |
@@ -147,7 +160,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
147 | depth = reiserfs_write_unlock_nested(s); | 160 | depth = reiserfs_write_unlock_nested(s); |
148 | sync_dirty_buffer(bh); | 161 | sync_dirty_buffer(bh); |
149 | reiserfs_write_lock_nested(s, depth); | 162 | reiserfs_write_lock_nested(s, depth); |
150 | // update bitmap_info stuff | 163 | /* update bitmap_info stuff */ |
151 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; | 164 | bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; |
152 | brelse(bh); | 165 | brelse(bh); |
153 | } | 166 | } |
@@ -156,9 +169,11 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) | |||
156 | vfree(old_bitmap); | 169 | vfree(old_bitmap); |
157 | } | 170 | } |
158 | 171 | ||
159 | /* begin transaction, if there was an error, it's fine. Yes, we have | 172 | /* |
173 | * begin transaction, if there was an error, it's fine. Yes, we have | ||
160 | * incorrect bitmaps now, but none of it is ever going to touch the | 174 | * incorrect bitmaps now, but none of it is ever going to touch the |
161 | * disk anyway. */ | 175 | * disk anyway. |
176 | */ | ||
162 | err = journal_begin(&th, s, 10); | 177 | err = journal_begin(&th, s, 10); |
163 | if (err) | 178 | if (err) |
164 | return err; | 179 | return err; |
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 40b3e77c8ff3..aa86757e48f8 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c | |||
@@ -8,46 +8,6 @@ | |||
8 | * Pereslavl-Zalessky Russia | 8 | * Pereslavl-Zalessky Russia |
9 | */ | 9 | */ |
10 | 10 | ||
11 | /* | ||
12 | * This file contains functions dealing with S+tree | ||
13 | * | ||
14 | * B_IS_IN_TREE | ||
15 | * copy_item_head | ||
16 | * comp_short_keys | ||
17 | * comp_keys | ||
18 | * comp_short_le_keys | ||
19 | * le_key2cpu_key | ||
20 | * comp_le_keys | ||
21 | * bin_search | ||
22 | * get_lkey | ||
23 | * get_rkey | ||
24 | * key_in_buffer | ||
25 | * decrement_bcount | ||
26 | * reiserfs_check_path | ||
27 | * pathrelse_and_restore | ||
28 | * pathrelse | ||
29 | * search_by_key_reada | ||
30 | * search_by_key | ||
31 | * search_for_position_by_key | ||
32 | * comp_items | ||
33 | * prepare_for_direct_item | ||
34 | * prepare_for_direntry_item | ||
35 | * prepare_for_delete_or_cut | ||
36 | * calc_deleted_bytes_number | ||
37 | * init_tb_struct | ||
38 | * padd_item | ||
39 | * reiserfs_delete_item | ||
40 | * reiserfs_delete_solid_item | ||
41 | * reiserfs_delete_object | ||
42 | * maybe_indirect_to_direct | ||
43 | * indirect_to_direct_roll_back | ||
44 | * reiserfs_cut_from_item | ||
45 | * truncate_directory | ||
46 | * reiserfs_do_truncate | ||
47 | * reiserfs_paste_into_item | ||
48 | * reiserfs_insert_item | ||
49 | */ | ||
50 | |||
51 | #include <linux/time.h> | 11 | #include <linux/time.h> |
52 | #include <linux/string.h> | 12 | #include <linux/string.h> |
53 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
@@ -65,21 +25,21 @@ inline int B_IS_IN_TREE(const struct buffer_head *bh) | |||
65 | return (B_LEVEL(bh) != FREE_LEVEL); | 25 | return (B_LEVEL(bh) != FREE_LEVEL); |
66 | } | 26 | } |
67 | 27 | ||
68 | // | 28 | /* to get item head in le form */ |
69 | // to gets item head in le form | ||
70 | // | ||
71 | inline void copy_item_head(struct item_head *to, | 29 | inline void copy_item_head(struct item_head *to, |
72 | const struct item_head *from) | 30 | const struct item_head *from) |
73 | { | 31 | { |
74 | memcpy(to, from, IH_SIZE); | 32 | memcpy(to, from, IH_SIZE); |
75 | } | 33 | } |
76 | 34 | ||
77 | /* k1 is pointer to on-disk structure which is stored in little-endian | 35 | /* |
78 | form. k2 is pointer to cpu variable. For key of items of the same | 36 | * k1 is pointer to on-disk structure which is stored in little-endian |
79 | object this returns 0. | 37 | * form. k2 is pointer to cpu variable. For key of items of the same |
80 | Returns: -1 if key1 < key2 | 38 | * object this returns 0. |
81 | 0 if key1 == key2 | 39 | * Returns: -1 if key1 < key2 |
82 | 1 if key1 > key2 */ | 40 | * 0 if key1 == key2 |
41 | * 1 if key1 > key2 | ||
42 | */ | ||
83 | inline int comp_short_keys(const struct reiserfs_key *le_key, | 43 | inline int comp_short_keys(const struct reiserfs_key *le_key, |
84 | const struct cpu_key *cpu_key) | 44 | const struct cpu_key *cpu_key) |
85 | { | 45 | { |
@@ -97,11 +57,13 @@ inline int comp_short_keys(const struct reiserfs_key *le_key, | |||
97 | return 0; | 57 | return 0; |
98 | } | 58 | } |
99 | 59 | ||
100 | /* k1 is pointer to on-disk structure which is stored in little-endian | 60 | /* |
101 | form. k2 is pointer to cpu variable. | 61 | * k1 is pointer to on-disk structure which is stored in little-endian |
102 | Compare keys using all 4 key fields. | 62 | * form. k2 is pointer to cpu variable. |
103 | Returns: -1 if key1 < key2 0 | 63 | * Compare keys using all 4 key fields. |
104 | if key1 = key2 1 if key1 > key2 */ | 64 | * Returns: -1 if key1 < key2 0 |
65 | * if key1 = key2 1 if key1 > key2 | ||
66 | */ | ||
105 | static inline int comp_keys(const struct reiserfs_key *le_key, | 67 | static inline int comp_keys(const struct reiserfs_key *le_key, |
106 | const struct cpu_key *cpu_key) | 68 | const struct cpu_key *cpu_key) |
107 | { | 69 | { |
@@ -155,15 +117,17 @@ inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from) | |||
155 | to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); | 117 | to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id); |
156 | to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); | 118 | to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid); |
157 | 119 | ||
158 | // find out version of the key | 120 | /* find out version of the key */ |
159 | version = le_key_version(from); | 121 | version = le_key_version(from); |
160 | to->version = version; | 122 | to->version = version; |
161 | to->on_disk_key.k_offset = le_key_k_offset(version, from); | 123 | to->on_disk_key.k_offset = le_key_k_offset(version, from); |
162 | to->on_disk_key.k_type = le_key_k_type(version, from); | 124 | to->on_disk_key.k_type = le_key_k_type(version, from); |
163 | } | 125 | } |
164 | 126 | ||
165 | // this does not say which one is bigger, it only returns 1 if keys | 127 | /* |
166 | // are not equal, 0 otherwise | 128 | * this does not say which one is bigger, it only returns 1 if keys |
129 | * are not equal, 0 otherwise | ||
130 | */ | ||
167 | inline int comp_le_keys(const struct reiserfs_key *k1, | 131 | inline int comp_le_keys(const struct reiserfs_key *k1, |
168 | const struct reiserfs_key *k2) | 132 | const struct reiserfs_key *k2) |
169 | { | 133 | { |
@@ -177,24 +141,27 @@ inline int comp_le_keys(const struct reiserfs_key *k1, | |||
177 | * *pos = number of the searched element if found, else the * | 141 | * *pos = number of the searched element if found, else the * |
178 | * number of the first element that is larger than key. * | 142 | * number of the first element that is larger than key. * |
179 | **************************************************************************/ | 143 | **************************************************************************/ |
180 | /* For those not familiar with binary search: lbound is the leftmost item that it | 144 | /* |
181 | could be, rbound the rightmost item that it could be. We examine the item | 145 | * For those not familiar with binary search: lbound is the leftmost item |
182 | halfway between lbound and rbound, and that tells us either that we can increase | 146 | * that it could be, rbound the rightmost item that it could be. We examine |
183 | lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that | 147 | * the item halfway between lbound and rbound, and that tells us either |
184 | there are no possible items, and we have not found it. With each examination we | 148 | * that we can increase lbound, or decrease rbound, or that we have found it, |
185 | cut the number of possible items it could be by one more than half rounded down, | 149 | * or if lbound <= rbound that there are no possible items, and we have not |
186 | or we find it. */ | 150 | * found it. With each examination we cut the number of possible items it |
151 | * could be by one more than half rounded down, or we find it. | ||
152 | */ | ||
187 | static inline int bin_search(const void *key, /* Key to search for. */ | 153 | static inline int bin_search(const void *key, /* Key to search for. */ |
188 | const void *base, /* First item in the array. */ | 154 | const void *base, /* First item in the array. */ |
189 | int num, /* Number of items in the array. */ | 155 | int num, /* Number of items in the array. */ |
190 | int width, /* Item size in the array. | 156 | /* |
191 | searched. Lest the reader be | 157 | * Item size in the array. searched. Lest the |
192 | confused, note that this is crafted | 158 | * reader be confused, note that this is crafted |
193 | as a general function, and when it | 159 | * as a general function, and when it is applied |
194 | is applied specifically to the array | 160 | * specifically to the array of item headers in a |
195 | of item headers in a node, width | 161 | * node, width is actually the item header size |
196 | is actually the item header size not | 162 | * not the item size. |
197 | the item size. */ | 163 | */ |
164 | int width, | ||
198 | int *pos /* Number of the searched for element. */ | 165 | int *pos /* Number of the searched for element. */ |
199 | ) | 166 | ) |
200 | { | 167 | { |
@@ -216,8 +183,10 @@ static inline int bin_search(const void *key, /* Key to search for. */ | |||
216 | return ITEM_FOUND; /* Key found in the array. */ | 183 | return ITEM_FOUND; /* Key found in the array. */ |
217 | } | 184 | } |
218 | 185 | ||
219 | /* bin_search did not find given key, it returns position of key, | 186 | /* |
220 | that is minimal and greater than the given one. */ | 187 | * bin_search did not find given key, it returns position of key, |
188 | * that is minimal and greater than the given one. | ||
189 | */ | ||
221 | *pos = lbound; | 190 | *pos = lbound; |
222 | return ITEM_NOT_FOUND; | 191 | return ITEM_NOT_FOUND; |
223 | } | 192 | } |
@@ -234,10 +203,14 @@ static const struct reiserfs_key MAX_KEY = { | |||
234 | __constant_cpu_to_le32(0xffffffff)},} | 203 | __constant_cpu_to_le32(0xffffffff)},} |
235 | }; | 204 | }; |
236 | 205 | ||
237 | /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom | 206 | /* |
238 | of the path, and going upwards. We must check the path's validity at each step. If the key is not in | 207 | * Get delimiting key of the buffer by looking for it in the buffers in the |
239 | the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this | 208 | * path, starting from the bottom of the path, and going upwards. We must |
240 | case we return a special key, either MIN_KEY or MAX_KEY. */ | 209 | * check the path's validity at each step. If the key is not in the path, |
210 | * there is no delimiting key in the tree (buffer is first or last buffer | ||
211 | * in tree), and in this case we return a special key, either MIN_KEY or | ||
212 | * MAX_KEY. | ||
213 | */ | ||
241 | static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, | 214 | static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path, |
242 | const struct super_block *sb) | 215 | const struct super_block *sb) |
243 | { | 216 | { |
@@ -270,7 +243,10 @@ static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_pat | |||
270 | PATH_OFFSET_PBUFFER(chk_path, | 243 | PATH_OFFSET_PBUFFER(chk_path, |
271 | path_offset + 1)->b_blocknr) | 244 | path_offset + 1)->b_blocknr) |
272 | return &MAX_KEY; | 245 | return &MAX_KEY; |
273 | /* Return delimiting key if position in the parent is not equal to zero. */ | 246 | /* |
247 | * Return delimiting key if position in the parent | ||
248 | * is not equal to zero. | ||
249 | */ | ||
274 | if (position) | 250 | if (position) |
275 | return internal_key(parent, position - 1); | 251 | return internal_key(parent, position - 1); |
276 | } | 252 | } |
@@ -308,15 +284,23 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, | |||
308 | path_offset)) > | 284 | path_offset)) > |
309 | B_NR_ITEMS(parent)) | 285 | B_NR_ITEMS(parent)) |
310 | return &MIN_KEY; | 286 | return &MIN_KEY; |
311 | /* Check whether parent at the path really points to the child. */ | 287 | /* |
288 | * Check whether parent at the path really points | ||
289 | * to the child. | ||
290 | */ | ||
312 | if (B_N_CHILD_NUM(parent, position) != | 291 | if (B_N_CHILD_NUM(parent, position) != |
313 | PATH_OFFSET_PBUFFER(chk_path, | 292 | PATH_OFFSET_PBUFFER(chk_path, |
314 | path_offset + 1)->b_blocknr) | 293 | path_offset + 1)->b_blocknr) |
315 | return &MIN_KEY; | 294 | return &MIN_KEY; |
316 | /* Return delimiting key if position in the parent is not the last one. */ | 295 | |
296 | /* | ||
297 | * Return delimiting key if position in the parent | ||
298 | * is not the last one. | ||
299 | */ | ||
317 | if (position != B_NR_ITEMS(parent)) | 300 | if (position != B_NR_ITEMS(parent)) |
318 | return internal_key(parent, position); | 301 | return internal_key(parent, position); |
319 | } | 302 | } |
303 | |||
320 | /* Return MAX_KEY if we are in the root of the buffer tree. */ | 304 | /* Return MAX_KEY if we are in the root of the buffer tree. */ |
321 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> | 305 | if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)-> |
322 | b_blocknr == SB_ROOT_BLOCK(sb)) | 306 | b_blocknr == SB_ROOT_BLOCK(sb)) |
@@ -324,13 +308,20 @@ inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path, | |||
324 | return &MIN_KEY; | 308 | return &MIN_KEY; |
325 | } | 309 | } |
326 | 310 | ||
327 | /* Check whether a key is contained in the tree rooted from a buffer at a path. */ | 311 | /* |
328 | /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in | 312 | * Check whether a key is contained in the tree rooted from a buffer at a path. |
329 | the path. These delimiting keys are stored at least one level above that buffer in the tree. If the | 313 | * This works by looking at the left and right delimiting keys for the buffer |
330 | buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in | 314 | * in the last path_element in the path. These delimiting keys are stored |
331 | this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ | 315 | * at least one level above that buffer in the tree. If the buffer is the |
332 | static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */ | 316 | * first or last node in the tree order then one of the delimiting keys may |
333 | const struct cpu_key *key, /* Key which should be checked. */ | 317 | * be absent, and in this case get_lkey and get_rkey return a special key |
318 | * which is MIN_KEY or MAX_KEY. | ||
319 | */ | ||
320 | static inline int key_in_buffer( | ||
321 | /* Path which should be checked. */ | ||
322 | struct treepath *chk_path, | ||
323 | /* Key which should be checked. */ | ||
324 | const struct cpu_key *key, | ||
334 | struct super_block *sb | 325 | struct super_block *sb |
335 | ) | 326 | ) |
336 | { | 327 | { |
@@ -359,9 +350,11 @@ int reiserfs_check_path(struct treepath *p) | |||
359 | return 0; | 350 | return 0; |
360 | } | 351 | } |
361 | 352 | ||
362 | /* Drop the reference to each buffer in a path and restore | 353 | /* |
354 | * Drop the reference to each buffer in a path and restore | ||
363 | * dirty bits clean when preparing the buffer for the log. | 355 | * dirty bits clean when preparing the buffer for the log. |
364 | * This version should only be called from fix_nodes() */ | 356 | * This version should only be called from fix_nodes() |
357 | */ | ||
365 | void pathrelse_and_restore(struct super_block *sb, | 358 | void pathrelse_and_restore(struct super_block *sb, |
366 | struct treepath *search_path) | 359 | struct treepath *search_path) |
367 | { | 360 | { |
@@ -418,14 +411,17 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) | |||
418 | } | 411 | } |
419 | ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; | 412 | ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; |
420 | used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); | 413 | used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih)); |
414 | |||
415 | /* free space does not match to calculated amount of use space */ | ||
421 | if (used_space != blocksize - blkh_free_space(blkh)) { | 416 | if (used_space != blocksize - blkh_free_space(blkh)) { |
422 | /* free space does not match to calculated amount of use space */ | ||
423 | reiserfs_warning(NULL, "reiserfs-5082", | 417 | reiserfs_warning(NULL, "reiserfs-5082", |
424 | "free space seems wrong: %z", bh); | 418 | "free space seems wrong: %z", bh); |
425 | return 0; | 419 | return 0; |
426 | } | 420 | } |
427 | // FIXME: it is_leaf will hit performance too much - we may have | 421 | /* |
428 | // return 1 here | 422 | * FIXME: it is_leaf will hit performance too much - we may have |
423 | * return 1 here | ||
424 | */ | ||
429 | 425 | ||
430 | /* check tables of item heads */ | 426 | /* check tables of item heads */ |
431 | ih = (struct item_head *)(buf + BLKH_SIZE); | 427 | ih = (struct item_head *)(buf + BLKH_SIZE); |
@@ -460,7 +456,7 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) | |||
460 | prev_location = ih_location(ih); | 456 | prev_location = ih_location(ih); |
461 | } | 457 | } |
462 | 458 | ||
463 | // one may imagine much more checks | 459 | /* one may imagine many more checks */ |
464 | return 1; | 460 | return 1; |
465 | } | 461 | } |
466 | 462 | ||
@@ -481,8 +477,8 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh) | |||
481 | } | 477 | } |
482 | 478 | ||
483 | nr = blkh_nr_item(blkh); | 479 | nr = blkh_nr_item(blkh); |
480 | /* for internal which is not root we might check min number of keys */ | ||
484 | if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { | 481 | if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { |
485 | /* for internal which is not root we might check min number of keys */ | ||
486 | reiserfs_warning(NULL, "reiserfs-5088", | 482 | reiserfs_warning(NULL, "reiserfs-5088", |
487 | "number of key seems wrong: %z", bh); | 483 | "number of key seems wrong: %z", bh); |
488 | return 0; | 484 | return 0; |
@@ -494,12 +490,15 @@ static int is_internal(char *buf, int blocksize, struct buffer_head *bh) | |||
494 | "free space seems wrong: %z", bh); | 490 | "free space seems wrong: %z", bh); |
495 | return 0; | 491 | return 0; |
496 | } | 492 | } |
497 | // one may imagine much more checks | 493 | |
494 | /* one may imagine many more checks */ | ||
498 | return 1; | 495 | return 1; |
499 | } | 496 | } |
500 | 497 | ||
501 | // make sure that bh contains formatted node of reiserfs tree of | 498 | /* |
502 | // 'level'-th level | 499 | * make sure that bh contains formatted node of reiserfs tree of |
500 | * 'level'-th level | ||
501 | */ | ||
503 | static int is_tree_node(struct buffer_head *bh, int level) | 502 | static int is_tree_node(struct buffer_head *bh, int level) |
504 | { | 503 | { |
505 | if (B_LEVEL(bh) != level) { | 504 | if (B_LEVEL(bh) != level) { |
@@ -546,7 +545,8 @@ static int search_by_key_reada(struct super_block *s, | |||
546 | for (j = 0; j < i; j++) { | 545 | for (j = 0; j < i; j++) { |
547 | /* | 546 | /* |
548 | * note, this needs attention if we are getting rid of the BKL | 547 | * note, this needs attention if we are getting rid of the BKL |
549 | * you have to make sure the prepared bit isn't set on this buffer | 548 | * you have to make sure the prepared bit isn't set on this |
549 | * buffer | ||
550 | */ | 550 | */ |
551 | if (!buffer_uptodate(bh[j])) { | 551 | if (!buffer_uptodate(bh[j])) { |
552 | if (depth == -1) | 552 | if (depth == -1) |
@@ -558,39 +558,34 @@ static int search_by_key_reada(struct super_block *s, | |||
558 | return depth; | 558 | return depth; |
559 | } | 559 | } |
560 | 560 | ||
561 | /************************************************************************** | 561 | /* |
562 | * Algorithm SearchByKey * | 562 | * This function fills up the path from the root to the leaf as it |
563 | * look for item in the Disk S+Tree by its key * | 563 | * descends the tree looking for the key. It uses reiserfs_bread to |
564 | * Input: sb - super block * | 564 | * try to find buffers in the cache given their block number. If it |
565 | * key - pointer to the key to search * | 565 | * does not find them in the cache it reads them from disk. For each |
566 | * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * | 566 | * node search_by_key finds using reiserfs_bread it then uses |
567 | * search_path - path from the root to the needed leaf * | 567 | * bin_search to look through that node. bin_search will find the |
568 | **************************************************************************/ | 568 | * position of the block_number of the next node if it is looking |
569 | 569 | * through an internal node. If it is looking through a leaf node | |
570 | /* This function fills up the path from the root to the leaf as it | 570 | * bin_search will find the position of the item which has key either |
571 | descends the tree looking for the key. It uses reiserfs_bread to | 571 | * equal to given key, or which is the maximal key less than the given |
572 | try to find buffers in the cache given their block number. If it | 572 | * key. search_by_key returns a path that must be checked for the |
573 | does not find them in the cache it reads them from disk. For each | 573 | * correctness of the top of the path but need not be checked for the |
574 | node search_by_key finds using reiserfs_bread it then uses | 574 | * correctness of the bottom of the path |
575 | bin_search to look through that node. bin_search will find the | 575 | */ |
576 | position of the block_number of the next node if it is looking | 576 | /* |
577 | through an internal node. If it is looking through a leaf node | 577 | * search_by_key - search for key (and item) in stree |
578 | bin_search will find the position of the item which has key either | 578 | * @sb: superblock |
579 | equal to given key, or which is the maximal key less than the given | 579 | * @key: pointer to key to search for |
580 | key. search_by_key returns a path that must be checked for the | 580 | * @search_path: Allocated and initialized struct treepath; Returned filled |
581 | correctness of the top of the path but need not be checked for the | 581 | * on success. |
582 | correctness of the bottom of the path */ | 582 | * @stop_level: How far down the tree to search, Use DISK_LEAF_NODE_LEVEL to |
583 | /* The function is NOT SCHEDULE-SAFE! */ | 583 | * stop at leaf level. |
584 | int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */ | 584 | * |
585 | struct treepath *search_path,/* This structure was | 585 | * The function is NOT SCHEDULE-SAFE! |
586 | allocated and initialized | 586 | */ |
587 | by the calling | 587 | int search_by_key(struct super_block *sb, const struct cpu_key *key, |
588 | function. It is filled up | 588 | struct treepath *search_path, int stop_level) |
589 | by this function. */ | ||
590 | int stop_level /* How far down the tree to search. To | ||
591 | stop at leaf level - set to | ||
592 | DISK_LEAF_NODE_LEVEL */ | ||
593 | ) | ||
594 | { | 589 | { |
595 | b_blocknr_t block_number; | 590 | b_blocknr_t block_number; |
596 | int expected_level; | 591 | int expected_level; |
@@ -609,17 +604,22 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
609 | 604 | ||
610 | PROC_INFO_INC(sb, search_by_key); | 605 | PROC_INFO_INC(sb, search_by_key); |
611 | 606 | ||
612 | /* As we add each node to a path we increase its count. This means that | 607 | /* |
613 | we must be careful to release all nodes in a path before we either | 608 | * As we add each node to a path we increase its count. This means |
614 | discard the path struct or re-use the path struct, as we do here. */ | 609 | * that we must be careful to release all nodes in a path before we |
610 | * either discard the path struct or re-use the path struct, as we | ||
611 | * do here. | ||
612 | */ | ||
615 | 613 | ||
616 | pathrelse(search_path); | 614 | pathrelse(search_path); |
617 | 615 | ||
618 | right_neighbor_of_leaf_node = 0; | 616 | right_neighbor_of_leaf_node = 0; |
619 | 617 | ||
620 | /* With each iteration of this loop we search through the items in the | 618 | /* |
621 | current node, and calculate the next current node(next path element) | 619 | * With each iteration of this loop we search through the items in the |
622 | for the next iteration of this loop.. */ | 620 | * current node, and calculate the next current node(next path element) |
621 | * for the next iteration of this loop.. | ||
622 | */ | ||
623 | block_number = SB_ROOT_BLOCK(sb); | 623 | block_number = SB_ROOT_BLOCK(sb); |
624 | expected_level = -1; | 624 | expected_level = -1; |
625 | while (1) { | 625 | while (1) { |
@@ -639,8 +639,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
639 | ++search_path->path_length); | 639 | ++search_path->path_length); |
640 | fs_gen = get_generation(sb); | 640 | fs_gen = get_generation(sb); |
641 | 641 | ||
642 | /* Read the next tree node, and set the last element in the path to | 642 | /* |
643 | have a pointer to it. */ | 643 | * Read the next tree node, and set the last element |
644 | * in the path to have a pointer to it. | ||
645 | */ | ||
644 | if ((bh = last_element->pe_buffer = | 646 | if ((bh = last_element->pe_buffer = |
645 | sb_getblk(sb, block_number))) { | 647 | sb_getblk(sb, block_number))) { |
646 | 648 | ||
@@ -676,9 +678,12 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
676 | expected_level = SB_TREE_HEIGHT(sb); | 678 | expected_level = SB_TREE_HEIGHT(sb); |
677 | expected_level--; | 679 | expected_level--; |
678 | 680 | ||
679 | /* It is possible that schedule occurred. We must check whether the key | 681 | /* |
680 | to search is still in the tree rooted from the current buffer. If | 682 | * It is possible that schedule occurred. We must check |
681 | not then repeat search from the root. */ | 683 | * whether the key to search is still in the tree rooted |
684 | * from the current buffer. If not then repeat search | ||
685 | * from the root. | ||
686 | */ | ||
682 | if (fs_changed(fs_gen, sb) && | 687 | if (fs_changed(fs_gen, sb) && |
683 | (!B_IS_IN_TREE(bh) || | 688 | (!B_IS_IN_TREE(bh) || |
684 | B_LEVEL(bh) != expected_level || | 689 | B_LEVEL(bh) != expected_level || |
@@ -689,8 +694,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
689 | sbk_restarted[expected_level - 1]); | 694 | sbk_restarted[expected_level - 1]); |
690 | pathrelse(search_path); | 695 | pathrelse(search_path); |
691 | 696 | ||
692 | /* Get the root block number so that we can repeat the search | 697 | /* |
693 | starting from the root. */ | 698 | * Get the root block number so that we can |
699 | * repeat the search starting from the root. | ||
700 | */ | ||
694 | block_number = SB_ROOT_BLOCK(sb); | 701 | block_number = SB_ROOT_BLOCK(sb); |
695 | expected_level = -1; | 702 | expected_level = -1; |
696 | right_neighbor_of_leaf_node = 0; | 703 | right_neighbor_of_leaf_node = 0; |
@@ -699,9 +706,11 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
699 | continue; | 706 | continue; |
700 | } | 707 | } |
701 | 708 | ||
702 | /* only check that the key is in the buffer if key is not | 709 | /* |
703 | equal to the MAX_KEY. Latter case is only possible in | 710 | * only check that the key is in the buffer if key is not |
704 | "finish_unfinished()" processing during mount. */ | 711 | * equal to the MAX_KEY. Latter case is only possible in |
712 | * "finish_unfinished()" processing during mount. | ||
713 | */ | ||
705 | RFALSE(comp_keys(&MAX_KEY, key) && | 714 | RFALSE(comp_keys(&MAX_KEY, key) && |
706 | !key_in_buffer(search_path, key, sb), | 715 | !key_in_buffer(search_path, key, sb), |
707 | "PAP-5130: key is not in the buffer"); | 716 | "PAP-5130: key is not in the buffer"); |
@@ -713,8 +722,10 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
713 | } | 722 | } |
714 | #endif | 723 | #endif |
715 | 724 | ||
716 | // make sure, that the node contents look like a node of | 725 | /* |
717 | // certain level | 726 | * make sure, that the node contents look like a node of |
727 | * certain level | ||
728 | */ | ||
718 | if (!is_tree_node(bh, expected_level)) { | 729 | if (!is_tree_node(bh, expected_level)) { |
719 | reiserfs_error(sb, "vs-5150", | 730 | reiserfs_error(sb, "vs-5150", |
720 | "invalid format found in block %ld. " | 731 | "invalid format found in block %ld. " |
@@ -743,21 +754,31 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
743 | } | 754 | } |
744 | 755 | ||
745 | /* we are not in the stop level */ | 756 | /* we are not in the stop level */ |
757 | /* | ||
758 | * item has been found, so we choose the pointer which | ||
759 | * is to the right of the found one | ||
760 | */ | ||
746 | if (retval == ITEM_FOUND) | 761 | if (retval == ITEM_FOUND) |
747 | /* item has been found, so we choose the pointer which is to the right of the found one */ | ||
748 | last_element->pe_position++; | 762 | last_element->pe_position++; |
749 | 763 | ||
750 | /* if item was not found we choose the position which is to | 764 | /* |
751 | the left of the found item. This requires no code, | 765 | * if item was not found we choose the position which is to |
752 | bin_search did it already. */ | 766 | * the left of the found item. This requires no code, |
767 | * bin_search did it already. | ||
768 | */ | ||
753 | 769 | ||
754 | /* So we have chosen a position in the current node which is | 770 | /* |
755 | an internal node. Now we calculate child block number by | 771 | * So we have chosen a position in the current node which is |
756 | position in the node. */ | 772 | * an internal node. Now we calculate child block number by |
773 | * position in the node. | ||
774 | */ | ||
757 | block_number = | 775 | block_number = |
758 | B_N_CHILD_NUM(bh, last_element->pe_position); | 776 | B_N_CHILD_NUM(bh, last_element->pe_position); |
759 | 777 | ||
760 | /* if we are going to read leaf nodes, try for read ahead as well */ | 778 | /* |
779 | * if we are going to read leaf nodes, try for read | ||
780 | * ahead as well | ||
781 | */ | ||
761 | if ((search_path->reada & PATH_READA) && | 782 | if ((search_path->reada & PATH_READA) && |
762 | node_level == DISK_LEAF_NODE_LEVEL + 1) { | 783 | node_level == DISK_LEAF_NODE_LEVEL + 1) { |
763 | int pos = last_element->pe_position; | 784 | int pos = last_element->pe_position; |
@@ -789,26 +810,28 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s | |||
789 | } | 810 | } |
790 | } | 811 | } |
791 | 812 | ||
792 | /* Form the path to an item and position in this item which contains | 813 | /* |
793 | file byte defined by key. If there is no such item | 814 | * Form the path to an item and position in this item which contains |
794 | corresponding to the key, we point the path to the item with | 815 | * file byte defined by key. If there is no such item |
795 | maximal key less than key, and *pos_in_item is set to one | 816 | * corresponding to the key, we point the path to the item with |
796 | past the last entry/byte in the item. If searching for entry in a | 817 | * maximal key less than key, and *pos_in_item is set to one |
797 | directory item, and it is not found, *pos_in_item is set to one | 818 | * past the last entry/byte in the item. If searching for entry in a |
798 | entry more than the entry with maximal key which is less than the | 819 | * directory item, and it is not found, *pos_in_item is set to one |
799 | sought key. | 820 | * entry more than the entry with maximal key which is less than the |
800 | 821 | * sought key. | |
801 | Note that if there is no entry in this same node which is one more, | 822 | * |
802 | then we point to an imaginary entry. for direct items, the | 823 | * Note that if there is no entry in this same node which is one more, |
803 | position is in units of bytes, for indirect items the position is | 824 | * then we point to an imaginary entry. for direct items, the |
804 | in units of blocknr entries, for directory items the position is in | 825 | * position is in units of bytes, for indirect items the position is |
805 | units of directory entries. */ | 826 | * in units of blocknr entries, for directory items the position is in |
806 | 827 | * units of directory entries. | |
828 | */ | ||
807 | /* The function is NOT SCHEDULE-SAFE! */ | 829 | /* The function is NOT SCHEDULE-SAFE! */ |
808 | int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */ | 830 | int search_for_position_by_key(struct super_block *sb, |
809 | const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */ | 831 | /* Key to search (cpu variable) */ |
810 | struct treepath *search_path /* Filled up by this function. */ | 832 | const struct cpu_key *p_cpu_key, |
811 | ) | 833 | /* Filled up by this function. */ |
834 | struct treepath *search_path) | ||
812 | { | 835 | { |
813 | struct item_head *p_le_ih; /* pointer to on-disk structure */ | 836 | struct item_head *p_le_ih; /* pointer to on-disk structure */ |
814 | int blk_size; | 837 | int blk_size; |
@@ -851,7 +874,8 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b | |||
851 | if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { | 874 | if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) { |
852 | return FILE_NOT_FOUND; | 875 | return FILE_NOT_FOUND; |
853 | } | 876 | } |
854 | // FIXME: quite ugly this far | 877 | |
878 | /* FIXME: quite ugly this far */ | ||
855 | 879 | ||
856 | item_offset = le_ih_k_offset(p_le_ih); | 880 | item_offset = le_ih_k_offset(p_le_ih); |
857 | offset = cpu_key_k_offset(p_cpu_key); | 881 | offset = cpu_key_k_offset(p_cpu_key); |
@@ -866,8 +890,10 @@ int search_for_position_by_key(struct super_block *sb, /* Pointer to the super b | |||
866 | return POSITION_FOUND; | 890 | return POSITION_FOUND; |
867 | } | 891 | } |
868 | 892 | ||
869 | /* Needed byte is not contained in the item pointed to by the | 893 | /* |
870 | path. Set pos_in_item out of the item. */ | 894 | * Needed byte is not contained in the item pointed to by the |
895 | * path. Set pos_in_item out of the item. | ||
896 | */ | ||
871 | if (is_indirect_le_ih(p_le_ih)) | 897 | if (is_indirect_le_ih(p_le_ih)) |
872 | pos_in_item(search_path) = | 898 | pos_in_item(search_path) = |
873 | ih_item_len(p_le_ih) / UNFM_P_SIZE; | 899 | ih_item_len(p_le_ih) / UNFM_P_SIZE; |
@@ -896,15 +922,13 @@ int comp_items(const struct item_head *stored_ih, const struct treepath *path) | |||
896 | return memcmp(stored_ih, ih, IH_SIZE); | 922 | return memcmp(stored_ih, ih, IH_SIZE); |
897 | } | 923 | } |
898 | 924 | ||
899 | /* unformatted nodes are not logged anymore, ever. This is safe | 925 | /* unformatted nodes are not logged anymore, ever. This is safe now */ |
900 | ** now | ||
901 | */ | ||
902 | #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) | 926 | #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) |
903 | 927 | ||
904 | // block can not be forgotten as it is in I/O or held by someone | 928 | /* block can not be forgotten as it is in I/O or held by someone */ |
905 | #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) | 929 | #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) |
906 | 930 | ||
907 | // prepare for delete or cut of direct item | 931 | /* prepare for delete or cut of direct item */ |
908 | static inline int prepare_for_direct_item(struct treepath *path, | 932 | static inline int prepare_for_direct_item(struct treepath *path, |
909 | struct item_head *le_ih, | 933 | struct item_head *le_ih, |
910 | struct inode *inode, | 934 | struct inode *inode, |
@@ -917,9 +941,8 @@ static inline int prepare_for_direct_item(struct treepath *path, | |||
917 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 941 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
918 | return M_DELETE; | 942 | return M_DELETE; |
919 | } | 943 | } |
920 | // new file gets truncated | 944 | /* new file gets truncated */ |
921 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { | 945 | if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) { |
922 | // | ||
923 | round_len = ROUND_UP(new_file_length); | 946 | round_len = ROUND_UP(new_file_length); |
924 | /* this was new_file_length < le_ih ... */ | 947 | /* this was new_file_length < le_ih ... */ |
925 | if (round_len < le_ih_k_offset(le_ih)) { | 948 | if (round_len < le_ih_k_offset(le_ih)) { |
@@ -933,12 +956,13 @@ static inline int prepare_for_direct_item(struct treepath *path, | |||
933 | return M_CUT; /* Cut from this item. */ | 956 | return M_CUT; /* Cut from this item. */ |
934 | } | 957 | } |
935 | 958 | ||
936 | // old file: items may have any length | 959 | /* old file: items may have any length */ |
937 | 960 | ||
938 | if (new_file_length < le_ih_k_offset(le_ih)) { | 961 | if (new_file_length < le_ih_k_offset(le_ih)) { |
939 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 962 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
940 | return M_DELETE; /* Delete this item. */ | 963 | return M_DELETE; /* Delete this item. */ |
941 | } | 964 | } |
965 | |||
942 | /* Calculate first position and size for cutting from item. */ | 966 | /* Calculate first position and size for cutting from item. */ |
943 | *cut_size = -(ih_item_len(le_ih) - | 967 | *cut_size = -(ih_item_len(le_ih) - |
944 | (pos_in_item(path) = | 968 | (pos_in_item(path) = |
@@ -957,12 +981,15 @@ static inline int prepare_for_direntry_item(struct treepath *path, | |||
957 | RFALSE(ih_entry_count(le_ih) != 2, | 981 | RFALSE(ih_entry_count(le_ih) != 2, |
958 | "PAP-5220: incorrect empty directory item (%h)", le_ih); | 982 | "PAP-5220: incorrect empty directory item (%h)", le_ih); |
959 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 983 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
960 | return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ | 984 | /* Delete the directory item containing "." and ".." entry. */ |
985 | return M_DELETE; | ||
961 | } | 986 | } |
962 | 987 | ||
963 | if (ih_entry_count(le_ih) == 1) { | 988 | if (ih_entry_count(le_ih) == 1) { |
964 | /* Delete the directory item such as there is one record only | 989 | /* |
965 | in this item */ | 990 | * Delete the directory item such as there is one record only |
991 | * in this item | ||
992 | */ | ||
966 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); | 993 | *cut_size = -(IH_SIZE + ih_item_len(le_ih)); |
967 | return M_DELETE; | 994 | return M_DELETE; |
968 | } | 995 | } |
@@ -976,14 +1003,30 @@ static inline int prepare_for_direntry_item(struct treepath *path, | |||
976 | 1003 | ||
977 | #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) | 1004 | #define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1) |
978 | 1005 | ||
979 | /* If the path points to a directory or direct item, calculate mode and the size cut, for balance. | 1006 | /* |
980 | If the path points to an indirect item, remove some number of its unformatted nodes. | 1007 | * If the path points to a directory or direct item, calculate mode |
981 | In case of file truncate calculate whether this item must be deleted/truncated or last | 1008 | * and the size cut, for balance. |
982 | unformatted node of this item will be converted to a direct item. | 1009 | * If the path points to an indirect item, remove some number of its |
983 | This function returns a determination of what balance mode the calling function should employ. */ | 1010 | * unformatted nodes. |
984 | static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed | 1011 | * In case of file truncate calculate whether this item must be |
985 | from end of the file. */ | 1012 | * deleted/truncated or last unformatted node of this item will be |
986 | int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */ | 1013 | * converted to a direct item. |
1014 | * This function returns a determination of what balance mode the | ||
1015 | * calling function should employ. | ||
1016 | */ | ||
1017 | static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, | ||
1018 | struct inode *inode, | ||
1019 | struct treepath *path, | ||
1020 | const struct cpu_key *item_key, | ||
1021 | /* | ||
1022 | * Number of unformatted nodes | ||
1023 | * which were removed from end | ||
1024 | * of the file. | ||
1025 | */ | ||
1026 | int *removed, | ||
1027 | int *cut_size, | ||
1028 | /* MAX_KEY_OFFSET in case of delete. */ | ||
1029 | unsigned long long new_file_length | ||
987 | ) | 1030 | ) |
988 | { | 1031 | { |
989 | struct super_block *sb = inode->i_sb; | 1032 | struct super_block *sb = inode->i_sb; |
@@ -1023,8 +1066,10 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1023 | int pos = 0; | 1066 | int pos = 0; |
1024 | 1067 | ||
1025 | if ( new_file_length == max_reiserfs_offset (inode) ) { | 1068 | if ( new_file_length == max_reiserfs_offset (inode) ) { |
1026 | /* prepare_for_delete_or_cut() is called by | 1069 | /* |
1027 | * reiserfs_delete_item() */ | 1070 | * prepare_for_delete_or_cut() is called by |
1071 | * reiserfs_delete_item() | ||
1072 | */ | ||
1028 | new_file_length = 0; | 1073 | new_file_length = 0; |
1029 | delete = 1; | 1074 | delete = 1; |
1030 | } | 1075 | } |
@@ -1040,9 +1085,12 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1040 | __le32 *unfm; | 1085 | __le32 *unfm; |
1041 | __u32 block; | 1086 | __u32 block; |
1042 | 1087 | ||
1043 | /* Each unformatted block deletion may involve one additional | 1088 | /* |
1044 | * bitmap block into the transaction, thereby the initial | 1089 | * Each unformatted block deletion may involve |
1045 | * journal space reservation might not be enough. */ | 1090 | * one additional bitmap block into the transaction, |
1091 | * thereby the initial journal space reservation | ||
1092 | * might not be enough. | ||
1093 | */ | ||
1046 | if (!delete && (*cut_size) != 0 && | 1094 | if (!delete && (*cut_size) != 0 && |
1047 | reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) | 1095 | reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) |
1048 | break; | 1096 | break; |
@@ -1074,17 +1122,21 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st | |||
1074 | break; | 1122 | break; |
1075 | } | 1123 | } |
1076 | } | 1124 | } |
1077 | /* a trick. If the buffer has been logged, this will do nothing. If | 1125 | /* |
1078 | ** we've broken the loop without logging it, it will restore the | 1126 | * a trick. If the buffer has been logged, this will |
1079 | ** buffer */ | 1127 | * do nothing. If we've broken the loop without logging |
1128 | * it, it will restore the buffer | ||
1129 | */ | ||
1080 | reiserfs_restore_prepared_buffer(sb, bh); | 1130 | reiserfs_restore_prepared_buffer(sb, bh); |
1081 | } while (need_re_search && | 1131 | } while (need_re_search && |
1082 | search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); | 1132 | search_for_position_by_key(sb, item_key, path) == POSITION_FOUND); |
1083 | pos_in_item(path) = pos * UNFM_P_SIZE; | 1133 | pos_in_item(path) = pos * UNFM_P_SIZE; |
1084 | 1134 | ||
1085 | if (*cut_size == 0) { | 1135 | if (*cut_size == 0) { |
1086 | /* Nothing were cut. maybe convert last unformatted node to the | 1136 | /* |
1087 | * direct item? */ | 1137 | * Nothing was cut. maybe convert last unformatted node to the |
1138 | * direct item? | ||
1139 | */ | ||
1088 | result = M_CONVERT; | 1140 | result = M_CONVERT; |
1089 | } | 1141 | } |
1090 | return result; | 1142 | return result; |
@@ -1104,9 +1156,11 @@ static int calc_deleted_bytes_number(struct tree_balance *tb, char mode) | |||
1104 | (mode == | 1156 | (mode == |
1105 | M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; | 1157 | M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0]; |
1106 | if (is_direntry_le_ih(p_le_ih)) { | 1158 | if (is_direntry_le_ih(p_le_ih)) { |
1107 | /* return EMPTY_DIR_SIZE; We delete emty directoris only. | 1159 | /* |
1108 | * we can't use EMPTY_DIR_SIZE, as old format dirs have a different | 1160 | * return EMPTY_DIR_SIZE; We delete emty directories only. |
1109 | * empty size. ick. FIXME, is this right? */ | 1161 | * we can't use EMPTY_DIR_SIZE, as old format dirs have a |
1162 | * different empty size. ick. FIXME, is this right? | ||
1163 | */ | ||
1110 | return del_size; | 1164 | return del_size; |
1111 | } | 1165 | } |
1112 | 1166 | ||
@@ -1169,7 +1223,8 @@ char head2type(struct item_head *ih) | |||
1169 | } | 1223 | } |
1170 | #endif | 1224 | #endif |
1171 | 1225 | ||
1172 | /* Delete object item. | 1226 | /* |
1227 | * Delete object item. | ||
1173 | * th - active transaction handle | 1228 | * th - active transaction handle |
1174 | * path - path to the deleted item | 1229 | * path - path to the deleted item |
1175 | * item_key - key to search for the deleted item | 1230 | * item_key - key to search for the deleted item |
@@ -1221,7 +1276,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1221 | 1276 | ||
1222 | PROC_INFO_INC(sb, delete_item_restarted); | 1277 | PROC_INFO_INC(sb, delete_item_restarted); |
1223 | 1278 | ||
1224 | // file system changed, repeat search | 1279 | /* file system changed, repeat search */ |
1225 | ret_value = | 1280 | ret_value = |
1226 | search_for_position_by_key(sb, item_key, path); | 1281 | search_for_position_by_key(sb, item_key, path); |
1227 | if (ret_value == IO_ERROR) | 1282 | if (ret_value == IO_ERROR) |
@@ -1238,16 +1293,18 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1238 | unfix_nodes(&s_del_balance); | 1293 | unfix_nodes(&s_del_balance); |
1239 | return 0; | 1294 | return 0; |
1240 | } | 1295 | } |
1241 | // reiserfs_delete_item returns item length when success | 1296 | |
1297 | /* reiserfs_delete_item returns item length when success */ | ||
1242 | ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); | 1298 | ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); |
1243 | q_ih = tp_item_head(path); | 1299 | q_ih = tp_item_head(path); |
1244 | quota_cut_bytes = ih_item_len(q_ih); | 1300 | quota_cut_bytes = ih_item_len(q_ih); |
1245 | 1301 | ||
1246 | /* hack so the quota code doesn't have to guess if the file | 1302 | /* |
1247 | ** has a tail. On tail insert, we allocate quota for 1 unformatted node. | 1303 | * hack so the quota code doesn't have to guess if the file has a |
1248 | ** We test the offset because the tail might have been | 1304 | * tail. On tail insert, we allocate quota for 1 unformatted node. |
1249 | ** split into multiple items, and we only want to decrement for | 1305 | * We test the offset because the tail might have been |
1250 | ** the unfm node once | 1306 | * split into multiple items, and we only want to decrement for |
1307 | * the unfm node once | ||
1251 | */ | 1308 | */ |
1252 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { | 1309 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) { |
1253 | if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { | 1310 | if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) { |
@@ -1261,24 +1318,28 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1261 | int off; | 1318 | int off; |
1262 | char *data; | 1319 | char *data; |
1263 | 1320 | ||
1264 | /* We are in direct2indirect conversion, so move tail contents | 1321 | /* |
1265 | to the unformatted node */ | 1322 | * We are in direct2indirect conversion, so move tail contents |
1266 | /* note, we do the copy before preparing the buffer because we | 1323 | * to the unformatted node |
1267 | ** don't care about the contents of the unformatted node yet. | 1324 | */ |
1268 | ** the only thing we really care about is the direct item's data | 1325 | /* |
1269 | ** is in the unformatted node. | 1326 | * note, we do the copy before preparing the buffer because we |
1270 | ** | 1327 | * don't care about the contents of the unformatted node yet. |
1271 | ** Otherwise, we would have to call reiserfs_prepare_for_journal on | 1328 | * the only thing we really care about is the direct item's |
1272 | ** the unformatted node, which might schedule, meaning we'd have to | 1329 | * data is in the unformatted node. |
1273 | ** loop all the way back up to the start of the while loop. | 1330 | * |
1274 | ** | 1331 | * Otherwise, we would have to call |
1275 | ** The unformatted node must be dirtied later on. We can't be | 1332 | * reiserfs_prepare_for_journal on the unformatted node, |
1276 | ** sure here if the entire tail has been deleted yet. | 1333 | * which might schedule, meaning we'd have to loop all the |
1277 | ** | 1334 | * way back up to the start of the while loop. |
1278 | ** un_bh is from the page cache (all unformatted nodes are | 1335 | * |
1279 | ** from the page cache) and might be a highmem page. So, we | 1336 | * The unformatted node must be dirtied later on. We can't be |
1280 | ** can't use un_bh->b_data. | 1337 | * sure here if the entire tail has been deleted yet. |
1281 | ** -clm | 1338 | * |
1339 | * un_bh is from the page cache (all unformatted nodes are | ||
1340 | * from the page cache) and might be a highmem page. So, we | ||
1341 | * can't use un_bh->b_data. | ||
1342 | * -clm | ||
1282 | */ | 1343 | */ |
1283 | 1344 | ||
1284 | data = kmap_atomic(un_bh->b_page); | 1345 | data = kmap_atomic(un_bh->b_page); |
@@ -1288,6 +1349,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1288 | ret_value); | 1349 | ret_value); |
1289 | kunmap_atomic(data); | 1350 | kunmap_atomic(data); |
1290 | } | 1351 | } |
1352 | |||
1291 | /* Perform balancing after all resources have been collected at once. */ | 1353 | /* Perform balancing after all resources have been collected at once. */ |
1292 | do_balance(&s_del_balance, NULL, NULL, M_DELETE); | 1354 | do_balance(&s_del_balance, NULL, NULL, M_DELETE); |
1293 | 1355 | ||
@@ -1304,20 +1366,21 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th, | |||
1304 | return ret_value; | 1366 | return ret_value; |
1305 | } | 1367 | } |
1306 | 1368 | ||
1307 | /* Summary Of Mechanisms For Handling Collisions Between Processes: | 1369 | /* |
1308 | 1370 | * Summary Of Mechanisms For Handling Collisions Between Processes: | |
1309 | deletion of the body of the object is performed by iput(), with the | 1371 | * |
1310 | result that if multiple processes are operating on a file, the | 1372 | * deletion of the body of the object is performed by iput(), with the |
1311 | deletion of the body of the file is deferred until the last process | 1373 | * result that if multiple processes are operating on a file, the |
1312 | that has an open inode performs its iput(). | 1374 | * deletion of the body of the file is deferred until the last process |
1313 | 1375 | * that has an open inode performs its iput(). | |
1314 | writes and truncates are protected from collisions by use of | 1376 | * |
1315 | semaphores. | 1377 | * writes and truncates are protected from collisions by use of |
1316 | 1378 | * semaphores. | |
1317 | creates, linking, and mknod are protected from collisions with other | 1379 | * |
1318 | processes by making the reiserfs_add_entry() the last step in the | 1380 | * creates, linking, and mknod are protected from collisions with other |
1319 | creation, and then rolling back all changes if there was a collision. | 1381 | * processes by making the reiserfs_add_entry() the last step in the |
1320 | - Hans | 1382 | * creation, and then rolling back all changes if there was a collision. |
1383 | * - Hans | ||
1321 | */ | 1384 | */ |
1322 | 1385 | ||
1323 | /* this deletes item which never gets split */ | 1386 | /* this deletes item which never gets split */ |
@@ -1347,7 +1410,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1347 | } | 1410 | } |
1348 | if (retval != ITEM_FOUND) { | 1411 | if (retval != ITEM_FOUND) { |
1349 | pathrelse(&path); | 1412 | pathrelse(&path); |
1350 | // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir | 1413 | /* |
1414 | * No need for a warning, if there is just no free | ||
1415 | * space to insert '..' item into the | ||
1416 | * newly-created subdir | ||
1417 | */ | ||
1351 | if (! | 1418 | if (! |
1352 | ((unsigned long long) | 1419 | ((unsigned long long) |
1353 | GET_HASH_VALUE(le_key_k_offset | 1420 | GET_HASH_VALUE(le_key_k_offset |
@@ -1376,7 +1443,11 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1376 | 1443 | ||
1377 | if (retval == CARRY_ON) { | 1444 | if (retval == CARRY_ON) { |
1378 | do_balance(&tb, NULL, NULL, M_DELETE); | 1445 | do_balance(&tb, NULL, NULL, M_DELETE); |
1379 | if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ | 1446 | /* |
1447 | * Should we count quota for item? (we don't | ||
1448 | * count quotas for save-links) | ||
1449 | */ | ||
1450 | if (inode) { | ||
1380 | int depth; | 1451 | int depth; |
1381 | #ifdef REISERQUOTA_DEBUG | 1452 | #ifdef REISERQUOTA_DEBUG |
1382 | reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, | 1453 | reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, |
@@ -1391,7 +1462,8 @@ void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, | |||
1391 | } | 1462 | } |
1392 | break; | 1463 | break; |
1393 | } | 1464 | } |
1394 | // IO_ERROR, NO_DISK_SPACE, etc | 1465 | |
1466 | /* IO_ERROR, NO_DISK_SPACE, etc */ | ||
1395 | reiserfs_warning(th->t_super, "vs-5360", | 1467 | reiserfs_warning(th->t_super, "vs-5360", |
1396 | "could not delete %K due to fix_nodes failure", | 1468 | "could not delete %K due to fix_nodes failure", |
1397 | &cpu_key); | 1469 | &cpu_key); |
@@ -1447,11 +1519,13 @@ static void unmap_buffers(struct page *page, loff_t pos) | |||
1447 | do { | 1519 | do { |
1448 | next = bh->b_this_page; | 1520 | next = bh->b_this_page; |
1449 | 1521 | ||
1450 | /* we want to unmap the buffers that contain the tail, and | 1522 | /* |
1451 | ** all the buffers after it (since the tail must be at the | 1523 | * we want to unmap the buffers that contain |
1452 | ** end of the file). We don't want to unmap file data | 1524 | * the tail, and all the buffers after it |
1453 | ** before the tail, since it might be dirty and waiting to | 1525 | * (since the tail must be at the end of the |
1454 | ** reach disk | 1526 | * file). We don't want to unmap file data |
1527 | * before the tail, since it might be dirty | ||
1528 | * and waiting to reach disk | ||
1455 | */ | 1529 | */ |
1456 | cur_index += bh->b_size; | 1530 | cur_index += bh->b_size; |
1457 | if (cur_index > tail_index) { | 1531 | if (cur_index > tail_index) { |
@@ -1476,9 +1550,10 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, | |||
1476 | BUG_ON(!th->t_trans_id); | 1550 | BUG_ON(!th->t_trans_id); |
1477 | BUG_ON(new_file_size != inode->i_size); | 1551 | BUG_ON(new_file_size != inode->i_size); |
1478 | 1552 | ||
1479 | /* the page being sent in could be NULL if there was an i/o error | 1553 | /* |
1480 | ** reading in the last block. The user will hit problems trying to | 1554 | * the page being sent in could be NULL if there was an i/o error |
1481 | ** read the file, but for now we just skip the indirect2direct | 1555 | * reading in the last block. The user will hit problems trying to |
1556 | * read the file, but for now we just skip the indirect2direct | ||
1482 | */ | 1557 | */ |
1483 | if (atomic_read(&inode->i_count) > 1 || | 1558 | if (atomic_read(&inode->i_count) > 1 || |
1484 | !tail_has_to_be_packed(inode) || | 1559 | !tail_has_to_be_packed(inode) || |
@@ -1490,17 +1565,18 @@ static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th, | |||
1490 | pathrelse(path); | 1565 | pathrelse(path); |
1491 | return cut_bytes; | 1566 | return cut_bytes; |
1492 | } | 1567 | } |
1568 | |||
1493 | /* Perform the conversion to a direct_item. */ | 1569 | /* Perform the conversion to a direct_item. */ |
1494 | /* return indirect_to_direct(inode, path, item_key, | ||
1495 | new_file_size, mode); */ | ||
1496 | return indirect2direct(th, inode, page, path, item_key, | 1570 | return indirect2direct(th, inode, page, path, item_key, |
1497 | new_file_size, mode); | 1571 | new_file_size, mode); |
1498 | } | 1572 | } |
1499 | 1573 | ||
1500 | /* we did indirect_to_direct conversion. And we have inserted direct | 1574 | /* |
1501 | item successesfully, but there were no disk space to cut unfm | 1575 | * we did indirect_to_direct conversion. And we have inserted direct |
1502 | pointer being converted. Therefore we have to delete inserted | 1576 | * item successesfully, but there were no disk space to cut unfm |
1503 | direct item(s) */ | 1577 | * pointer being converted. Therefore we have to delete inserted |
1578 | * direct item(s) | ||
1579 | */ | ||
1504 | static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | 1580 | static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, |
1505 | struct inode *inode, struct treepath *path) | 1581 | struct inode *inode, struct treepath *path) |
1506 | { | 1582 | { |
@@ -1509,7 +1585,7 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | |||
1509 | int removed; | 1585 | int removed; |
1510 | BUG_ON(!th->t_trans_id); | 1586 | BUG_ON(!th->t_trans_id); |
1511 | 1587 | ||
1512 | make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!! | 1588 | make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); |
1513 | tail_key.key_length = 4; | 1589 | tail_key.key_length = 4; |
1514 | 1590 | ||
1515 | tail_len = | 1591 | tail_len = |
@@ -1539,7 +1615,6 @@ static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th, | |||
1539 | reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " | 1615 | reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct " |
1540 | "conversion has been rolled back due to " | 1616 | "conversion has been rolled back due to " |
1541 | "lack of disk space"); | 1617 | "lack of disk space"); |
1542 | //mark_file_without_tail (inode); | ||
1543 | mark_inode_dirty(inode); | 1618 | mark_inode_dirty(inode); |
1544 | } | 1619 | } |
1545 | 1620 | ||
@@ -1551,15 +1626,18 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1551 | struct page *page, loff_t new_file_size) | 1626 | struct page *page, loff_t new_file_size) |
1552 | { | 1627 | { |
1553 | struct super_block *sb = inode->i_sb; | 1628 | struct super_block *sb = inode->i_sb; |
1554 | /* Every function which is going to call do_balance must first | 1629 | /* |
1555 | create a tree_balance structure. Then it must fill up this | 1630 | * Every function which is going to call do_balance must first |
1556 | structure by using the init_tb_struct and fix_nodes functions. | 1631 | * create a tree_balance structure. Then it must fill up this |
1557 | After that we can make tree balancing. */ | 1632 | * structure by using the init_tb_struct and fix_nodes functions. |
1633 | * After that we can make tree balancing. | ||
1634 | */ | ||
1558 | struct tree_balance s_cut_balance; | 1635 | struct tree_balance s_cut_balance; |
1559 | struct item_head *p_le_ih; | 1636 | struct item_head *p_le_ih; |
1560 | int cut_size = 0, /* Amount to be cut. */ | 1637 | int cut_size = 0; /* Amount to be cut. */ |
1561 | ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */ | 1638 | int ret_value = CARRY_ON; |
1562 | is_inode_locked = 0; | 1639 | int removed = 0; /* Number of the removed unformatted nodes. */ |
1640 | int is_inode_locked = 0; | ||
1563 | char mode; /* Mode of the balance. */ | 1641 | char mode; /* Mode of the balance. */ |
1564 | int retval2 = -1; | 1642 | int retval2 = -1; |
1565 | int quota_cut_bytes; | 1643 | int quota_cut_bytes; |
@@ -1571,21 +1649,27 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1571 | init_tb_struct(th, &s_cut_balance, inode->i_sb, path, | 1649 | init_tb_struct(th, &s_cut_balance, inode->i_sb, path, |
1572 | cut_size); | 1650 | cut_size); |
1573 | 1651 | ||
1574 | /* Repeat this loop until we either cut the item without needing | 1652 | /* |
1575 | to balance, or we fix_nodes without schedule occurring */ | 1653 | * Repeat this loop until we either cut the item without needing |
1654 | * to balance, or we fix_nodes without schedule occurring | ||
1655 | */ | ||
1576 | while (1) { | 1656 | while (1) { |
1577 | /* Determine the balance mode, position of the first byte to | 1657 | /* |
1578 | be cut, and size to be cut. In case of the indirect item | 1658 | * Determine the balance mode, position of the first byte to |
1579 | free unformatted nodes which are pointed to by the cut | 1659 | * be cut, and size to be cut. In case of the indirect item |
1580 | pointers. */ | 1660 | * free unformatted nodes which are pointed to by the cut |
1661 | * pointers. | ||
1662 | */ | ||
1581 | 1663 | ||
1582 | mode = | 1664 | mode = |
1583 | prepare_for_delete_or_cut(th, inode, path, | 1665 | prepare_for_delete_or_cut(th, inode, path, |
1584 | item_key, &removed, | 1666 | item_key, &removed, |
1585 | &cut_size, new_file_size); | 1667 | &cut_size, new_file_size); |
1586 | if (mode == M_CONVERT) { | 1668 | if (mode == M_CONVERT) { |
1587 | /* convert last unformatted node to direct item or leave | 1669 | /* |
1588 | tail in the unformatted node */ | 1670 | * convert last unformatted node to direct item or |
1671 | * leave tail in the unformatted node | ||
1672 | */ | ||
1589 | RFALSE(ret_value != CARRY_ON, | 1673 | RFALSE(ret_value != CARRY_ON, |
1590 | "PAP-5570: can not convert twice"); | 1674 | "PAP-5570: can not convert twice"); |
1591 | 1675 | ||
@@ -1599,15 +1683,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1599 | 1683 | ||
1600 | is_inode_locked = 1; | 1684 | is_inode_locked = 1; |
1601 | 1685 | ||
1602 | /* removing of last unformatted node will change value we | 1686 | /* |
1603 | have to return to truncate. Save it */ | 1687 | * removing of last unformatted node will |
1688 | * change value we have to return to truncate. | ||
1689 | * Save it | ||
1690 | */ | ||
1604 | retval2 = ret_value; | 1691 | retval2 = ret_value; |
1605 | /*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */ | ||
1606 | 1692 | ||
1607 | /* So, we have performed the first part of the conversion: | 1693 | /* |
1608 | inserting the new direct item. Now we are removing the | 1694 | * So, we have performed the first part of the |
1609 | last unformatted node pointer. Set key to search for | 1695 | * conversion: |
1610 | it. */ | 1696 | * inserting the new direct item. Now we are |
1697 | * removing the last unformatted node pointer. | ||
1698 | * Set key to search for it. | ||
1699 | */ | ||
1611 | set_cpu_key_k_type(item_key, TYPE_INDIRECT); | 1700 | set_cpu_key_k_type(item_key, TYPE_INDIRECT); |
1612 | item_key->key_length = 4; | 1701 | item_key->key_length = 4; |
1613 | new_file_size -= | 1702 | new_file_size -= |
@@ -1650,11 +1739,13 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1650 | return (ret_value == IO_ERROR) ? -EIO : -ENOENT; | 1739 | return (ret_value == IO_ERROR) ? -EIO : -ENOENT; |
1651 | } /* while */ | 1740 | } /* while */ |
1652 | 1741 | ||
1653 | // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) | 1742 | /* check fix_nodes results (IO_ERROR or NO_DISK_SPACE) */ |
1654 | if (ret_value != CARRY_ON) { | 1743 | if (ret_value != CARRY_ON) { |
1655 | if (is_inode_locked) { | 1744 | if (is_inode_locked) { |
1656 | // FIXME: this seems to be not needed: we are always able | 1745 | /* |
1657 | // to cut item | 1746 | * FIXME: this seems to be not needed: we are always |
1747 | * able to cut item | ||
1748 | */ | ||
1658 | indirect_to_direct_roll_back(th, inode, path); | 1749 | indirect_to_direct_roll_back(th, inode, path); |
1659 | } | 1750 | } |
1660 | if (ret_value == NO_DISK_SPACE) | 1751 | if (ret_value == NO_DISK_SPACE) |
@@ -1678,15 +1769,16 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1678 | else | 1769 | else |
1679 | ret_value = retval2; | 1770 | ret_value = retval2; |
1680 | 1771 | ||
1681 | /* For direct items, we only change the quota when deleting the last | 1772 | /* |
1682 | ** item. | 1773 | * For direct items, we only change the quota when deleting the last |
1774 | * item. | ||
1683 | */ | 1775 | */ |
1684 | p_le_ih = tp_item_head(s_cut_balance.tb_path); | 1776 | p_le_ih = tp_item_head(s_cut_balance.tb_path); |
1685 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { | 1777 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) { |
1686 | if (mode == M_DELETE && | 1778 | if (mode == M_DELETE && |
1687 | (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == | 1779 | (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) == |
1688 | 1) { | 1780 | 1) { |
1689 | // FIXME: this is to keep 3.5 happy | 1781 | /* FIXME: this is to keep 3.5 happy */ |
1690 | REISERFS_I(inode)->i_first_direct_byte = U32_MAX; | 1782 | REISERFS_I(inode)->i_first_direct_byte = U32_MAX; |
1691 | quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; | 1783 | quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE; |
1692 | } else { | 1784 | } else { |
@@ -1697,9 +1789,11 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1697 | if (is_inode_locked) { | 1789 | if (is_inode_locked) { |
1698 | struct item_head *le_ih = | 1790 | struct item_head *le_ih = |
1699 | tp_item_head(s_cut_balance.tb_path); | 1791 | tp_item_head(s_cut_balance.tb_path); |
1700 | /* we are going to complete indirect2direct conversion. Make | 1792 | /* |
1701 | sure, that we exactly remove last unformatted node pointer | 1793 | * we are going to complete indirect2direct conversion. Make |
1702 | of the item */ | 1794 | * sure, that we exactly remove last unformatted node pointer |
1795 | * of the item | ||
1796 | */ | ||
1703 | if (!is_indirect_le_ih(le_ih)) | 1797 | if (!is_indirect_le_ih(le_ih)) |
1704 | reiserfs_panic(sb, "vs-5652", | 1798 | reiserfs_panic(sb, "vs-5652", |
1705 | "item must be indirect %h", le_ih); | 1799 | "item must be indirect %h", le_ih); |
@@ -1717,17 +1811,20 @@ int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, | |||
1717 | "(CUT, insert_size==%d)", | 1811 | "(CUT, insert_size==%d)", |
1718 | le_ih, s_cut_balance.insert_size[0]); | 1812 | le_ih, s_cut_balance.insert_size[0]); |
1719 | } | 1813 | } |
1720 | /* it would be useful to make sure, that right neighboring | 1814 | /* |
1721 | item is direct item of this file */ | 1815 | * it would be useful to make sure, that right neighboring |
1816 | * item is direct item of this file | ||
1817 | */ | ||
1722 | } | 1818 | } |
1723 | #endif | 1819 | #endif |
1724 | 1820 | ||
1725 | do_balance(&s_cut_balance, NULL, NULL, mode); | 1821 | do_balance(&s_cut_balance, NULL, NULL, mode); |
1726 | if (is_inode_locked) { | 1822 | if (is_inode_locked) { |
1727 | /* we've done an indirect->direct conversion. when the data block | 1823 | /* |
1728 | ** was freed, it was removed from the list of blocks that must | 1824 | * we've done an indirect->direct conversion. when the |
1729 | ** be flushed before the transaction commits, make sure to | 1825 | * data block was freed, it was removed from the list of |
1730 | ** unmap and invalidate it | 1826 | * blocks that must be flushed before the transaction |
1827 | * commits, make sure to unmap and invalidate it | ||
1731 | */ | 1828 | */ |
1732 | unmap_buffers(page, tail_pos); | 1829 | unmap_buffers(page, tail_pos); |
1733 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | 1830 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; |
@@ -1758,20 +1855,25 @@ static void truncate_directory(struct reiserfs_transaction_handle *th, | |||
1758 | set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); | 1855 | set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA); |
1759 | } | 1856 | } |
1760 | 1857 | ||
1761 | /* Truncate file to the new size. Note, this must be called with a transaction | 1858 | /* |
1762 | already started */ | 1859 | * Truncate file to the new size. Note, this must be called with a |
1860 | * transaction already started | ||
1861 | */ | ||
1763 | int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | 1862 | int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, |
1764 | struct inode *inode, /* ->i_size contains new size */ | 1863 | struct inode *inode, /* ->i_size contains new size */ |
1765 | struct page *page, /* up to date for last block */ | 1864 | struct page *page, /* up to date for last block */ |
1766 | int update_timestamps /* when it is called by | 1865 | /* |
1767 | file_release to convert | 1866 | * when it is called by file_release to convert |
1768 | the tail - no timestamps | 1867 | * the tail - no timestamps should be updated |
1769 | should be updated */ | 1868 | */ |
1869 | int update_timestamps | ||
1770 | ) | 1870 | ) |
1771 | { | 1871 | { |
1772 | INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ | 1872 | INITIALIZE_PATH(s_search_path); /* Path to the current object item. */ |
1773 | struct item_head *p_le_ih; /* Pointer to an item header. */ | 1873 | struct item_head *p_le_ih; /* Pointer to an item header. */ |
1774 | struct cpu_key s_item_key; /* Key to search for a previous file item. */ | 1874 | |
1875 | /* Key to search for a previous file item. */ | ||
1876 | struct cpu_key s_item_key; | ||
1775 | loff_t file_size, /* Old file size. */ | 1877 | loff_t file_size, /* Old file size. */ |
1776 | new_file_size; /* New file size. */ | 1878 | new_file_size; /* New file size. */ |
1777 | int deleted; /* Number of deleted or truncated bytes. */ | 1879 | int deleted; /* Number of deleted or truncated bytes. */ |
@@ -1784,8 +1886,8 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1784 | || S_ISLNK(inode->i_mode))) | 1886 | || S_ISLNK(inode->i_mode))) |
1785 | return 0; | 1887 | return 0; |
1786 | 1888 | ||
1889 | /* deletion of directory - no need to update timestamps */ | ||
1787 | if (S_ISDIR(inode->i_mode)) { | 1890 | if (S_ISDIR(inode->i_mode)) { |
1788 | // deletion of directory - no need to update timestamps | ||
1789 | truncate_directory(th, inode); | 1891 | truncate_directory(th, inode); |
1790 | return 0; | 1892 | return 0; |
1791 | } | 1893 | } |
@@ -1793,7 +1895,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1793 | /* Get new file size. */ | 1895 | /* Get new file size. */ |
1794 | new_file_size = inode->i_size; | 1896 | new_file_size = inode->i_size; |
1795 | 1897 | ||
1796 | // FIXME: note, that key type is unimportant here | 1898 | /* FIXME: note, that key type is unimportant here */ |
1797 | make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), | 1899 | make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode), |
1798 | TYPE_DIRECT, 3); | 1900 | TYPE_DIRECT, 3); |
1799 | 1901 | ||
@@ -1827,9 +1929,11 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1827 | int bytes = | 1929 | int bytes = |
1828 | op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); | 1930 | op_bytes_number(p_le_ih, inode->i_sb->s_blocksize); |
1829 | 1931 | ||
1830 | /* this may mismatch with real file size: if last direct item | 1932 | /* |
1831 | had no padding zeros and last unformatted node had no free | 1933 | * this may mismatch with real file size: if last direct item |
1832 | space, this file would have this file size */ | 1934 | * had no padding zeros and last unformatted node had no free |
1935 | * space, this file would have this file size | ||
1936 | */ | ||
1833 | file_size = offset + bytes - 1; | 1937 | file_size = offset + bytes - 1; |
1834 | } | 1938 | } |
1835 | /* | 1939 | /* |
@@ -1867,14 +1971,17 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1867 | 1971 | ||
1868 | set_cpu_key_k_offset(&s_item_key, file_size); | 1972 | set_cpu_key_k_offset(&s_item_key, file_size); |
1869 | 1973 | ||
1870 | /* While there are bytes to truncate and previous file item is presented in the tree. */ | 1974 | /* |
1975 | * While there are bytes to truncate and previous | ||
1976 | * file item is presented in the tree. | ||
1977 | */ | ||
1871 | 1978 | ||
1872 | /* | 1979 | /* |
1873 | ** This loop could take a really long time, and could log | 1980 | * This loop could take a really long time, and could log |
1874 | ** many more blocks than a transaction can hold. So, we do a polite | 1981 | * many more blocks than a transaction can hold. So, we do |
1875 | ** journal end here, and if the transaction needs ending, we make | 1982 | * a polite journal end here, and if the transaction needs |
1876 | ** sure the file is consistent before ending the current trans | 1983 | * ending, we make sure the file is consistent before ending |
1877 | ** and starting a new one | 1984 | * the current trans and starting a new one |
1878 | */ | 1985 | */ |
1879 | if (journal_transaction_should_end(th, 0) || | 1986 | if (journal_transaction_should_end(th, 0) || |
1880 | reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { | 1987 | reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) { |
@@ -1906,7 +2013,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1906 | 2013 | ||
1907 | update_and_out: | 2014 | update_and_out: |
1908 | if (update_timestamps) { | 2015 | if (update_timestamps) { |
1909 | // this is truncate, not file closing | 2016 | /* this is truncate, not file closing */ |
1910 | inode->i_mtime = CURRENT_TIME_SEC; | 2017 | inode->i_mtime = CURRENT_TIME_SEC; |
1911 | inode->i_ctime = CURRENT_TIME_SEC; | 2018 | inode->i_ctime = CURRENT_TIME_SEC; |
1912 | } | 2019 | } |
@@ -1918,7 +2025,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, | |||
1918 | } | 2025 | } |
1919 | 2026 | ||
1920 | #ifdef CONFIG_REISERFS_CHECK | 2027 | #ifdef CONFIG_REISERFS_CHECK |
1921 | // this makes sure, that we __append__, not overwrite or add holes | 2028 | /* this makes sure, that we __append__, not overwrite or add holes */ |
1922 | static void check_research_for_paste(struct treepath *path, | 2029 | static void check_research_for_paste(struct treepath *path, |
1923 | const struct cpu_key *key) | 2030 | const struct cpu_key *key) |
1924 | { | 2031 | { |
@@ -1952,13 +2059,22 @@ static void check_research_for_paste(struct treepath *path, | |||
1952 | } | 2059 | } |
1953 | #endif /* config reiserfs check */ | 2060 | #endif /* config reiserfs check */ |
1954 | 2061 | ||
1955 | /* Paste bytes to the existing item. Returns bytes number pasted into the item. */ | 2062 | /* |
1956 | int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */ | 2063 | * Paste bytes to the existing item. |
1957 | const struct cpu_key *key, /* Key to search for the needed item. */ | 2064 | * Returns bytes number pasted into the item. |
1958 | struct inode *inode, /* Inode item belongs to */ | 2065 | */ |
1959 | const char *body, /* Pointer to the bytes to paste. */ | 2066 | int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, |
2067 | /* Path to the pasted item. */ | ||
2068 | struct treepath *search_path, | ||
2069 | /* Key to search for the needed item. */ | ||
2070 | const struct cpu_key *key, | ||
2071 | /* Inode item belongs to */ | ||
2072 | struct inode *inode, | ||
2073 | /* Pointer to the bytes to paste. */ | ||
2074 | const char *body, | ||
2075 | /* Size of pasted bytes. */ | ||
1960 | int pasted_size) | 2076 | int pasted_size) |
1961 | { /* Size of pasted bytes. */ | 2077 | { |
1962 | struct super_block *sb = inode->i_sb; | 2078 | struct super_block *sb = inode->i_sb; |
1963 | struct tree_balance s_paste_balance; | 2079 | struct tree_balance s_paste_balance; |
1964 | int retval; | 2080 | int retval; |
@@ -2019,8 +2135,10 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
2019 | #endif | 2135 | #endif |
2020 | } | 2136 | } |
2021 | 2137 | ||
2022 | /* Perform balancing after all resources are collected by fix_nodes, and | 2138 | /* |
2023 | accessing them will not risk triggering schedule. */ | 2139 | * Perform balancing after all resources are collected by fix_nodes, |
2140 | * and accessing them will not risk triggering schedule. | ||
2141 | */ | ||
2024 | if (retval == CARRY_ON) { | 2142 | if (retval == CARRY_ON) { |
2025 | do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); | 2143 | do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE); |
2026 | return 0; | 2144 | return 0; |
@@ -2041,7 +2159,8 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree | |||
2041 | return retval; | 2159 | return retval; |
2042 | } | 2160 | } |
2043 | 2161 | ||
2044 | /* Insert new item into the buffer at the path. | 2162 | /* |
2163 | * Insert new item into the buffer at the path. | ||
2045 | * th - active transaction handle | 2164 | * th - active transaction handle |
2046 | * path - path to the inserted item | 2165 | * path - path to the inserted item |
2047 | * ih - pointer to the item header to insert | 2166 | * ih - pointer to the item header to insert |
@@ -2064,8 +2183,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2064 | fs_gen = get_generation(inode->i_sb); | 2183 | fs_gen = get_generation(inode->i_sb); |
2065 | quota_bytes = ih_item_len(ih); | 2184 | quota_bytes = ih_item_len(ih); |
2066 | 2185 | ||
2067 | /* hack so the quota code doesn't have to guess if the file has | 2186 | /* |
2068 | ** a tail, links are always tails, so there's no guessing needed | 2187 | * hack so the quota code doesn't have to guess |
2188 | * if the file has a tail, links are always tails, | ||
2189 | * so there's no guessing needed | ||
2069 | */ | 2190 | */ |
2070 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) | 2191 | if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih)) |
2071 | quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; | 2192 | quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE; |
@@ -2074,8 +2195,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2074 | "reiserquota insert_item(): allocating %u id=%u type=%c", | 2195 | "reiserquota insert_item(): allocating %u id=%u type=%c", |
2075 | quota_bytes, inode->i_uid, head2type(ih)); | 2196 | quota_bytes, inode->i_uid, head2type(ih)); |
2076 | #endif | 2197 | #endif |
2077 | /* We can't dirty inode here. It would be immediately written but | 2198 | /* |
2078 | * appropriate stat item isn't inserted yet... */ | 2199 | * We can't dirty inode here. It would be immediately |
2200 | * written but appropriate stat item isn't inserted yet... | ||
2201 | */ | ||
2079 | depth = reiserfs_write_unlock_nested(inode->i_sb); | 2202 | depth = reiserfs_write_unlock_nested(inode->i_sb); |
2080 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); | 2203 | retval = dquot_alloc_space_nodirty(inode, quota_bytes); |
2081 | reiserfs_write_lock_nested(inode->i_sb, depth); | 2204 | reiserfs_write_lock_nested(inode->i_sb, depth); |
@@ -2089,7 +2212,10 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, | |||
2089 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES | 2212 | #ifdef DISPLACE_NEW_PACKING_LOCALITIES |
2090 | s_ins_balance.key = key->on_disk_key; | 2213 | s_ins_balance.key = key->on_disk_key; |
2091 | #endif | 2214 | #endif |
2092 | /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ | 2215 | /* |
2216 | * DQUOT_* can schedule, must check to be sure calling | ||
2217 | * fix_nodes is safe | ||
2218 | */ | ||
2093 | if (inode && fs_changed(fs_gen, inode->i_sb)) { | 2219 | if (inode && fs_changed(fs_gen, inode->i_sb)) { |
2094 | goto search_again; | 2220 | goto search_again; |
2095 | } | 2221 | } |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c02b6b07508d..6268bb8195c5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -153,13 +153,15 @@ static int reiserfs_unfreeze(struct super_block *s) | |||
153 | 153 | ||
154 | extern const struct in_core_key MAX_IN_CORE_KEY; | 154 | extern const struct in_core_key MAX_IN_CORE_KEY; |
155 | 155 | ||
156 | /* this is used to delete "save link" when there are no items of a | 156 | /* |
157 | file it points to. It can either happen if unlink is completed but | 157 | * this is used to delete "save link" when there are no items of a |
158 | "save unlink" removal, or if file has both unlink and truncate | 158 | * file it points to. It can either happen if unlink is completed but |
159 | pending and as unlink completes first (because key of "save link" | 159 | * "save unlink" removal, or if file has both unlink and truncate |
160 | protecting unlink is bigger that a key lf "save link" which | 160 | * pending and as unlink completes first (because key of "save link" |
161 | protects truncate), so there left no items to make truncate | 161 | * protecting unlink is bigger that a key lf "save link" which |
162 | completion on */ | 162 | * protects truncate), so there left no items to make truncate |
163 | * completion on | ||
164 | */ | ||
163 | static int remove_save_link_only(struct super_block *s, | 165 | static int remove_save_link_only(struct super_block *s, |
164 | struct reiserfs_key *key, int oid_free) | 166 | struct reiserfs_key *key, int oid_free) |
165 | { | 167 | { |
@@ -282,8 +284,10 @@ static int finish_unfinished(struct super_block *s) | |||
282 | 284 | ||
283 | inode = reiserfs_iget(s, &obj_key); | 285 | inode = reiserfs_iget(s, &obj_key); |
284 | if (!inode) { | 286 | if (!inode) { |
285 | /* the unlink almost completed, it just did not manage to remove | 287 | /* |
286 | "save" link and release objectid */ | 288 | * the unlink almost completed, it just did not |
289 | * manage to remove "save" link and release objectid | ||
290 | */ | ||
287 | reiserfs_warning(s, "vs-2180", "iget failed for %K", | 291 | reiserfs_warning(s, "vs-2180", "iget failed for %K", |
288 | &obj_key); | 292 | &obj_key); |
289 | retval = remove_save_link_only(s, &save_link_key, 1); | 293 | retval = remove_save_link_only(s, &save_link_key, 1); |
@@ -303,10 +307,13 @@ static int finish_unfinished(struct super_block *s) | |||
303 | reiserfs_write_lock_nested(inode->i_sb, depth); | 307 | reiserfs_write_lock_nested(inode->i_sb, depth); |
304 | 308 | ||
305 | if (truncate && S_ISDIR(inode->i_mode)) { | 309 | if (truncate && S_ISDIR(inode->i_mode)) { |
306 | /* We got a truncate request for a dir which is impossible. | 310 | /* |
307 | The only imaginable way is to execute unfinished truncate request | 311 | * We got a truncate request for a dir which |
308 | then boot into old kernel, remove the file and create dir with | 312 | * is impossible. The only imaginable way is to |
309 | the same key. */ | 313 | * execute unfinished truncate request then boot |
314 | * into old kernel, remove the file and create dir | ||
315 | * with the same key. | ||
316 | */ | ||
310 | reiserfs_warning(s, "green-2101", | 317 | reiserfs_warning(s, "green-2101", |
311 | "impossible truncate on a " | 318 | "impossible truncate on a " |
312 | "directory %k. Please report", | 319 | "directory %k. Please report", |
@@ -320,14 +327,16 @@ static int finish_unfinished(struct super_block *s) | |||
320 | if (truncate) { | 327 | if (truncate) { |
321 | REISERFS_I(inode)->i_flags |= | 328 | REISERFS_I(inode)->i_flags |= |
322 | i_link_saved_truncate_mask; | 329 | i_link_saved_truncate_mask; |
323 | /* not completed truncate found. New size was committed together | 330 | /* |
324 | with "save" link */ | 331 | * not completed truncate found. New size was |
332 | * committed together with "save" link | ||
333 | */ | ||
325 | reiserfs_info(s, "Truncating %k to %Ld ..", | 334 | reiserfs_info(s, "Truncating %k to %Ld ..", |
326 | INODE_PKEY(inode), inode->i_size); | 335 | INODE_PKEY(inode), inode->i_size); |
327 | reiserfs_truncate_file(inode, | 336 | |
328 | 0 | 337 | /* don't update modification time */ |
329 | /*don't update modification time */ | 338 | reiserfs_truncate_file(inode, 0); |
330 | ); | 339 | |
331 | retval = remove_save_link(inode, truncate); | 340 | retval = remove_save_link(inode, truncate); |
332 | } else { | 341 | } else { |
333 | REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; | 342 | REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask; |
@@ -373,10 +382,12 @@ static int finish_unfinished(struct super_block *s) | |||
373 | return retval; | 382 | return retval; |
374 | } | 383 | } |
375 | 384 | ||
376 | /* to protect file being unlinked from getting lost we "safe" link files | 385 | /* |
377 | being unlinked. This link will be deleted in the same transaction with last | 386 | * to protect file being unlinked from getting lost we "safe" link files |
378 | item of file. mounting the filesystem we scan all these links and remove | 387 | * being unlinked. This link will be deleted in the same transaction with last |
379 | files which almost got lost */ | 388 | * item of file. mounting the filesystem we scan all these links and remove |
389 | * files which almost got lost | ||
390 | */ | ||
380 | void add_save_link(struct reiserfs_transaction_handle *th, | 391 | void add_save_link(struct reiserfs_transaction_handle *th, |
381 | struct inode *inode, int truncate) | 392 | struct inode *inode, int truncate) |
382 | { | 393 | { |
@@ -530,7 +541,10 @@ static void reiserfs_put_super(struct super_block *s) | |||
530 | 541 | ||
531 | reiserfs_write_lock(s); | 542 | reiserfs_write_lock(s); |
532 | 543 | ||
533 | /* change file system state to current state if it was mounted with read-write permissions */ | 544 | /* |
545 | * change file system state to current state if it was mounted | ||
546 | * with read-write permissions | ||
547 | */ | ||
534 | if (!(s->s_flags & MS_RDONLY)) { | 548 | if (!(s->s_flags & MS_RDONLY)) { |
535 | if (!journal_begin(&th, s, 10)) { | 549 | if (!journal_begin(&th, s, 10)) { |
536 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), | 550 | reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), |
@@ -541,8 +555,9 @@ static void reiserfs_put_super(struct super_block *s) | |||
541 | } | 555 | } |
542 | } | 556 | } |
543 | 557 | ||
544 | /* note, journal_release checks for readonly mount, and can decide not | 558 | /* |
545 | ** to do a journal_end | 559 | * note, journal_release checks for readonly mount, and can |
560 | * decide not to do a journal_end | ||
546 | */ | 561 | */ |
547 | journal_release(&th, s); | 562 | journal_release(&th, s); |
548 | 563 | ||
@@ -635,8 +650,9 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) | |||
635 | } | 650 | } |
636 | reiserfs_write_lock(inode->i_sb); | 651 | reiserfs_write_lock(inode->i_sb); |
637 | 652 | ||
638 | /* this is really only used for atime updates, so they don't have | 653 | /* |
639 | ** to be included in O_SYNC or fsync | 654 | * this is really only used for atime updates, so they don't have |
655 | * to be included in O_SYNC or fsync | ||
640 | */ | 656 | */ |
641 | err = journal_begin(&th, inode->i_sb, 1); | 657 | err = journal_begin(&th, inode->i_sb, 1); |
642 | if (err) | 658 | if (err) |
@@ -789,31 +805,53 @@ static const struct export_operations reiserfs_export_ops = { | |||
789 | .get_parent = reiserfs_get_parent, | 805 | .get_parent = reiserfs_get_parent, |
790 | }; | 806 | }; |
791 | 807 | ||
792 | /* this struct is used in reiserfs_getopt () for containing the value for those | 808 | /* |
793 | mount options that have values rather than being toggles. */ | 809 | * this struct is used in reiserfs_getopt () for containing the value for |
810 | * those mount options that have values rather than being toggles. | ||
811 | */ | ||
794 | typedef struct { | 812 | typedef struct { |
795 | char *value; | 813 | char *value; |
796 | int setmask; /* bitmask which is to set on mount_options bitmask when this | 814 | /* |
797 | value is found, 0 is no bits are to be changed. */ | 815 | * bitmask which is to set on mount_options bitmask |
798 | int clrmask; /* bitmask which is to clear on mount_options bitmask when this | 816 | * when this value is found, 0 is no bits are to be changed. |
799 | value is found, 0 is no bits are to be changed. This is | 817 | */ |
800 | applied BEFORE setmask */ | 818 | int setmask; |
819 | /* | ||
820 | * bitmask which is to clear on mount_options bitmask | ||
821 | * when this value is found, 0 is no bits are to be changed. | ||
822 | * This is applied BEFORE setmask | ||
823 | */ | ||
824 | int clrmask; | ||
801 | } arg_desc_t; | 825 | } arg_desc_t; |
802 | 826 | ||
803 | /* Set this bit in arg_required to allow empty arguments */ | 827 | /* Set this bit in arg_required to allow empty arguments */ |
804 | #define REISERFS_OPT_ALLOWEMPTY 31 | 828 | #define REISERFS_OPT_ALLOWEMPTY 31 |
805 | 829 | ||
806 | /* this struct is used in reiserfs_getopt() for describing the set of reiserfs | 830 | /* |
807 | mount options */ | 831 | * this struct is used in reiserfs_getopt() for describing the |
832 | * set of reiserfs mount options | ||
833 | */ | ||
808 | typedef struct { | 834 | typedef struct { |
809 | char *option_name; | 835 | char *option_name; |
810 | int arg_required; /* 0 if argument is not required, not 0 otherwise */ | 836 | |
811 | const arg_desc_t *values; /* list of values accepted by an option */ | 837 | /* 0 if argument is not required, not 0 otherwise */ |
812 | int setmask; /* bitmask which is to set on mount_options bitmask when this | 838 | int arg_required; |
813 | value is found, 0 is no bits are to be changed. */ | 839 | |
814 | int clrmask; /* bitmask which is to clear on mount_options bitmask when this | 840 | /* list of values accepted by an option */ |
815 | value is found, 0 is no bits are to be changed. This is | 841 | const arg_desc_t *values; |
816 | applied BEFORE setmask */ | 842 | |
843 | /* | ||
844 | * bitmask which is to set on mount_options bitmask | ||
845 | * when this value is found, 0 is no bits are to be changed. | ||
846 | */ | ||
847 | int setmask; | ||
848 | |||
849 | /* | ||
850 | * bitmask which is to clear on mount_options bitmask | ||
851 | * when this value is found, 0 is no bits are to be changed. | ||
852 | * This is applied BEFORE setmask | ||
853 | */ | ||
854 | int clrmask; | ||
817 | } opt_desc_t; | 855 | } opt_desc_t; |
818 | 856 | ||
819 | /* possible values for -o data= */ | 857 | /* possible values for -o data= */ |
@@ -834,8 +872,10 @@ static const arg_desc_t barrier_mode[] = { | |||
834 | {.value = NULL} | 872 | {.value = NULL} |
835 | }; | 873 | }; |
836 | 874 | ||
837 | /* possible values for "-o block-allocator=" and bits which are to be set in | 875 | /* |
838 | s_mount_opt of reiserfs specific part of in-core super block */ | 876 | * possible values for "-o block-allocator=" and bits which are to be set in |
877 | * s_mount_opt of reiserfs specific part of in-core super block | ||
878 | */ | ||
839 | static const arg_desc_t balloc[] = { | 879 | static const arg_desc_t balloc[] = { |
840 | {"noborder", 1 << REISERFS_NO_BORDER, 0}, | 880 | {"noborder", 1 << REISERFS_NO_BORDER, 0}, |
841 | {"border", 0, 1 << REISERFS_NO_BORDER}, | 881 | {"border", 0, 1 << REISERFS_NO_BORDER}, |
@@ -865,21 +905,25 @@ static const arg_desc_t error_actions[] = { | |||
865 | {NULL, 0, 0}, | 905 | {NULL, 0, 0}, |
866 | }; | 906 | }; |
867 | 907 | ||
868 | /* proceed only one option from a list *cur - string containing of mount options | 908 | /* |
869 | opts - array of options which are accepted | 909 | * proceed only one option from a list *cur - string containing of mount |
870 | opt_arg - if option is found and requires an argument and if it is specifed | 910 | * options |
871 | in the input - pointer to the argument is stored here | 911 | * opts - array of options which are accepted |
872 | bit_flags - if option requires to set a certain bit - it is set here | 912 | * opt_arg - if option is found and requires an argument and if it is specifed |
873 | return -1 if unknown option is found, opt->arg_required otherwise */ | 913 | * in the input - pointer to the argument is stored here |
914 | * bit_flags - if option requires to set a certain bit - it is set here | ||
915 | * return -1 if unknown option is found, opt->arg_required otherwise | ||
916 | */ | ||
874 | static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | 917 | static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, |
875 | char **opt_arg, unsigned long *bit_flags) | 918 | char **opt_arg, unsigned long *bit_flags) |
876 | { | 919 | { |
877 | char *p; | 920 | char *p; |
878 | /* foo=bar, | 921 | /* |
879 | ^ ^ ^ | 922 | * foo=bar, |
880 | | | +-- option_end | 923 | * ^ ^ ^ |
881 | | +-- arg_start | 924 | * | | +-- option_end |
882 | +-- option_start | 925 | * | +-- arg_start |
926 | * +-- option_start | ||
883 | */ | 927 | */ |
884 | const opt_desc_t *opt; | 928 | const opt_desc_t *opt; |
885 | const arg_desc_t *arg; | 929 | const arg_desc_t *arg; |
@@ -894,9 +938,12 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
894 | } | 938 | } |
895 | 939 | ||
896 | if (!strncmp(p, "alloc=", 6)) { | 940 | if (!strncmp(p, "alloc=", 6)) { |
897 | /* Ugly special case, probably we should redo options parser so that | 941 | /* |
898 | it can understand several arguments for some options, also so that | 942 | * Ugly special case, probably we should redo options |
899 | it can fill several bitfields with option values. */ | 943 | * parser so that it can understand several arguments for |
944 | * some options, also so that it can fill several bitfields | ||
945 | * with option values. | ||
946 | */ | ||
900 | if (reiserfs_parse_alloc_options(s, p + 6)) { | 947 | if (reiserfs_parse_alloc_options(s, p + 6)) { |
901 | return -1; | 948 | return -1; |
902 | } else { | 949 | } else { |
@@ -959,7 +1006,10 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
959 | return -1; | 1006 | return -1; |
960 | } | 1007 | } |
961 | 1008 | ||
962 | /* move to the argument, or to next option if argument is not required */ | 1009 | /* |
1010 | * move to the argument, or to next option if argument is not | ||
1011 | * required | ||
1012 | */ | ||
963 | p++; | 1013 | p++; |
964 | 1014 | ||
965 | if (opt->arg_required | 1015 | if (opt->arg_required |
@@ -996,12 +1046,20 @@ static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts, | |||
996 | } | 1046 | } |
997 | 1047 | ||
998 | /* returns 0 if something is wrong in option string, 1 - otherwise */ | 1048 | /* returns 0 if something is wrong in option string, 1 - otherwise */ |
999 | static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */ | 1049 | static int reiserfs_parse_options(struct super_block *s, |
1050 | |||
1051 | /* string given via mount's -o */ | ||
1052 | char *options, | ||
1053 | |||
1054 | /* | ||
1055 | * after the parsing phase, contains the | ||
1056 | * collection of bitflags defining what | ||
1057 | * mount options were selected. | ||
1058 | */ | ||
1000 | unsigned long *mount_options, | 1059 | unsigned long *mount_options, |
1001 | /* after the parsing phase, contains the | 1060 | |
1002 | collection of bitflags defining what | 1061 | /* strtol-ed from NNN of resize=NNN */ |
1003 | mount options were selected. */ | 1062 | unsigned long *blocks, |
1004 | unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ | ||
1005 | char **jdev_name, | 1063 | char **jdev_name, |
1006 | unsigned int *commit_max_age, | 1064 | unsigned int *commit_max_age, |
1007 | char **qf_names, | 1065 | char **qf_names, |
@@ -1011,7 +1069,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1011 | char *arg = NULL; | 1069 | char *arg = NULL; |
1012 | char *pos; | 1070 | char *pos; |
1013 | opt_desc_t opts[] = { | 1071 | opt_desc_t opts[] = { |
1014 | /* Compatibility stuff, so that -o notail for old setups still work */ | 1072 | /* |
1073 | * Compatibility stuff, so that -o notail for old | ||
1074 | * setups still work | ||
1075 | */ | ||
1015 | {"tails",.arg_required = 't',.values = tails}, | 1076 | {"tails",.arg_required = 't',.values = tails}, |
1016 | {"notail",.clrmask = | 1077 | {"notail",.clrmask = |
1017 | (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, | 1078 | (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)}, |
@@ -1056,8 +1117,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1056 | 1117 | ||
1057 | *blocks = 0; | 1118 | *blocks = 0; |
1058 | if (!options || !*options) | 1119 | if (!options || !*options) |
1059 | /* use default configuration: create tails, journaling on, no | 1120 | /* |
1060 | conversion to newest format */ | 1121 | * use default configuration: create tails, journaling on, no |
1122 | * conversion to newest format | ||
1123 | */ | ||
1061 | return 1; | 1124 | return 1; |
1062 | 1125 | ||
1063 | for (pos = options; pos;) { | 1126 | for (pos = options; pos;) { |
@@ -1110,7 +1173,8 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin | |||
1110 | 1173 | ||
1111 | if (c == 'j') { | 1174 | if (c == 'j') { |
1112 | if (arg && *arg && jdev_name) { | 1175 | if (arg && *arg && jdev_name) { |
1113 | if (*jdev_name) { //Hm, already assigned? | 1176 | /* Hm, already assigned? */ |
1177 | if (*jdev_name) { | ||
1114 | reiserfs_warning(s, "super-6510", | 1178 | reiserfs_warning(s, "super-6510", |
1115 | "journal device was " | 1179 | "journal device was " |
1116 | "already specified to " | 1180 | "already specified to " |
@@ -1363,8 +1427,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1363 | safe_mask |= 1 << REISERFS_USRQUOTA; | 1427 | safe_mask |= 1 << REISERFS_USRQUOTA; |
1364 | safe_mask |= 1 << REISERFS_GRPQUOTA; | 1428 | safe_mask |= 1 << REISERFS_GRPQUOTA; |
1365 | 1429 | ||
1366 | /* Update the bitmask, taking care to keep | 1430 | /* |
1367 | * the bits we're not allowed to change here */ | 1431 | * Update the bitmask, taking care to keep |
1432 | * the bits we're not allowed to change here | ||
1433 | */ | ||
1368 | REISERFS_SB(s)->s_mount_opt = | 1434 | REISERFS_SB(s)->s_mount_opt = |
1369 | (REISERFS_SB(s)-> | 1435 | (REISERFS_SB(s)-> |
1370 | s_mount_opt & ~safe_mask) | (mount_options & safe_mask); | 1436 | s_mount_opt & ~safe_mask) | (mount_options & safe_mask); |
@@ -1428,7 +1494,9 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1428 | handle_data_mode(s, mount_options); | 1494 | handle_data_mode(s, mount_options); |
1429 | handle_barrier_mode(s, mount_options); | 1495 | handle_barrier_mode(s, mount_options); |
1430 | REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); | 1496 | REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); |
1431 | s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ | 1497 | |
1498 | /* now it is safe to call journal_begin */ | ||
1499 | s->s_flags &= ~MS_RDONLY; | ||
1432 | err = journal_begin(&th, s, 10); | 1500 | err = journal_begin(&th, s, 10); |
1433 | if (err) | 1501 | if (err) |
1434 | goto out_err_unlock; | 1502 | goto out_err_unlock; |
@@ -1490,9 +1558,9 @@ static int read_super_block(struct super_block *s, int offset) | |||
1490 | brelse(bh); | 1558 | brelse(bh); |
1491 | return 1; | 1559 | return 1; |
1492 | } | 1560 | } |
1493 | // | 1561 | /* |
1494 | // ok, reiserfs signature (old or new) found in at the given offset | 1562 | * ok, reiserfs signature (old or new) found in at the given offset |
1495 | // | 1563 | */ |
1496 | fs_blocksize = sb_blocksize(rs); | 1564 | fs_blocksize = sb_blocksize(rs); |
1497 | brelse(bh); | 1565 | brelse(bh); |
1498 | sb_set_blocksize(s, fs_blocksize); | 1566 | sb_set_blocksize(s, fs_blocksize); |
@@ -1530,9 +1598,11 @@ static int read_super_block(struct super_block *s, int offset) | |||
1530 | SB_BUFFER_WITH_SB(s) = bh; | 1598 | SB_BUFFER_WITH_SB(s) = bh; |
1531 | SB_DISK_SUPER_BLOCK(s) = rs; | 1599 | SB_DISK_SUPER_BLOCK(s) = rs; |
1532 | 1600 | ||
1601 | /* | ||
1602 | * magic is of non-standard journal filesystem, look at s_version to | ||
1603 | * find which format is in use | ||
1604 | */ | ||
1533 | if (is_reiserfs_jr(rs)) { | 1605 | if (is_reiserfs_jr(rs)) { |
1534 | /* magic is of non-standard journal filesystem, look at s_version to | ||
1535 | find which format is in use */ | ||
1536 | if (sb_version(rs) == REISERFS_VERSION_2) | 1606 | if (sb_version(rs) == REISERFS_VERSION_2) |
1537 | reiserfs_info(s, "found reiserfs format \"3.6\"" | 1607 | reiserfs_info(s, "found reiserfs format \"3.6\"" |
1538 | " with non-standard journal\n"); | 1608 | " with non-standard journal\n"); |
@@ -1546,8 +1616,10 @@ static int read_super_block(struct super_block *s, int offset) | |||
1546 | return 1; | 1616 | return 1; |
1547 | } | 1617 | } |
1548 | } else | 1618 | } else |
1549 | /* s_version of standard format may contain incorrect information, | 1619 | /* |
1550 | so we just look at the magic string */ | 1620 | * s_version of standard format may contain incorrect |
1621 | * information, so we just look at the magic string | ||
1622 | */ | ||
1551 | reiserfs_info(s, | 1623 | reiserfs_info(s, |
1552 | "found reiserfs format \"%s\" with standard journal\n", | 1624 | "found reiserfs format \"%s\" with standard journal\n", |
1553 | is_reiserfs_3_5(rs) ? "3.5" : "3.6"); | 1625 | is_reiserfs_3_5(rs) ? "3.5" : "3.6"); |
@@ -1559,8 +1631,9 @@ static int read_super_block(struct super_block *s, int offset) | |||
1559 | s->dq_op = &reiserfs_quota_operations; | 1631 | s->dq_op = &reiserfs_quota_operations; |
1560 | #endif | 1632 | #endif |
1561 | 1633 | ||
1562 | /* new format is limited by the 32 bit wide i_blocks field, want to | 1634 | /* |
1563 | ** be one full block below that. | 1635 | * new format is limited by the 32 bit wide i_blocks field, want to |
1636 | * be one full block below that. | ||
1564 | */ | 1637 | */ |
1565 | s->s_maxbytes = (512LL << 32) - s->s_blocksize; | 1638 | s->s_maxbytes = (512LL << 32) - s->s_blocksize; |
1566 | return 0; | 1639 | return 0; |
@@ -1579,14 +1652,15 @@ static int reread_meta_blocks(struct super_block *s) | |||
1579 | return 0; | 1652 | return 0; |
1580 | } | 1653 | } |
1581 | 1654 | ||
1582 | ///////////////////////////////////////////////////// | 1655 | /* hash detection stuff */ |
1583 | // hash detection stuff | ||
1584 | 1656 | ||
1585 | // if root directory is empty - we set default - Yura's - hash and | 1657 | /* |
1586 | // warn about it | 1658 | * if root directory is empty - we set default - Yura's - hash and |
1587 | // FIXME: we look for only one name in a directory. If tea and yura | 1659 | * warn about it |
1588 | // bith have the same value - we ask user to send report to the | 1660 | * FIXME: we look for only one name in a directory. If tea and yura |
1589 | // mailing list | 1661 | * both have the same value - we ask user to send report to the |
1662 | * mailing list | ||
1663 | */ | ||
1590 | static __u32 find_hash_out(struct super_block *s) | 1664 | static __u32 find_hash_out(struct super_block *s) |
1591 | { | 1665 | { |
1592 | int retval; | 1666 | int retval; |
@@ -1598,7 +1672,7 @@ static __u32 find_hash_out(struct super_block *s) | |||
1598 | 1672 | ||
1599 | inode = s->s_root->d_inode; | 1673 | inode = s->s_root->d_inode; |
1600 | 1674 | ||
1601 | do { // Some serious "goto"-hater was there ;) | 1675 | do { /* Some serious "goto"-hater was there ;) */ |
1602 | u32 teahash, r5hash, yurahash; | 1676 | u32 teahash, r5hash, yurahash; |
1603 | 1677 | ||
1604 | make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); | 1678 | make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3); |
@@ -1663,23 +1737,25 @@ static __u32 find_hash_out(struct super_block *s) | |||
1663 | return hash; | 1737 | return hash; |
1664 | } | 1738 | } |
1665 | 1739 | ||
1666 | // finds out which hash names are sorted with | 1740 | /* finds out which hash names are sorted with */ |
1667 | static int what_hash(struct super_block *s) | 1741 | static int what_hash(struct super_block *s) |
1668 | { | 1742 | { |
1669 | __u32 code; | 1743 | __u32 code; |
1670 | 1744 | ||
1671 | code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); | 1745 | code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); |
1672 | 1746 | ||
1673 | /* reiserfs_hash_detect() == true if any of the hash mount options | 1747 | /* |
1674 | ** were used. We must check them to make sure the user isn't | 1748 | * reiserfs_hash_detect() == true if any of the hash mount options |
1675 | ** using a bad hash value | 1749 | * were used. We must check them to make sure the user isn't |
1750 | * using a bad hash value | ||
1676 | */ | 1751 | */ |
1677 | if (code == UNSET_HASH || reiserfs_hash_detect(s)) | 1752 | if (code == UNSET_HASH || reiserfs_hash_detect(s)) |
1678 | code = find_hash_out(s); | 1753 | code = find_hash_out(s); |
1679 | 1754 | ||
1680 | if (code != UNSET_HASH && reiserfs_hash_detect(s)) { | 1755 | if (code != UNSET_HASH && reiserfs_hash_detect(s)) { |
1681 | /* detection has found the hash, and we must check against the | 1756 | /* |
1682 | ** mount options | 1757 | * detection has found the hash, and we must check against the |
1758 | * mount options | ||
1683 | */ | 1759 | */ |
1684 | if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { | 1760 | if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { |
1685 | reiserfs_warning(s, "reiserfs-2507", | 1761 | reiserfs_warning(s, "reiserfs-2507", |
@@ -1701,7 +1777,10 @@ static int what_hash(struct super_block *s) | |||
1701 | code = UNSET_HASH; | 1777 | code = UNSET_HASH; |
1702 | } | 1778 | } |
1703 | } else { | 1779 | } else { |
1704 | /* find_hash_out was not called or could not determine the hash */ | 1780 | /* |
1781 | * find_hash_out was not called or | ||
1782 | * could not determine the hash | ||
1783 | */ | ||
1705 | if (reiserfs_rupasov_hash(s)) { | 1784 | if (reiserfs_rupasov_hash(s)) { |
1706 | code = YURA_HASH; | 1785 | code = YURA_HASH; |
1707 | } else if (reiserfs_tea_hash(s)) { | 1786 | } else if (reiserfs_tea_hash(s)) { |
@@ -1711,8 +1790,9 @@ static int what_hash(struct super_block *s) | |||
1711 | } | 1790 | } |
1712 | } | 1791 | } |
1713 | 1792 | ||
1714 | /* if we are mounted RW, and we have a new valid hash code, update | 1793 | /* |
1715 | ** the super | 1794 | * if we are mounted RW, and we have a new valid hash code, update |
1795 | * the super | ||
1716 | */ | 1796 | */ |
1717 | if (code != UNSET_HASH && | 1797 | if (code != UNSET_HASH && |
1718 | !(s->s_flags & MS_RDONLY) && | 1798 | !(s->s_flags & MS_RDONLY) && |
@@ -1722,7 +1802,7 @@ static int what_hash(struct super_block *s) | |||
1722 | return code; | 1802 | return code; |
1723 | } | 1803 | } |
1724 | 1804 | ||
1725 | // return pointer to appropriate function | 1805 | /* return pointer to appropriate function */ |
1726 | static hashf_t hash_function(struct super_block *s) | 1806 | static hashf_t hash_function(struct super_block *s) |
1727 | { | 1807 | { |
1728 | switch (what_hash(s)) { | 1808 | switch (what_hash(s)) { |
@@ -1739,7 +1819,7 @@ static hashf_t hash_function(struct super_block *s) | |||
1739 | return NULL; | 1819 | return NULL; |
1740 | } | 1820 | } |
1741 | 1821 | ||
1742 | // this is used to set up correct value for old partitions | 1822 | /* this is used to set up correct value for old partitions */ |
1743 | static int function2code(hashf_t func) | 1823 | static int function2code(hashf_t func) |
1744 | { | 1824 | { |
1745 | if (func == keyed_hash) | 1825 | if (func == keyed_hash) |
@@ -1749,7 +1829,7 @@ static int function2code(hashf_t func) | |||
1749 | if (func == r5_hash) | 1829 | if (func == r5_hash) |
1750 | return R5_HASH; | 1830 | return R5_HASH; |
1751 | 1831 | ||
1752 | BUG(); // should never happen | 1832 | BUG(); /* should never happen */ |
1753 | 1833 | ||
1754 | return 0; | 1834 | return 0; |
1755 | } | 1835 | } |
@@ -1784,8 +1864,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1784 | sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); | 1864 | sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); |
1785 | sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); | 1865 | sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); |
1786 | sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); | 1866 | sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); |
1787 | /* no preallocation minimum, be smart in | 1867 | /* no preallocation minimum, be smart in reiserfs_file_write instead */ |
1788 | reiserfs_file_write instead */ | ||
1789 | sbi->s_alloc_options.preallocmin = 0; | 1868 | sbi->s_alloc_options.preallocmin = 0; |
1790 | /* Preallocate by 16 blocks (17-1) at once */ | 1869 | /* Preallocate by 16 blocks (17-1) at once */ |
1791 | sbi->s_alloc_options.preallocsize = 17; | 1870 | sbi->s_alloc_options.preallocsize = 17; |
@@ -1828,10 +1907,17 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1828 | goto error_unlocked; | 1907 | goto error_unlocked; |
1829 | } | 1908 | } |
1830 | 1909 | ||
1831 | /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ | 1910 | /* |
1911 | * try old format (undistributed bitmap, super block in 8-th 1k | ||
1912 | * block of a device) | ||
1913 | */ | ||
1832 | if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) | 1914 | if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) |
1833 | old_format = 1; | 1915 | old_format = 1; |
1834 | /* try new format (64-th 1k block), which can contain reiserfs super block */ | 1916 | |
1917 | /* | ||
1918 | * try new format (64-th 1k block), which can contain reiserfs | ||
1919 | * super block | ||
1920 | */ | ||
1835 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { | 1921 | else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) { |
1836 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", | 1922 | SWARN(silent, s, "sh-2021", "can not find reiserfs on %s", |
1837 | s->s_id); | 1923 | s->s_id); |
@@ -1839,9 +1925,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1839 | } | 1925 | } |
1840 | 1926 | ||
1841 | rs = SB_DISK_SUPER_BLOCK(s); | 1927 | rs = SB_DISK_SUPER_BLOCK(s); |
1842 | /* Let's do basic sanity check to verify that underlying device is not | 1928 | /* |
1843 | smaller than the filesystem. If the check fails then abort and scream, | 1929 | * Let's do basic sanity check to verify that underlying device is not |
1844 | because bad stuff will happen otherwise. */ | 1930 | * smaller than the filesystem. If the check fails then abort and |
1931 | * scream, because bad stuff will happen otherwise. | ||
1932 | */ | ||
1845 | if (s->s_bdev && s->s_bdev->bd_inode | 1933 | if (s->s_bdev && s->s_bdev->bd_inode |
1846 | && i_size_read(s->s_bdev->bd_inode) < | 1934 | && i_size_read(s->s_bdev->bd_inode) < |
1847 | sb_block_count(rs) * sb_blocksize(rs)) { | 1935 | sb_block_count(rs) * sb_blocksize(rs)) { |
@@ -1885,15 +1973,16 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1885 | printk("reiserfs: using flush barriers\n"); | 1973 | printk("reiserfs: using flush barriers\n"); |
1886 | } | 1974 | } |
1887 | 1975 | ||
1888 | // set_device_ro(s->s_dev, 1) ; | ||
1889 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { | 1976 | if (journal_init(s, jdev_name, old_format, commit_max_age)) { |
1890 | SWARN(silent, s, "sh-2022", | 1977 | SWARN(silent, s, "sh-2022", |
1891 | "unable to initialize journal space"); | 1978 | "unable to initialize journal space"); |
1892 | goto error_unlocked; | 1979 | goto error_unlocked; |
1893 | } else { | 1980 | } else { |
1894 | jinit_done = 1; /* once this is set, journal_release must be called | 1981 | /* |
1895 | ** if we error out of the mount | 1982 | * once this is set, journal_release must be called |
1896 | */ | 1983 | * if we error out of the mount |
1984 | */ | ||
1985 | jinit_done = 1; | ||
1897 | } | 1986 | } |
1898 | 1987 | ||
1899 | if (reread_meta_blocks(s)) { | 1988 | if (reread_meta_blocks(s)) { |
@@ -1938,7 +2027,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1938 | s->s_root = d_make_root(root_inode); | 2027 | s->s_root = d_make_root(root_inode); |
1939 | if (!s->s_root) | 2028 | if (!s->s_root) |
1940 | goto error; | 2029 | goto error; |
1941 | // define and initialize hash function | 2030 | /* define and initialize hash function */ |
1942 | sbi->s_hash_function = hash_function(s); | 2031 | sbi->s_hash_function = hash_function(s); |
1943 | if (sbi->s_hash_function == NULL) { | 2032 | if (sbi->s_hash_function == NULL) { |
1944 | dput(s->s_root); | 2033 | dput(s->s_root); |
@@ -1967,10 +2056,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1967 | set_sb_umount_state(rs, REISERFS_ERROR_FS); | 2056 | set_sb_umount_state(rs, REISERFS_ERROR_FS); |
1968 | set_sb_fs_state(rs, 0); | 2057 | set_sb_fs_state(rs, 0); |
1969 | 2058 | ||
1970 | /* Clear out s_bmap_nr if it would wrap. We can handle this | 2059 | /* |
2060 | * Clear out s_bmap_nr if it would wrap. We can handle this | ||
1971 | * case, but older revisions can't. This will cause the | 2061 | * case, but older revisions can't. This will cause the |
1972 | * file system to fail mount on those older implementations, | 2062 | * file system to fail mount on those older implementations, |
1973 | * avoiding corruption. -jeffm */ | 2063 | * avoiding corruption. -jeffm |
2064 | */ | ||
1974 | if (bmap_would_wrap(reiserfs_bmap_count(s)) && | 2065 | if (bmap_would_wrap(reiserfs_bmap_count(s)) && |
1975 | sb_bmap_nr(rs) != 0) { | 2066 | sb_bmap_nr(rs) != 0) { |
1976 | reiserfs_warning(s, "super-2030", "This file system " | 2067 | reiserfs_warning(s, "super-2030", "This file system " |
@@ -1983,8 +2074,10 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1983 | } | 2074 | } |
1984 | 2075 | ||
1985 | if (old_format_only(s)) { | 2076 | if (old_format_only(s)) { |
1986 | /* filesystem of format 3.5 either with standard or non-standard | 2077 | /* |
1987 | journal */ | 2078 | * filesystem of format 3.5 either with standard |
2079 | * or non-standard journal | ||
2080 | */ | ||
1988 | if (convert_reiserfs(s)) { | 2081 | if (convert_reiserfs(s)) { |
1989 | /* and -o conv is given */ | 2082 | /* and -o conv is given */ |
1990 | if (!silent) | 2083 | if (!silent) |
@@ -1992,8 +2085,11 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
1992 | "converting 3.5 filesystem to the 3.6 format"); | 2085 | "converting 3.5 filesystem to the 3.6 format"); |
1993 | 2086 | ||
1994 | if (is_reiserfs_3_5(rs)) | 2087 | if (is_reiserfs_3_5(rs)) |
1995 | /* put magic string of 3.6 format. 2.2 will not be able to | 2088 | /* |
1996 | mount this filesystem anymore */ | 2089 | * put magic string of 3.6 format. |
2090 | * 2.2 will not be able to | ||
2091 | * mount this filesystem anymore | ||
2092 | */ | ||
1997 | memcpy(rs->s_v1.s_magic, | 2093 | memcpy(rs->s_v1.s_magic, |
1998 | reiserfs_3_6_magic_string, | 2094 | reiserfs_3_6_magic_string, |
1999 | sizeof | 2095 | sizeof |
@@ -2027,7 +2123,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
2027 | } | 2123 | } |
2028 | reiserfs_write_lock(s); | 2124 | reiserfs_write_lock(s); |
2029 | 2125 | ||
2030 | /* look for files which were to be removed in previous session */ | 2126 | /* |
2127 | * look for files which were to be removed in previous session | ||
2128 | */ | ||
2031 | finish_unfinished(s); | 2129 | finish_unfinished(s); |
2032 | } else { | 2130 | } else { |
2033 | if (old_format_only(s) && !silent) { | 2131 | if (old_format_only(s) && !silent) { |
@@ -2043,7 +2141,9 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) | |||
2043 | } | 2141 | } |
2044 | reiserfs_write_lock(s); | 2142 | reiserfs_write_lock(s); |
2045 | } | 2143 | } |
2046 | // mark hash in super block: it could be unset. overwrite should be ok | 2144 | /* |
2145 | * mark hash in super block: it could be unset. overwrite should be ok | ||
2146 | */ | ||
2047 | set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); | 2147 | set_sb_hash_function_code(rs, function2code(sbi->s_hash_function)); |
2048 | 2148 | ||
2049 | handle_attrs(s); | 2149 | handle_attrs(s); |
@@ -2247,7 +2347,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, | |||
2247 | goto out; | 2347 | goto out; |
2248 | } | 2348 | } |
2249 | inode = path->dentry->d_inode; | 2349 | inode = path->dentry->d_inode; |
2250 | /* We must not pack tails for quota files on reiserfs for quota IO to work */ | 2350 | /* |
2351 | * We must not pack tails for quota files on reiserfs for quota | ||
2352 | * IO to work | ||
2353 | */ | ||
2251 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { | 2354 | if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { |
2252 | err = reiserfs_unpack(inode, NULL); | 2355 | err = reiserfs_unpack(inode, NULL); |
2253 | if (err) { | 2356 | if (err) { |
@@ -2288,10 +2391,12 @@ out: | |||
2288 | return err; | 2391 | return err; |
2289 | } | 2392 | } |
2290 | 2393 | ||
2291 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 2394 | /* |
2395 | * Read data from quotafile - avoid pagecache and such because we cannot afford | ||
2292 | * acquiring the locks... As quota files are never truncated and quota code | 2396 | * acquiring the locks... As quota files are never truncated and quota code |
2293 | * itself serializes the operations (and no one else should touch the files) | 2397 | * itself serializes the operations (and no one else should touch the files) |
2294 | * we don't have to be afraid of races */ | 2398 | * we don't have to be afraid of races |
2399 | */ | ||
2295 | static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | 2400 | static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, |
2296 | size_t len, loff_t off) | 2401 | size_t len, loff_t off) |
2297 | { | 2402 | { |
@@ -2312,7 +2417,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | |||
2312 | sb->s_blocksize - offset < | 2417 | sb->s_blocksize - offset < |
2313 | toread ? sb->s_blocksize - offset : toread; | 2418 | toread ? sb->s_blocksize - offset : toread; |
2314 | tmp_bh.b_state = 0; | 2419 | tmp_bh.b_state = 0; |
2315 | /* Quota files are without tails so we can safely use this function */ | 2420 | /* |
2421 | * Quota files are without tails so we can safely | ||
2422 | * use this function | ||
2423 | */ | ||
2316 | reiserfs_write_lock(sb); | 2424 | reiserfs_write_lock(sb); |
2317 | err = reiserfs_get_block(inode, blk, &tmp_bh, 0); | 2425 | err = reiserfs_get_block(inode, blk, &tmp_bh, 0); |
2318 | reiserfs_write_unlock(sb); | 2426 | reiserfs_write_unlock(sb); |
@@ -2335,8 +2443,10 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, | |||
2335 | return len; | 2443 | return len; |
2336 | } | 2444 | } |
2337 | 2445 | ||
2338 | /* Write to quotafile (we know the transaction is already started and has | 2446 | /* |
2339 | * enough credits) */ | 2447 | * Write to quotafile (we know the transaction is already started and has |
2448 | * enough credits) | ||
2449 | */ | ||
2340 | static ssize_t reiserfs_quota_write(struct super_block *sb, int type, | 2450 | static ssize_t reiserfs_quota_write(struct super_block *sb, int type, |
2341 | const char *data, size_t len, loff_t off) | 2451 | const char *data, size_t len, loff_t off) |
2342 | { | 2452 | { |
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c index fc1981d858dc..f41e19b4bb42 100644 --- a/fs/reiserfs/tail_conversion.c +++ b/fs/reiserfs/tail_conversion.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details | 2 | * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright |
3 | * details | ||
3 | */ | 4 | */ |
4 | 5 | ||
5 | #include <linux/time.h> | 6 | #include <linux/time.h> |
@@ -7,13 +8,19 @@ | |||
7 | #include <linux/buffer_head.h> | 8 | #include <linux/buffer_head.h> |
8 | #include "reiserfs.h" | 9 | #include "reiserfs.h" |
9 | 10 | ||
10 | /* access to tail : when one is going to read tail it must make sure, that is not running. | 11 | /* |
11 | direct2indirect and indirect2direct can not run concurrently */ | 12 | * access to tail : when one is going to read tail it must make sure, that is |
13 | * not running. direct2indirect and indirect2direct can not run concurrently | ||
14 | */ | ||
12 | 15 | ||
13 | /* Converts direct items to an unformatted node. Panics if file has no | 16 | /* |
14 | tail. -ENOSPC if no disk space for conversion */ | 17 | * Converts direct items to an unformatted node. Panics if file has no |
15 | /* path points to first direct item of the file regarless of how many of | 18 | * tail. -ENOSPC if no disk space for conversion |
16 | them are there */ | 19 | */ |
20 | /* | ||
21 | * path points to first direct item of the file regardless of how many of | ||
22 | * them are there | ||
23 | */ | ||
17 | int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | 24 | int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, |
18 | struct treepath *path, struct buffer_head *unbh, | 25 | struct treepath *path, struct buffer_head *unbh, |
19 | loff_t tail_offset) | 26 | loff_t tail_offset) |
@@ -22,14 +29,20 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
22 | struct buffer_head *up_to_date_bh; | 29 | struct buffer_head *up_to_date_bh; |
23 | struct item_head *p_le_ih = tp_item_head(path); | 30 | struct item_head *p_le_ih = tp_item_head(path); |
24 | unsigned long total_tail = 0; | 31 | unsigned long total_tail = 0; |
25 | struct cpu_key end_key; /* Key to search for the last byte of the | 32 | |
26 | converted item. */ | 33 | /* Key to search for the last byte of the converted item. */ |
27 | struct item_head ind_ih; /* new indirect item to be inserted or | 34 | struct cpu_key end_key; |
28 | key of unfm pointer to be pasted */ | 35 | |
29 | int blk_size, retval; /* returned value for reiserfs_insert_item and clones */ | 36 | /* |
30 | unp_t unfm_ptr; /* Handle on an unformatted node | 37 | * new indirect item to be inserted or key |
31 | that will be inserted in the | 38 | * of unfm pointer to be pasted |
32 | tree. */ | 39 | */ |
40 | struct item_head ind_ih; | ||
41 | int blk_size; | ||
42 | /* returned value for reiserfs_insert_item and clones */ | ||
43 | int retval; | ||
44 | /* Handle on an unformatted node that will be inserted in the tree. */ | ||
45 | unp_t unfm_ptr; | ||
33 | 46 | ||
34 | BUG_ON(!th->t_trans_id); | 47 | BUG_ON(!th->t_trans_id); |
35 | 48 | ||
@@ -37,8 +50,10 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
37 | 50 | ||
38 | blk_size = sb->s_blocksize; | 51 | blk_size = sb->s_blocksize; |
39 | 52 | ||
40 | /* and key to search for append or insert pointer to the new | 53 | /* |
41 | unformatted node. */ | 54 | * and key to search for append or insert pointer to the new |
55 | * unformatted node. | ||
56 | */ | ||
42 | copy_item_head(&ind_ih, p_le_ih); | 57 | copy_item_head(&ind_ih, p_le_ih); |
43 | set_le_ih_k_offset(&ind_ih, tail_offset); | 58 | set_le_ih_k_offset(&ind_ih, tail_offset); |
44 | set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); | 59 | set_le_ih_k_type(&ind_ih, TYPE_INDIRECT); |
@@ -76,20 +91,26 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
76 | if (retval) { | 91 | if (retval) { |
77 | return retval; | 92 | return retval; |
78 | } | 93 | } |
79 | // note: from here there are two keys which have matching first | 94 | /* |
80 | // three key components. They only differ by the fourth one. | 95 | * note: from here there are two keys which have matching first |
96 | * three key components. They only differ by the fourth one. | ||
97 | */ | ||
81 | 98 | ||
82 | /* Set the key to search for the direct items of the file */ | 99 | /* Set the key to search for the direct items of the file */ |
83 | make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, | 100 | make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT, |
84 | 4); | 101 | 4); |
85 | 102 | ||
86 | /* Move bytes from the direct items to the new unformatted node | 103 | /* |
87 | and delete them. */ | 104 | * Move bytes from the direct items to the new unformatted node |
105 | * and delete them. | ||
106 | */ | ||
88 | while (1) { | 107 | while (1) { |
89 | int tail_size; | 108 | int tail_size; |
90 | 109 | ||
91 | /* end_key.k_offset is set so, that we will always have found | 110 | /* |
92 | last item of the file */ | 111 | * end_key.k_offset is set so, that we will always have found |
112 | * last item of the file | ||
113 | */ | ||
93 | if (search_for_position_by_key(sb, &end_key, path) == | 114 | if (search_for_position_by_key(sb, &end_key, path) == |
94 | POSITION_FOUND) | 115 | POSITION_FOUND) |
95 | reiserfs_panic(sb, "PAP-14050", | 116 | reiserfs_panic(sb, "PAP-14050", |
@@ -101,11 +122,12 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
101 | tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) | 122 | tail_size = (le_ih_k_offset(p_le_ih) & (blk_size - 1)) |
102 | + ih_item_len(p_le_ih) - 1; | 123 | + ih_item_len(p_le_ih) - 1; |
103 | 124 | ||
104 | /* we only send the unbh pointer if the buffer is not up to date. | 125 | /* |
105 | ** this avoids overwriting good data from writepage() with old data | 126 | * we only send the unbh pointer if the buffer is not |
106 | ** from the disk or buffer cache | 127 | * up to date. this avoids overwriting good data from |
107 | ** Special case: unbh->b_page will be NULL if we are coming through | 128 | * writepage() with old data from the disk or buffer cache |
108 | ** DIRECT_IO handler here. | 129 | * Special case: unbh->b_page will be NULL if we are coming |
130 | * through DIRECT_IO handler here. | ||
109 | */ | 131 | */ |
110 | if (!unbh->b_page || buffer_uptodate(unbh) | 132 | if (!unbh->b_page || buffer_uptodate(unbh) |
111 | || PageUptodate(unbh->b_page)) { | 133 | || PageUptodate(unbh->b_page)) { |
@@ -117,13 +139,15 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
117 | up_to_date_bh); | 139 | up_to_date_bh); |
118 | 140 | ||
119 | total_tail += retval; | 141 | total_tail += retval; |
142 | |||
143 | /* done: file does not have direct items anymore */ | ||
120 | if (tail_size == retval) | 144 | if (tail_size == retval) |
121 | // done: file does not have direct items anymore | ||
122 | break; | 145 | break; |
123 | 146 | ||
124 | } | 147 | } |
125 | /* if we've copied bytes from disk into the page, we need to zero | 148 | /* |
126 | ** out the unused part of the block (it was not up to date before) | 149 | * if we've copied bytes from disk into the page, we need to zero |
150 | * out the unused part of the block (it was not up to date before) | ||
127 | */ | 151 | */ |
128 | if (up_to_date_bh) { | 152 | if (up_to_date_bh) { |
129 | unsigned pgoff = | 153 | unsigned pgoff = |
@@ -146,9 +170,11 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) | |||
146 | BUG(); | 170 | BUG(); |
147 | } | 171 | } |
148 | clear_buffer_dirty(bh); | 172 | clear_buffer_dirty(bh); |
149 | /* Remove the buffer from whatever list it belongs to. We are mostly | 173 | /* |
150 | interested in removing it from per-sb j_dirty_buffers list, to avoid | 174 | * Remove the buffer from whatever list it belongs to. We are mostly |
151 | BUG() on attempt to write not mapped buffer */ | 175 | * interested in removing it from per-sb j_dirty_buffers list, to avoid |
176 | * BUG() on attempt to write not mapped buffer | ||
177 | */ | ||
152 | if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { | 178 | if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { |
153 | struct inode *inode = bh->b_page->mapping->host; | 179 | struct inode *inode = bh->b_page->mapping->host; |
154 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); | 180 | struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); |
@@ -164,12 +190,14 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) | |||
164 | unlock_buffer(bh); | 190 | unlock_buffer(bh); |
165 | } | 191 | } |
166 | 192 | ||
167 | /* this first locks inode (neither reads nor sync are permitted), | 193 | /* |
168 | reads tail through page cache, insert direct item. When direct item | 194 | * this first locks inode (neither reads nor sync are permitted), |
169 | inserted successfully inode is left locked. Return value is always | 195 | * reads tail through page cache, insert direct item. When direct item |
170 | what we expect from it (number of cut bytes). But when tail remains | 196 | * inserted successfully inode is left locked. Return value is always |
171 | in the unformatted node, we set mode to SKIP_BALANCING and unlock | 197 | * what we expect from it (number of cut bytes). But when tail remains |
172 | inode */ | 198 | * in the unformatted node, we set mode to SKIP_BALANCING and unlock |
199 | * inode | ||
200 | */ | ||
173 | int indirect2direct(struct reiserfs_transaction_handle *th, | 201 | int indirect2direct(struct reiserfs_transaction_handle *th, |
174 | struct inode *inode, struct page *page, | 202 | struct inode *inode, struct page *page, |
175 | struct treepath *path, /* path to the indirect item. */ | 203 | struct treepath *path, /* path to the indirect item. */ |
@@ -207,9 +235,11 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
207 | 1) * sb->s_blocksize; | 235 | 1) * sb->s_blocksize; |
208 | pos1 = pos; | 236 | pos1 = pos; |
209 | 237 | ||
210 | // we are protected by i_mutex. The tail can not disapper, not | 238 | /* |
211 | // append can be done either | 239 | * we are protected by i_mutex. The tail can not disapper, not |
212 | // we are in truncate or packing tail in file_release | 240 | * append can be done either |
241 | * we are in truncate or packing tail in file_release | ||
242 | */ | ||
213 | 243 | ||
214 | tail = (char *)kmap(page); /* this can schedule */ | 244 | tail = (char *)kmap(page); /* this can schedule */ |
215 | 245 | ||
@@ -236,9 +266,10 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
236 | pos1 + 1, TYPE_DIRECT, round_tail_len, | 266 | pos1 + 1, TYPE_DIRECT, round_tail_len, |
237 | 0xffff /*ih_free_space */ ); | 267 | 0xffff /*ih_free_space */ ); |
238 | 268 | ||
239 | /* we want a pointer to the first byte of the tail in the page. | 269 | /* |
240 | ** the page was locked and this part of the page was up to date when | 270 | * we want a pointer to the first byte of the tail in the page. |
241 | ** indirect2direct was called, so we know the bytes are still valid | 271 | * the page was locked and this part of the page was up to date when |
272 | * indirect2direct was called, so we know the bytes are still valid | ||
242 | */ | 273 | */ |
243 | tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); | 274 | tail = tail + (pos & (PAGE_CACHE_SIZE - 1)); |
244 | 275 | ||
@@ -250,12 +281,14 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
250 | /* Insert tail as new direct item in the tree */ | 281 | /* Insert tail as new direct item in the tree */ |
251 | if (reiserfs_insert_item(th, path, &key, &s_ih, inode, | 282 | if (reiserfs_insert_item(th, path, &key, &s_ih, inode, |
252 | tail ? tail : NULL) < 0) { | 283 | tail ? tail : NULL) < 0) { |
253 | /* No disk memory. So we can not convert last unformatted node | 284 | /* |
254 | to the direct item. In this case we used to adjust | 285 | * No disk memory. So we can not convert last unformatted node |
255 | indirect items's ih_free_space. Now ih_free_space is not | 286 | * to the direct item. In this case we used to adjust |
256 | used, it would be ideal to write zeros to corresponding | 287 | * indirect items's ih_free_space. Now ih_free_space is not |
257 | unformatted node. For now i_size is considered as guard for | 288 | * used, it would be ideal to write zeros to corresponding |
258 | going out of file size */ | 289 | * unformatted node. For now i_size is considered as guard for |
290 | * going out of file size | ||
291 | */ | ||
259 | kunmap(page); | 292 | kunmap(page); |
260 | return block_size - round_tail_len; | 293 | return block_size - round_tail_len; |
261 | } | 294 | } |
@@ -264,12 +297,16 @@ int indirect2direct(struct reiserfs_transaction_handle *th, | |||
264 | /* make sure to get the i_blocks changes from reiserfs_insert_item */ | 297 | /* make sure to get the i_blocks changes from reiserfs_insert_item */ |
265 | reiserfs_update_sd(th, inode); | 298 | reiserfs_update_sd(th, inode); |
266 | 299 | ||
267 | // note: we have now the same as in above direct2indirect | 300 | /* |
268 | // conversion: there are two keys which have matching first three | 301 | * note: we have now the same as in above direct2indirect |
269 | // key components. They only differ by the fouhth one. | 302 | * conversion: there are two keys which have matching first three |
303 | * key components. They only differ by the fourth one. | ||
304 | */ | ||
270 | 305 | ||
271 | /* We have inserted new direct item and must remove last | 306 | /* |
272 | unformatted node. */ | 307 | * We have inserted new direct item and must remove last |
308 | * unformatted node. | ||
309 | */ | ||
273 | *mode = M_CUT; | 310 | *mode = M_CUT; |
274 | 311 | ||
275 | /* we store position of first direct item in the in-core inode */ | 312 | /* we store position of first direct item in the in-core inode */ |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5cdfbd638b5c..f669990376af 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -56,9 +56,11 @@ | |||
56 | #define XAROOT_NAME "xattrs" | 56 | #define XAROOT_NAME "xattrs" |
57 | 57 | ||
58 | 58 | ||
59 | /* Helpers for inode ops. We do this so that we don't have all the VFS | 59 | /* |
60 | * Helpers for inode ops. We do this so that we don't have all the VFS | ||
60 | * overhead and also for proper i_mutex annotation. | 61 | * overhead and also for proper i_mutex annotation. |
61 | * dir->i_mutex must be held for all of them. */ | 62 | * dir->i_mutex must be held for all of them. |
63 | */ | ||
62 | #ifdef CONFIG_REISERFS_FS_XATTR | 64 | #ifdef CONFIG_REISERFS_FS_XATTR |
63 | static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) | 65 | static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) |
64 | { | 66 | { |
@@ -73,10 +75,12 @@ static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
73 | return dir->i_op->mkdir(dir, dentry, mode); | 75 | return dir->i_op->mkdir(dir, dentry, mode); |
74 | } | 76 | } |
75 | 77 | ||
76 | /* We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr | 78 | /* |
79 | * We use I_MUTEX_CHILD here to silence lockdep. It's safe because xattr | ||
77 | * mutation ops aren't called during rename or splace, which are the | 80 | * mutation ops aren't called during rename or splace, which are the |
78 | * only other users of I_MUTEX_CHILD. It violates the ordering, but that's | 81 | * only other users of I_MUTEX_CHILD. It violates the ordering, but that's |
79 | * better than allocating another subclass just for this code. */ | 82 | * better than allocating another subclass just for this code. |
83 | */ | ||
80 | static int xattr_unlink(struct inode *dir, struct dentry *dentry) | 84 | static int xattr_unlink(struct inode *dir, struct dentry *dentry) |
81 | { | 85 | { |
82 | int error; | 86 | int error; |
@@ -166,9 +170,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) | |||
166 | return xadir; | 170 | return xadir; |
167 | } | 171 | } |
168 | 172 | ||
169 | /* The following are side effects of other operations that aren't explicitly | 173 | /* |
174 | * The following are side effects of other operations that aren't explicitly | ||
170 | * modifying extended attributes. This includes operations such as permissions | 175 | * modifying extended attributes. This includes operations such as permissions |
171 | * or ownership changes, object deletions, etc. */ | 176 | * or ownership changes, object deletions, etc. |
177 | */ | ||
172 | struct reiserfs_dentry_buf { | 178 | struct reiserfs_dentry_buf { |
173 | struct dir_context ctx; | 179 | struct dir_context ctx; |
174 | struct dentry *xadir; | 180 | struct dentry *xadir; |
@@ -267,11 +273,13 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
267 | cleanup_dentry_buf(&buf); | 273 | cleanup_dentry_buf(&buf); |
268 | 274 | ||
269 | if (!err) { | 275 | if (!err) { |
270 | /* We start a transaction here to avoid a ABBA situation | 276 | /* |
277 | * We start a transaction here to avoid a ABBA situation | ||
271 | * between the xattr root's i_mutex and the journal lock. | 278 | * between the xattr root's i_mutex and the journal lock. |
272 | * This doesn't incur much additional overhead since the | 279 | * This doesn't incur much additional overhead since the |
273 | * new transaction will just nest inside the | 280 | * new transaction will just nest inside the |
274 | * outer transaction. */ | 281 | * outer transaction. |
282 | */ | ||
275 | int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + | 283 | int blocks = JOURNAL_PER_BALANCE_CNT * 2 + 2 + |
276 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); | 284 | 4 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); |
277 | struct reiserfs_transaction_handle th; | 285 | struct reiserfs_transaction_handle th; |
@@ -349,9 +357,11 @@ int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs) | |||
349 | } | 357 | } |
350 | 358 | ||
351 | #ifdef CONFIG_REISERFS_FS_XATTR | 359 | #ifdef CONFIG_REISERFS_FS_XATTR |
352 | /* Returns a dentry corresponding to a specific extended attribute file | 360 | /* |
361 | * Returns a dentry corresponding to a specific extended attribute file | ||
353 | * for the inode. If flags allow, the file is created. Otherwise, a | 362 | * for the inode. If flags allow, the file is created. Otherwise, a |
354 | * valid or negative dentry, or an error is returned. */ | 363 | * valid or negative dentry, or an error is returned. |
364 | */ | ||
355 | static struct dentry *xattr_lookup(struct inode *inode, const char *name, | 365 | static struct dentry *xattr_lookup(struct inode *inode, const char *name, |
356 | int flags) | 366 | int flags) |
357 | { | 367 | { |
@@ -400,8 +410,10 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n) | |||
400 | { | 410 | { |
401 | struct address_space *mapping = dir->i_mapping; | 411 | struct address_space *mapping = dir->i_mapping; |
402 | struct page *page; | 412 | struct page *page; |
403 | /* We can deadlock if we try to free dentries, | 413 | /* |
404 | and an unlink/rmdir has just occurred - GFP_NOFS avoids this */ | 414 | * We can deadlock if we try to free dentries, |
415 | * and an unlink/rmdir has just occurred - GFP_NOFS avoids this | ||
416 | */ | ||
405 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 417 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
406 | page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); | 418 | page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); |
407 | if (!IS_ERR(page)) { | 419 | if (!IS_ERR(page)) { |
@@ -615,8 +627,10 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer, | |||
615 | if (name == NULL) | 627 | if (name == NULL) |
616 | return -EINVAL; | 628 | return -EINVAL; |
617 | 629 | ||
618 | /* We can't have xattrs attached to v1 items since they don't have | 630 | /* |
619 | * generation numbers */ | 631 | * We can't have xattrs attached to v1 items since they don't have |
632 | * generation numbers | ||
633 | */ | ||
620 | if (get_inode_sd_version(inode) == STAT_DATA_V1) | 634 | if (get_inode_sd_version(inode) == STAT_DATA_V1) |
621 | return -EOPNOTSUPP; | 635 | return -EOPNOTSUPP; |
622 | 636 | ||
@@ -913,12 +927,16 @@ static const struct xattr_handler *reiserfs_xattr_handlers[] = { | |||
913 | 927 | ||
914 | static int xattr_mount_check(struct super_block *s) | 928 | static int xattr_mount_check(struct super_block *s) |
915 | { | 929 | { |
916 | /* We need generation numbers to ensure that the oid mapping is correct | 930 | /* |
917 | * v3.5 filesystems don't have them. */ | 931 | * We need generation numbers to ensure that the oid mapping is correct |
932 | * v3.5 filesystems don't have them. | ||
933 | */ | ||
918 | if (old_format_only(s)) { | 934 | if (old_format_only(s)) { |
919 | if (reiserfs_xattrs_optional(s)) { | 935 | if (reiserfs_xattrs_optional(s)) { |
920 | /* Old format filesystem, but optional xattrs have | 936 | /* |
921 | * been enabled. Error out. */ | 937 | * Old format filesystem, but optional xattrs have |
938 | * been enabled. Error out. | ||
939 | */ | ||
922 | reiserfs_warning(s, "jdm-2005", | 940 | reiserfs_warning(s, "jdm-2005", |
923 | "xattrs/ACLs not supported " | 941 | "xattrs/ACLs not supported " |
924 | "on pre-v3.6 format filesystems. " | 942 | "on pre-v3.6 format filesystems. " |
@@ -972,9 +990,11 @@ int reiserfs_lookup_privroot(struct super_block *s) | |||
972 | return err; | 990 | return err; |
973 | } | 991 | } |
974 | 992 | ||
975 | /* We need to take a copy of the mount flags since things like | 993 | /* |
994 | * We need to take a copy of the mount flags since things like | ||
976 | * MS_RDONLY don't get set until *after* we're called. | 995 | * MS_RDONLY don't get set until *after* we're called. |
977 | * mount_flags != mount_options */ | 996 | * mount_flags != mount_options |
997 | */ | ||
978 | int reiserfs_xattr_init(struct super_block *s, int mount_flags) | 998 | int reiserfs_xattr_init(struct super_block *s, int mount_flags) |
979 | { | 999 | { |
980 | int err = 0; | 1000 | int err = 0; |
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index f59626c5d33b..857ec7e3016f 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h | |||
@@ -61,7 +61,8 @@ static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size) | |||
61 | return ret; | 61 | return ret; |
62 | } | 62 | } |
63 | 63 | ||
64 | /* We may have to create up to 3 objects: xattr root, xattr dir, xattr file. | 64 | /* |
65 | * We may have to create up to 3 objects: xattr root, xattr dir, xattr file. | ||
65 | * Let's try to be smart about it. | 66 | * Let's try to be smart about it. |
66 | * xattr root: We cache it. If it's not cached, we may need to create it. | 67 | * xattr root: We cache it. If it's not cached, we may need to create it. |
67 | * xattr dir: If anything has been loaded for this inode, we can set a flag | 68 | * xattr dir: If anything has been loaded for this inode, we can set a flag |
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a6ce532402dc..a333a073bea8 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -25,8 +25,10 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
25 | int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; | 25 | int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; |
26 | 26 | ||
27 | 27 | ||
28 | /* Pessimism: We can't assume that anything from the xattr root up | 28 | /* |
29 | * has been created. */ | 29 | * Pessimism: We can't assume that anything from the xattr root up |
30 | * has been created. | ||
31 | */ | ||
30 | 32 | ||
31 | jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + | 33 | jcreate_blocks = reiserfs_xattr_jcreate_nblocks(inode) + |
32 | reiserfs_xattr_nblocks(inode, size) * 2; | 34 | reiserfs_xattr_nblocks(inode, size) * 2; |
@@ -208,8 +210,10 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) | |||
208 | 210 | ||
209 | retval = reiserfs_xattr_get(inode, name, value, size); | 211 | retval = reiserfs_xattr_get(inode, name, value, size); |
210 | if (retval == -ENODATA || retval == -ENOSYS) { | 212 | if (retval == -ENODATA || retval == -ENOSYS) { |
211 | /* This shouldn't actually happen as it should have | 213 | /* |
212 | been caught above.. but just in case */ | 214 | * This shouldn't actually happen as it should have |
215 | * been caught above.. but just in case | ||
216 | */ | ||
213 | acl = NULL; | 217 | acl = NULL; |
214 | } else if (retval < 0) { | 218 | } else if (retval < 0) { |
215 | acl = ERR_PTR(retval); | 219 | acl = ERR_PTR(retval); |
@@ -290,8 +294,10 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, | |||
290 | return error; | 294 | return error; |
291 | } | 295 | } |
292 | 296 | ||
293 | /* dir->i_mutex: locked, | 297 | /* |
294 | * inode is new and not released into the wild yet */ | 298 | * dir->i_mutex: locked, |
299 | * inode is new and not released into the wild yet | ||
300 | */ | ||
295 | int | 301 | int |
296 | reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | 302 | reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, |
297 | struct inode *dir, struct dentry *dentry, | 303 | struct inode *dir, struct dentry *dentry, |
@@ -304,14 +310,18 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, | |||
304 | if (S_ISLNK(inode->i_mode)) | 310 | if (S_ISLNK(inode->i_mode)) |
305 | return 0; | 311 | return 0; |
306 | 312 | ||
307 | /* ACLs can only be used on "new" objects, so if it's an old object | 313 | /* |
308 | * there is nothing to inherit from */ | 314 | * ACLs can only be used on "new" objects, so if it's an old object |
315 | * there is nothing to inherit from | ||
316 | */ | ||
309 | if (get_inode_sd_version(dir) == STAT_DATA_V1) | 317 | if (get_inode_sd_version(dir) == STAT_DATA_V1) |
310 | goto apply_umask; | 318 | goto apply_umask; |
311 | 319 | ||
312 | /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This | 320 | /* |
321 | * Don't apply ACLs to objects in the .reiserfs_priv tree.. This | ||
313 | * would be useless since permissions are ignored, and a pain because | 322 | * would be useless since permissions are ignored, and a pain because |
314 | * it introduces locking cycles */ | 323 | * it introduces locking cycles |
324 | */ | ||
315 | if (IS_PRIVATE(dir)) { | 325 | if (IS_PRIVATE(dir)) { |
316 | inode->i_flags |= S_PRIVATE; | 326 | inode->i_flags |= S_PRIVATE; |
317 | goto apply_umask; | 327 | goto apply_umask; |