aboutsummaryrefslogtreecommitdiffstats
path: root/fs/reiserfs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2005-07-12 23:21:28 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2005-07-12 23:21:28 -0400
commit bd4c625c061c2a38568d0add3478f59172455159 (patch)
tree 1c44a17c55bce2ee7ad5ea3d15a208ecc0955f74 /fs/reiserfs
parent 7fa94c8868edfef8cb6a201fcc9a5078b7b961da (diff)
reiserfs: run scripts/Lindent on reiserfs code
This was a pure indentation change, using: scripts/Lindent fs/reiserfs/*.c include/linux/reiserfs_*.h to make reiserfs match the regular Linux indentation style. As Jeff Mahoney <jeffm@suse.com> writes: The ReiserFS code is a mix of a number of different coding styles, sometimes different even from line-to-line. Since the code has been relatively stable for quite some time and there are few outstanding patches to be applied, it is time to reformat the code to conform to the Linux style standard outlined in Documentation/CodingStyle. This patch contains the result of running scripts/Lindent against fs/reiserfs/*.c and include/linux/reiserfs_*.h. There are places where the code can be made to look better, but I'd rather keep those patches separate so that there isn't a subtle by-hand accident in the middle of a huge patch. To be clear: This patch is reformatting *only*. A number of patches may follow that continue to make the code more consistent with the Linux coding style. Hans wasn't particularly enthusiastic about these patches, but said he wouldn't really oppose them either. Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/reiserfs')
-rw-r--r-- fs/reiserfs/bitmap.c 1842
-rw-r--r-- fs/reiserfs/dir.c 488
-rw-r--r-- fs/reiserfs/do_balan.c 3236
-rw-r--r-- fs/reiserfs/file.c 2564
-rw-r--r-- fs/reiserfs/fix_node.c 4051
-rw-r--r-- fs/reiserfs/hashes.c 193
-rw-r--r-- fs/reiserfs/ibalance.c 1844
-rw-r--r-- fs/reiserfs/inode.c 4915
-rw-r--r-- fs/reiserfs/ioctl.c 197
-rw-r--r-- fs/reiserfs/item_ops.c 977
-rw-r--r-- fs/reiserfs/journal.c 6855
-rw-r--r-- fs/reiserfs/lbalance.c 2218
-rw-r--r-- fs/reiserfs/namei.c 2574
-rw-r--r-- fs/reiserfs/objectid.c 303
-rw-r--r-- fs/reiserfs/prints.c 1003
-rw-r--r-- fs/reiserfs/procfs.c 695
-rw-r--r-- fs/reiserfs/resize.c 207
-rw-r--r-- fs/reiserfs/stree.c 3369
-rw-r--r-- fs/reiserfs/super.c 3623
-rw-r--r-- fs/reiserfs/tail_conversion.c 463
-rw-r--r-- fs/reiserfs/xattr.c 2173
-rw-r--r-- fs/reiserfs/xattr_acl.c 641
-rw-r--r-- fs/reiserfs/xattr_security.c 54
-rw-r--r-- fs/reiserfs/xattr_trusted.c 70
-rw-r--r-- fs/reiserfs/xattr_user.c 89
25 files changed, 23222 insertions, 21422 deletions
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 49c479c9454..909f71e9a30 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -46,1125 +46,1221 @@
46#define TEST_OPTION(optname, s) \ 46#define TEST_OPTION(optname, s) \
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) 47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 48
49static inline void get_bit_address (struct super_block * s, 49static inline void get_bit_address(struct super_block *s,
50 b_blocknr_t block, int * bmap_nr, int * offset) 50 b_blocknr_t block, int *bmap_nr, int *offset)
51{ 51{
52 /* It is in the bitmap block number equal to the block 52 /* It is in the bitmap block number equal to the block
53 * number divided by the number of bits in a block. */ 53 * number divided by the number of bits in a block. */
54 *bmap_nr = block / (s->s_blocksize << 3); 54 *bmap_nr = block / (s->s_blocksize << 3);
55 /* Within that bitmap block it is located at bit offset *offset. */ 55 /* Within that bitmap block it is located at bit offset *offset. */
56 *offset = block & ((s->s_blocksize << 3) - 1 ); 56 *offset = block & ((s->s_blocksize << 3) - 1);
57 return; 57 return;
58} 58}
59 59
60#ifdef CONFIG_REISERFS_CHECK 60#ifdef CONFIG_REISERFS_CHECK
61int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value) 61int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
62{ 62{
63 int i, j; 63 int i, j;
64 64
65 if (block == 0 || block >= SB_BLOCK_COUNT (s)) { 65 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
66 reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)", 66 reiserfs_warning(s,
67 block, SB_BLOCK_COUNT (s)); 67 "vs-4010: is_reusable: block number is out of range %lu (%u)",
68 return 0; 68 block, SB_BLOCK_COUNT(s));
69 } 69 return 0;
70
71 /* it can't be one of the bitmap blocks */
72 for (i = 0; i < SB_BMAP_NR (s); i ++)
73 if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
74 reiserfs_warning (s, "vs: 4020: is_reusable: "
75 "bitmap block %lu(%u) can't be freed or reused",
76 block, SB_BMAP_NR (s));
77 return 0;
78 } 70 }
79
80 get_bit_address (s, block, &i, &j);
81 71
82 if (i >= SB_BMAP_NR (s)) { 72 /* it can't be one of the bitmap blocks */
83 reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: " 73 for (i = 0; i < SB_BMAP_NR(s); i++)
84 "block=%lu, bitmap_nr=%d", block, i); 74 if (block == SB_AP_BITMAP(s)[i].bh->b_blocknr) {
85 return 0; 75 reiserfs_warning(s, "vs: 4020: is_reusable: "
86 } 76 "bitmap block %lu(%u) can't be freed or reused",
77 block, SB_BMAP_NR(s));
78 return 0;
79 }
87 80
88 if ((bit_value == 0 && 81 get_bit_address(s, block, &i, &j);
89 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
90 (bit_value == 1 &&
91 reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
92 reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not "
93 "match required value (i==%d, j==%d) test_bit==%d",
94 block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
95 82
96 return 0; 83 if (i >= SB_BMAP_NR(s)) {
97 } 84 reiserfs_warning(s,
85 "vs-4030: is_reusable: there is no so many bitmap blocks: "
86 "block=%lu, bitmap_nr=%d", block, i);
87 return 0;
88 }
98 89
99 if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { 90 if ((bit_value == 0 &&
100 reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), " 91 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
101 "it must be busy", SB_ROOT_BLOCK (s)); 92 (bit_value == 1 &&
102 return 0; 93 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data) == 0)) {
103 } 94 reiserfs_warning(s,
95 "vs-4040: is_reusable: corresponding bit of block %lu does not "
96 "match required value (i==%d, j==%d) test_bit==%d",
97 block, i, j, reiserfs_test_le_bit(j,
98 SB_AP_BITMAP
99 (s)[i].bh->
100 b_data));
101
102 return 0;
103 }
104 104
105 return 1; 105 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
106 reiserfs_warning(s,
107 "vs-4050: is_reusable: this is root block (%u), "
108 "it must be busy", SB_ROOT_BLOCK(s));
109 return 0;
110 }
111
112 return 1;
106} 113}
107#endif /* CONFIG_REISERFS_CHECK */ 114#endif /* CONFIG_REISERFS_CHECK */
108 115
109/* searches in journal structures for a given block number (bmap, off). If block 116/* searches in journal structures for a given block number (bmap, off). If block
110 is found in reiserfs journal it suggests next free block candidate to test. */ 117 is found in reiserfs journal it suggests next free block candidate to test. */
111static inline int is_block_in_journal (struct super_block * s, int bmap, int 118static inline int is_block_in_journal(struct super_block *s, int bmap, int
112off, int *next) 119 off, int *next)
113{ 120{
114 b_blocknr_t tmp; 121 b_blocknr_t tmp;
115 122
116 if (reiserfs_in_journal (s, bmap, off, 1, &tmp)) { 123 if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
117 if (tmp) { /* hint supplied */ 124 if (tmp) { /* hint supplied */
118 *next = tmp; 125 *next = tmp;
119 PROC_INFO_INC( s, scan_bitmap.in_journal_hint ); 126 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
120 } else { 127 } else {
121 (*next) = off + 1; /* inc offset to avoid looping. */ 128 (*next) = off + 1; /* inc offset to avoid looping. */
122 PROC_INFO_INC( s, scan_bitmap.in_journal_nohint ); 129 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
130 }
131 PROC_INFO_INC(s, scan_bitmap.retry);
132 return 1;
123 } 133 }
124 PROC_INFO_INC( s, scan_bitmap.retry ); 134 return 0;
125 return 1;
126 }
127 return 0;
128} 135}
129 136
130/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap 137/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
131 * block; */ 138 * block; */
132static int scan_bitmap_block (struct reiserfs_transaction_handle *th, 139static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
133 int bmap_n, int *beg, int boundary, int min, int max, int unfm) 140 int bmap_n, int *beg, int boundary, int min,
141 int max, int unfm)
134{ 142{
135 struct super_block *s = th->t_super; 143 struct super_block *s = th->t_super;
136 struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n]; 144 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
137 int end, next; 145 int end, next;
138 int org = *beg; 146 int org = *beg;
139 147
140 BUG_ON (!th->t_trans_id); 148 BUG_ON(!th->t_trans_id);
141 149
142 RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)",bmap_n, SB_BMAP_NR (s) - 1); 150 RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
143 PROC_INFO_INC( s, scan_bitmap.bmap ); 151 bmap_n, SB_BMAP_NR(s) - 1);
152 PROC_INFO_INC(s, scan_bitmap.bmap);
144/* this is unclear and lacks comments, explain how journal bitmaps 153/* this is unclear and lacks comments, explain how journal bitmaps
145 work here for the reader. Convey a sense of the design here. What 154 work here for the reader. Convey a sense of the design here. What
146 is a window? */ 155 is a window? */
147/* - I mean `a window of zero bits' as in description of this function - Zam. */ 156/* - I mean `a window of zero bits' as in description of this function - Zam. */
148
149 if ( !bi ) {
150 reiserfs_warning (s, "NULL bitmap info pointer for bitmap %d", bmap_n);
151 return 0;
152 }
153 if (buffer_locked (bi->bh)) {
154 PROC_INFO_INC( s, scan_bitmap.wait );
155 __wait_on_buffer (bi->bh);
156 }
157
158 while (1) {
159 cont:
160 if (bi->free_count < min)
161 return 0; // No free blocks in this bitmap
162
163 /* search for a first zero bit -- beggining of a window */
164 *beg = reiserfs_find_next_zero_le_bit
165 ((unsigned long*)(bi->bh->b_data), boundary, *beg);
166
167 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
168 * cannot contain a zero window of minimum size */
169 return 0;
170 }
171 157
172 if (unfm && is_block_in_journal(s,bmap_n, *beg, beg)) 158 if (!bi) {
173 continue; 159 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d",
174 /* first zero bit found; we check next bits */ 160 bmap_n);
175 for (end = *beg + 1;; end ++) { 161 return 0;
176 if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) { 162 }
177 next = end; 163 if (buffer_locked(bi->bh)) {
178 break; 164 PROC_INFO_INC(s, scan_bitmap.wait);
179 } 165 __wait_on_buffer(bi->bh);
180 /* finding the other end of zero bit window requires looking into journal structures (in
181 * case of searching for free blocks for unformatted nodes) */
182 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
183 break;
184 } 166 }
185 167
186 /* now (*beg) points to beginning of zero bits window, 168 while (1) {
187 * (end) points to one bit after the window end */ 169 cont:
188 if (end - *beg >= min) { /* it seems we have found window of proper size */ 170 if (bi->free_count < min)
189 int i; 171 return 0; // No free blocks in this bitmap
190 reiserfs_prepare_for_journal (s, bi->bh, 1); 172
191 /* try to set all blocks used checking are they still free */ 173 /* search for a first zero bit -- beggining of a window */
192 for (i = *beg; i < end; i++) { 174 *beg = reiserfs_find_next_zero_le_bit
193 /* It seems that we should not check in journal again. */ 175 ((unsigned long *)(bi->bh->b_data), boundary, *beg);
194 if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) { 176
195 /* bit was set by another process 177 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
196 * while we slept in prepare_for_journal() */ 178 * cannot contain a zero window of minimum size */
197 PROC_INFO_INC( s, scan_bitmap.stolen ); 179 return 0;
198 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
199 * if length of this set is more or equal to `min' */
200 end = i;
201 break;
202 }
203 /* otherwise we clear all bit were set ... */
204 while (--i >= *beg)
205 reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
206 reiserfs_restore_prepared_buffer (s, bi->bh);
207 *beg = org;
208 /* ... and search again in current block from beginning */
209 goto cont;
210 } 180 }
211 }
212 bi->free_count -= (end - *beg);
213 journal_mark_dirty (th, s, bi->bh);
214 181
215 /* free block count calculation */ 182 if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
216 reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1); 183 continue;
217 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg)); 184 /* first zero bit found; we check next bits */
218 journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s)); 185 for (end = *beg + 1;; end++) {
186 if (end >= *beg + max || end >= boundary
187 || reiserfs_test_le_bit(end, bi->bh->b_data)) {
188 next = end;
189 break;
190 }
191 /* finding the other end of zero bit window requires looking into journal structures (in
192 * case of searching for free blocks for unformatted nodes) */
193 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
194 break;
195 }
219 196
220 return end - (*beg); 197 /* now (*beg) points to beginning of zero bits window,
221 } else { 198 * (end) points to one bit after the window end */
222 *beg = next; 199 if (end - *beg >= min) { /* it seems we have found window of proper size */
200 int i;
201 reiserfs_prepare_for_journal(s, bi->bh, 1);
202 /* try to set all blocks used checking are they still free */
203 for (i = *beg; i < end; i++) {
204 /* It seems that we should not check in journal again. */
205 if (reiserfs_test_and_set_le_bit
206 (i, bi->bh->b_data)) {
207 /* bit was set by another process
208 * while we slept in prepare_for_journal() */
209 PROC_INFO_INC(s, scan_bitmap.stolen);
210 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
211 * if length of this set is more or equal to `min' */
212 end = i;
213 break;
214 }
215 /* otherwise we clear all bit were set ... */
216 while (--i >= *beg)
217 reiserfs_test_and_clear_le_bit
218 (i, bi->bh->b_data);
219 reiserfs_restore_prepared_buffer(s,
220 bi->
221 bh);
222 *beg = org;
223 /* ... and search again in current block from beginning */
224 goto cont;
225 }
226 }
227 bi->free_count -= (end - *beg);
228 journal_mark_dirty(th, s, bi->bh);
229
230 /* free block count calculation */
231 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
232 1);
233 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
234 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
235
236 return end - (*beg);
237 } else {
238 *beg = next;
239 }
223 } 240 }
224 }
225} 241}
226 242
227static int bmap_hash_id(struct super_block *s, u32 id) { 243static int bmap_hash_id(struct super_block *s, u32 id)
228 char * hash_in = NULL; 244{
229 unsigned long hash; 245 char *hash_in = NULL;
230 unsigned bm; 246 unsigned long hash;
231 247 unsigned bm;
232 if (id <= 2) { 248
233 bm = 1; 249 if (id <= 2) {
234 } else { 250 bm = 1;
235 hash_in = (char *)(&id); 251 } else {
236 hash = keyed_hash(hash_in, 4); 252 hash_in = (char *)(&id);
237 bm = hash % SB_BMAP_NR(s); 253 hash = keyed_hash(hash_in, 4);
238 if (!bm) 254 bm = hash % SB_BMAP_NR(s);
239 bm = 1; 255 if (!bm)
240 } 256 bm = 1;
241 /* this can only be true when SB_BMAP_NR = 1 */ 257 }
242 if (bm >= SB_BMAP_NR(s)) 258 /* this can only be true when SB_BMAP_NR = 1 */
243 bm = 0; 259 if (bm >= SB_BMAP_NR(s))
244 return bm; 260 bm = 0;
261 return bm;
245} 262}
246 263
247/* 264/*
248 * hashes the id and then returns > 0 if the block group for the 265 * hashes the id and then returns > 0 if the block group for the
249 * corresponding hash is full 266 * corresponding hash is full
250 */ 267 */
251static inline int block_group_used(struct super_block *s, u32 id) { 268static inline int block_group_used(struct super_block *s, u32 id)
252 int bm; 269{
253 bm = bmap_hash_id(s, id); 270 int bm;
254 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100) ) { 271 bm = bmap_hash_id(s, id);
255 return 0; 272 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) {
256 } 273 return 0;
257 return 1; 274 }
275 return 1;
258} 276}
259 277
260/* 278/*
261 * the packing is returned in disk byte order 279 * the packing is returned in disk byte order
262 */ 280 */
263__le32 reiserfs_choose_packing(struct inode *dir) 281__le32 reiserfs_choose_packing(struct inode * dir)
264{ 282{
265 __le32 packing; 283 __le32 packing;
266 if (TEST_OPTION(packing_groups, dir->i_sb)) { 284 if (TEST_OPTION(packing_groups, dir->i_sb)) {
267 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id); 285 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
268 /* 286 /*
269 * some versions of reiserfsck expect packing locality 1 to be 287 * some versions of reiserfsck expect packing locality 1 to be
270 * special 288 * special
271 */ 289 */
272 if (parent_dir == 1 || block_group_used(dir->i_sb,parent_dir)) 290 if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
273 packing = INODE_PKEY(dir)->k_objectid; 291 packing = INODE_PKEY(dir)->k_objectid;
274 else 292 else
275 packing = INODE_PKEY(dir)->k_dir_id; 293 packing = INODE_PKEY(dir)->k_dir_id;
276 } else 294 } else
277 packing = INODE_PKEY(dir)->k_objectid; 295 packing = INODE_PKEY(dir)->k_objectid;
278 return packing; 296 return packing;
279} 297}
280 298
281/* Tries to find contiguous zero bit window (given size) in given region of 299/* Tries to find contiguous zero bit window (given size) in given region of
282 * bitmap and place new blocks there. Returns number of allocated blocks. */ 300 * bitmap and place new blocks there. Returns number of allocated blocks. */
283static int scan_bitmap (struct reiserfs_transaction_handle *th, 301static int scan_bitmap(struct reiserfs_transaction_handle *th,
284 b_blocknr_t *start, b_blocknr_t finish, 302 b_blocknr_t * start, b_blocknr_t finish,
285 int min, int max, int unfm, unsigned long file_block) 303 int min, int max, int unfm, unsigned long file_block)
286{ 304{
287 int nr_allocated=0; 305 int nr_allocated = 0;
288 struct super_block * s = th->t_super; 306 struct super_block *s = th->t_super;
289 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr 307 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
290 * - Hans, it is not a block number - Zam. */ 308 * - Hans, it is not a block number - Zam. */
291 309
292 int bm, off; 310 int bm, off;
293 int end_bm, end_off; 311 int end_bm, end_off;
294 int off_max = s->s_blocksize << 3; 312 int off_max = s->s_blocksize << 3;
295 313
296 BUG_ON (!th->t_trans_id); 314 BUG_ON(!th->t_trans_id);
297 315
298 PROC_INFO_INC( s, scan_bitmap.call ); 316 PROC_INFO_INC(s, scan_bitmap.call);
299 if ( SB_FREE_BLOCKS(s) <= 0) 317 if (SB_FREE_BLOCKS(s) <= 0)
300 return 0; // No point in looking for more free blocks 318 return 0; // No point in looking for more free blocks
301 319
302 get_bit_address (s, *start, &bm, &off); 320 get_bit_address(s, *start, &bm, &off);
303 get_bit_address (s, finish, &end_bm, &end_off); 321 get_bit_address(s, finish, &end_bm, &end_off);
304 if (bm > SB_BMAP_NR(s)) 322 if (bm > SB_BMAP_NR(s))
305 return 0; 323 return 0;
306 if (end_bm > SB_BMAP_NR(s)) 324 if (end_bm > SB_BMAP_NR(s))
307 end_bm = SB_BMAP_NR(s); 325 end_bm = SB_BMAP_NR(s);
308 326
309 /* When the bitmap is more than 10% free, anyone can allocate. 327 /* When the bitmap is more than 10% free, anyone can allocate.
310 * When it's less than 10% free, only files that already use the 328 * When it's less than 10% free, only files that already use the
311 * bitmap are allowed. Once we pass 80% full, this restriction 329 * bitmap are allowed. Once we pass 80% full, this restriction
312 * is lifted. 330 * is lifted.
313 * 331 *
314 * We do this so that files that grow later still have space close to 332 * We do this so that files that grow later still have space close to
315 * their original allocation. This improves locality, and presumably 333 * their original allocation. This improves locality, and presumably
316 * performance as a result. 334 * performance as a result.
317 * 335 *
318 * This is only an allocation policy and does not make up for getting a 336 * This is only an allocation policy and does not make up for getting a
319 * bad hint. Decent hinting must be implemented for this to work well. 337 * bad hint. Decent hinting must be implemented for this to work well.
320 */ 338 */
321 if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) { 339 if (TEST_OPTION(skip_busy, s)
322 for (;bm < end_bm; bm++, off = 0) { 340 && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
323 if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 ) 341 for (; bm < end_bm; bm++, off = 0) {
324 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); 342 if ((off && (!unfm || (file_block != 0)))
325 if (nr_allocated) 343 || SB_AP_BITMAP(s)[bm].free_count >
326 goto ret; 344 (s->s_blocksize << 3) / 10)
327 } 345 nr_allocated =
328 /* we know from above that start is a reasonable number */ 346 scan_bitmap_block(th, bm, &off, off_max,
329 get_bit_address (s, *start, &bm, &off); 347 min, max, unfm);
330 } 348 if (nr_allocated)
331 349 goto ret;
332 for (;bm < end_bm; bm++, off = 0) { 350 }
333 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm); 351 /* we know from above that start is a reasonable number */
334 if (nr_allocated) 352 get_bit_address(s, *start, &bm, &off);
335 goto ret; 353 }
336 } 354
337 355 for (; bm < end_bm; bm++, off = 0) {
338 nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm); 356 nr_allocated =
339 357 scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
340 ret: 358 if (nr_allocated)
341 *start = bm * off_max + off; 359 goto ret;
342 return nr_allocated; 360 }
361
362 nr_allocated =
363 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
364
365 ret:
366 *start = bm * off_max + off;
367 return nr_allocated;
343 368
344} 369}
345 370
346static void _reiserfs_free_block (struct reiserfs_transaction_handle *th, 371static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
347 struct inode *inode, b_blocknr_t block, 372 struct inode *inode, b_blocknr_t block,
348 int for_unformatted) 373 int for_unformatted)
349{ 374{
350 struct super_block * s = th->t_super; 375 struct super_block *s = th->t_super;
351 struct reiserfs_super_block * rs; 376 struct reiserfs_super_block *rs;
352 struct buffer_head * sbh; 377 struct buffer_head *sbh;
353 struct reiserfs_bitmap_info *apbi; 378 struct reiserfs_bitmap_info *apbi;
354 int nr, offset; 379 int nr, offset;
355 380
356 BUG_ON (!th->t_trans_id); 381 BUG_ON(!th->t_trans_id);
357 382
358 PROC_INFO_INC( s, free_block ); 383 PROC_INFO_INC(s, free_block);
359 384
360 rs = SB_DISK_SUPER_BLOCK (s); 385 rs = SB_DISK_SUPER_BLOCK(s);
361 sbh = SB_BUFFER_WITH_SB (s); 386 sbh = SB_BUFFER_WITH_SB(s);
362 apbi = SB_AP_BITMAP(s); 387 apbi = SB_AP_BITMAP(s);
363 388
364 get_bit_address (s, block, &nr, &offset); 389 get_bit_address(s, block, &nr, &offset);
365 390
366 if (nr >= sb_bmap_nr (rs)) { 391 if (nr >= sb_bmap_nr(rs)) {
367 reiserfs_warning (s, "vs-4075: reiserfs_free_block: " 392 reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
368 "block %lu is out of range on %s", 393 "block %lu is out of range on %s",
369 block, reiserfs_bdevname (s)); 394 block, reiserfs_bdevname(s));
370 return; 395 return;
371 } 396 }
372 397
373 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ; 398 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1);
374 399
375 /* clear bit for the given block in bit map */ 400 /* clear bit for the given block in bit map */
376 if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) { 401 if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) {
377 reiserfs_warning (s, "vs-4080: reiserfs_free_block: " 402 reiserfs_warning(s, "vs-4080: reiserfs_free_block: "
378 "free_block (%s:%lu)[dev:blocknr]: bit already cleared", 403 "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
379 reiserfs_bdevname (s), block); 404 reiserfs_bdevname(s), block);
380 } 405 }
381 apbi[nr].free_count ++; 406 apbi[nr].free_count++;
382 journal_mark_dirty (th, s, apbi[nr].bh); 407 journal_mark_dirty(th, s, apbi[nr].bh);
383 408
384 reiserfs_prepare_for_journal(s, sbh, 1) ; 409 reiserfs_prepare_for_journal(s, sbh, 1);
385 /* update super block */ 410 /* update super block */
386 set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); 411 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
387 412
388 journal_mark_dirty (th, s, sbh); 413 journal_mark_dirty(th, s, sbh);
389 if (for_unformatted) 414 if (for_unformatted)
390 DQUOT_FREE_BLOCK_NODIRTY(inode, 1); 415 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
391} 416}
392 417
393void reiserfs_free_block (struct reiserfs_transaction_handle *th, 418void reiserfs_free_block(struct reiserfs_transaction_handle *th,
394 struct inode *inode, b_blocknr_t block, 419 struct inode *inode, b_blocknr_t block,
395 int for_unformatted) 420 int for_unformatted)
396{ 421{
397 struct super_block * s = th->t_super; 422 struct super_block *s = th->t_super;
398 423
399 BUG_ON (!th->t_trans_id); 424 BUG_ON(!th->t_trans_id);
400 425
401 RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); 426 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
402 RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block"); 427 RFALSE(is_reusable(s, block, 1) == 0,
403 /* mark it before we clear it, just in case */ 428 "vs-4071: can not free such block");
404 journal_mark_freed(th, s, block) ; 429 /* mark it before we clear it, just in case */
405 _reiserfs_free_block(th, inode, block, for_unformatted) ; 430 journal_mark_freed(th, s, block);
431 _reiserfs_free_block(th, inode, block, for_unformatted);
406} 432}
407 433
408/* preallocated blocks don't need to be run through journal_mark_freed */ 434/* preallocated blocks don't need to be run through journal_mark_freed */
409static void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th, 435static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
410 struct inode *inode, b_blocknr_t block) { 436 struct inode *inode, b_blocknr_t block)
411 RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device"); 437{
412 RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block"); 438 RFALSE(!th->t_super,
413 BUG_ON (!th->t_trans_id); 439 "vs-4060: trying to free block on nonexistent device");
414 _reiserfs_free_block(th, inode, block, 1) ; 440 RFALSE(is_reusable(th->t_super, block, 1) == 0,
441 "vs-4070: can not free such block");
442 BUG_ON(!th->t_trans_id);
443 _reiserfs_free_block(th, inode, block, 1);
415} 444}
416 445
417static void __discard_prealloc (struct reiserfs_transaction_handle * th, 446static void __discard_prealloc(struct reiserfs_transaction_handle *th,
418 struct reiserfs_inode_info *ei) 447 struct reiserfs_inode_info *ei)
419{ 448{
420 unsigned long save = ei->i_prealloc_block ; 449 unsigned long save = ei->i_prealloc_block;
421 int dirty = 0; 450 int dirty = 0;
422 struct inode *inode = &ei->vfs_inode; 451 struct inode *inode = &ei->vfs_inode;
423 BUG_ON (!th->t_trans_id); 452 BUG_ON(!th->t_trans_id);
424#ifdef CONFIG_REISERFS_CHECK 453#ifdef CONFIG_REISERFS_CHECK
425 if (ei->i_prealloc_count < 0) 454 if (ei->i_prealloc_count < 0)
426 reiserfs_warning (th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.", __FUNCTION__ ); 455 reiserfs_warning(th->t_super,
456 "zam-4001:%s: inode has negative prealloc blocks count.",
457 __FUNCTION__);
427#endif 458#endif
428 while (ei->i_prealloc_count > 0) { 459 while (ei->i_prealloc_count > 0) {
429 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block); 460 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
430 ei->i_prealloc_block++; 461 ei->i_prealloc_block++;
431 ei->i_prealloc_count --; 462 ei->i_prealloc_count--;
432 dirty = 1; 463 dirty = 1;
433 } 464 }
434 if (dirty) 465 if (dirty)
435 reiserfs_update_sd(th, inode); 466 reiserfs_update_sd(th, inode);
436 ei->i_prealloc_block = save; 467 ei->i_prealloc_block = save;
437 list_del_init(&(ei->i_prealloc_list)); 468 list_del_init(&(ei->i_prealloc_list));
438} 469}
439 470
440/* FIXME: It should be inline function */ 471/* FIXME: It should be inline function */
441void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, 472void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
442 struct inode *inode) 473 struct inode *inode)
443{ 474{
444 struct reiserfs_inode_info *ei = REISERFS_I(inode); 475 struct reiserfs_inode_info *ei = REISERFS_I(inode);
445 BUG_ON (!th->t_trans_id); 476 BUG_ON(!th->t_trans_id);
446 if (ei->i_prealloc_count) 477 if (ei->i_prealloc_count)
447 __discard_prealloc(th, ei); 478 __discard_prealloc(th, ei);
448} 479}
449 480
450void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th) 481void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
451{ 482{
452 struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list; 483 struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
453 484
454 BUG_ON (!th->t_trans_id); 485 BUG_ON(!th->t_trans_id);
455 486
456 while (!list_empty(plist)) { 487 while (!list_empty(plist)) {
457 struct reiserfs_inode_info *ei; 488 struct reiserfs_inode_info *ei;
458 ei = list_entry(plist->next, struct reiserfs_inode_info, i_prealloc_list); 489 ei = list_entry(plist->next, struct reiserfs_inode_info,
490 i_prealloc_list);
459#ifdef CONFIG_REISERFS_CHECK 491#ifdef CONFIG_REISERFS_CHECK
460 if (!ei->i_prealloc_count) { 492 if (!ei->i_prealloc_count) {
461 reiserfs_warning (th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.", __FUNCTION__); 493 reiserfs_warning(th->t_super,
462 } 494 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
495 __FUNCTION__);
496 }
463#endif 497#endif
464 __discard_prealloc(th, ei); 498 __discard_prealloc(th, ei);
465 } 499 }
466} 500}
467 501
468void reiserfs_init_alloc_options (struct super_block *s) 502void reiserfs_init_alloc_options(struct super_block *s)
469{ 503{
470 set_bit (_ALLOC_skip_busy, &SB_ALLOC_OPTS(s)); 504 set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
471 set_bit (_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s)); 505 set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
472 set_bit (_ALLOC_packing_groups, &SB_ALLOC_OPTS(s)); 506 set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
473} 507}
474 508
475/* block allocator related options are parsed here */ 509/* block allocator related options are parsed here */
476int reiserfs_parse_alloc_options(struct super_block * s, char * options) 510int reiserfs_parse_alloc_options(struct super_block *s, char *options)
477{ 511{
478 char * this_char, * value; 512 char *this_char, *value;
479 513
480 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */ 514 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
481 515
482 while ( (this_char = strsep (&options, ":")) != NULL ) { 516 while ((this_char = strsep(&options, ":")) != NULL) {
483 if ((value = strchr (this_char, '=')) != NULL) 517 if ((value = strchr(this_char, '=')) != NULL)
484 *value++ = 0; 518 *value++ = 0;
485 519
486 if (!strcmp(this_char, "concentrating_formatted_nodes")) { 520 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
487 int temp; 521 int temp;
488 SET_OPTION(concentrating_formatted_nodes); 522 SET_OPTION(concentrating_formatted_nodes);
489 temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10; 523 temp = (value
490 if (temp <= 0 || temp > 100) { 524 && *value) ? simple_strtoul(value, &value,
491 REISERFS_SB(s)->s_alloc_options.border = 10; 525 0) : 10;
492 } else { 526 if (temp <= 0 || temp > 100) {
493 REISERFS_SB(s)->s_alloc_options.border = 100 / temp; 527 REISERFS_SB(s)->s_alloc_options.border = 10;
494 } 528 } else {
495 continue; 529 REISERFS_SB(s)->s_alloc_options.border =
496 } 530 100 / temp;
497 if (!strcmp(this_char, "displacing_large_files")) { 531 }
498 SET_OPTION(displacing_large_files); 532 continue;
499 REISERFS_SB(s)->s_alloc_options.large_file_size = 533 }
500 (value && *value) ? simple_strtoul (value, &value, 0) : 16; 534 if (!strcmp(this_char, "displacing_large_files")) {
501 continue; 535 SET_OPTION(displacing_large_files);
502 } 536 REISERFS_SB(s)->s_alloc_options.large_file_size =
503 if (!strcmp(this_char, "displacing_new_packing_localities")) { 537 (value
504 SET_OPTION(displacing_new_packing_localities); 538 && *value) ? simple_strtoul(value, &value, 0) : 16;
505 continue; 539 continue;
506 }; 540 }
507 541 if (!strcmp(this_char, "displacing_new_packing_localities")) {
508 if (!strcmp(this_char, "old_hashed_relocation")) { 542 SET_OPTION(displacing_new_packing_localities);
509 SET_OPTION(old_hashed_relocation); 543 continue;
510 continue; 544 };
511 } 545
546 if (!strcmp(this_char, "old_hashed_relocation")) {
547 SET_OPTION(old_hashed_relocation);
548 continue;
549 }
512 550
513 if (!strcmp(this_char, "new_hashed_relocation")) { 551 if (!strcmp(this_char, "new_hashed_relocation")) {
514 SET_OPTION(new_hashed_relocation); 552 SET_OPTION(new_hashed_relocation);
515 continue; 553 continue;
516 } 554 }
517 555
518 if (!strcmp(this_char, "dirid_groups")) { 556 if (!strcmp(this_char, "dirid_groups")) {
519 SET_OPTION(dirid_groups); 557 SET_OPTION(dirid_groups);
520 continue; 558 continue;
521 } 559 }
522 if (!strcmp(this_char, "oid_groups")) { 560 if (!strcmp(this_char, "oid_groups")) {
523 SET_OPTION(oid_groups); 561 SET_OPTION(oid_groups);
524 continue; 562 continue;
525 } 563 }
526 if (!strcmp(this_char, "packing_groups")) { 564 if (!strcmp(this_char, "packing_groups")) {
527 SET_OPTION(packing_groups); 565 SET_OPTION(packing_groups);
528 continue; 566 continue;
529 } 567 }
530 if (!strcmp(this_char, "hashed_formatted_nodes")) { 568 if (!strcmp(this_char, "hashed_formatted_nodes")) {
531 SET_OPTION(hashed_formatted_nodes); 569 SET_OPTION(hashed_formatted_nodes);
532 continue; 570 continue;
533 } 571 }
534 572
535 if (!strcmp(this_char, "skip_busy")) { 573 if (!strcmp(this_char, "skip_busy")) {
536 SET_OPTION(skip_busy); 574 SET_OPTION(skip_busy);
537 continue; 575 continue;
538 } 576 }
539 577
540 if (!strcmp(this_char, "hundredth_slices")) { 578 if (!strcmp(this_char, "hundredth_slices")) {
541 SET_OPTION(hundredth_slices); 579 SET_OPTION(hundredth_slices);
542 continue; 580 continue;
543 } 581 }
544 582
545 if (!strcmp(this_char, "old_way")) { 583 if (!strcmp(this_char, "old_way")) {
546 SET_OPTION(old_way); 584 SET_OPTION(old_way);
547 continue; 585 continue;
548 } 586 }
549 587
550 if (!strcmp(this_char, "displace_based_on_dirid")) { 588 if (!strcmp(this_char, "displace_based_on_dirid")) {
551 SET_OPTION(displace_based_on_dirid); 589 SET_OPTION(displace_based_on_dirid);
552 continue; 590 continue;
553 } 591 }
554 592
555 if (!strcmp(this_char, "preallocmin")) { 593 if (!strcmp(this_char, "preallocmin")) {
556 REISERFS_SB(s)->s_alloc_options.preallocmin = 594 REISERFS_SB(s)->s_alloc_options.preallocmin =
557 (value && *value) ? simple_strtoul (value, &value, 0) : 4; 595 (value
558 continue; 596 && *value) ? simple_strtoul(value, &value, 0) : 4;
559 } 597 continue;
598 }
599
600 if (!strcmp(this_char, "preallocsize")) {
601 REISERFS_SB(s)->s_alloc_options.preallocsize =
602 (value
603 && *value) ? simple_strtoul(value, &value,
604 0) :
605 PREALLOCATION_SIZE;
606 continue;
607 }
560 608
561 if (!strcmp(this_char, "preallocsize")) { 609 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
562 REISERFS_SB(s)->s_alloc_options.preallocsize = 610 __FUNCTION__, this_char);
563 (value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE; 611 return 1;
564 continue;
565 } 612 }
566 613
567 reiserfs_warning (s, "zam-4001: %s : unknown option - %s", 614 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
568 __FUNCTION__ , this_char); 615 return 0;
569 return 1;
570 }
571
572 reiserfs_warning (s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
573 return 0;
574} 616}
575 617
576static inline void new_hashed_relocation (reiserfs_blocknr_hint_t * hint) 618static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
577{ 619{
578 char * hash_in; 620 char *hash_in;
579 if (hint->formatted_node) { 621 if (hint->formatted_node) {
580 hash_in = (char*)&hint->key.k_dir_id; 622 hash_in = (char *)&hint->key.k_dir_id;
581 } else { 623 } else {
582 if (!hint->inode) { 624 if (!hint->inode) {
583 //hint->search_start = hint->beg; 625 //hint->search_start = hint->beg;
584 hash_in = (char*)&hint->key.k_dir_id; 626 hash_in = (char *)&hint->key.k_dir_id;
585 } else 627 } else
586 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 628 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
587 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); 629 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
588 else 630 else
589 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); 631 hash_in =
590 } 632 (char *)(&INODE_PKEY(hint->inode)->k_objectid);
633 }
591 634
592 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); 635 hint->search_start =
636 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
593} 637}
594 638
595/* 639/*
596 * Relocation based on dirid, hashing them into a given bitmap block 640 * Relocation based on dirid, hashing them into a given bitmap block
597 * files. Formatted nodes are unaffected, a seperate policy covers them 641 * files. Formatted nodes are unaffected, a seperate policy covers them
598 */ 642 */
599static void 643static void dirid_groups(reiserfs_blocknr_hint_t * hint)
600dirid_groups (reiserfs_blocknr_hint_t *hint)
601{ 644{
602 unsigned long hash; 645 unsigned long hash;
603 __u32 dirid = 0; 646 __u32 dirid = 0;
604 int bm = 0; 647 int bm = 0;
605 struct super_block *sb = hint->th->t_super; 648 struct super_block *sb = hint->th->t_super;
606 if (hint->inode)
607 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
608 else if (hint->formatted_node)
609 dirid = hint->key.k_dir_id;
610
611 if (dirid) {
612 bm = bmap_hash_id(sb, dirid);
613 hash = bm * (sb->s_blocksize << 3);
614 /* give a portion of the block group to metadata */
615 if (hint->inode) 649 if (hint->inode)
616 hash += sb->s_blocksize/2; 650 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
617 hint->search_start = hash; 651 else if (hint->formatted_node)
618 } 652 dirid = hint->key.k_dir_id;
653
654 if (dirid) {
655 bm = bmap_hash_id(sb, dirid);
656 hash = bm * (sb->s_blocksize << 3);
657 /* give a portion of the block group to metadata */
658 if (hint->inode)
659 hash += sb->s_blocksize / 2;
660 hint->search_start = hash;
661 }
619} 662}
620 663
621/* 664/*
622 * Relocation based on oid, hashing them into a given bitmap block 665 * Relocation based on oid, hashing them into a given bitmap block
623 * files. Formatted nodes are unaffected, a seperate policy covers them 666 * files. Formatted nodes are unaffected, a seperate policy covers them
624 */ 667 */
625static void 668static void oid_groups(reiserfs_blocknr_hint_t * hint)
626oid_groups (reiserfs_blocknr_hint_t *hint)
627{ 669{
628 if (hint->inode) { 670 if (hint->inode) {
629 unsigned long hash; 671 unsigned long hash;
630 __u32 oid; 672 __u32 oid;
631 __u32 dirid; 673 __u32 dirid;
632 int bm; 674 int bm;
633 675
634 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id); 676 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
635 677
636 /* keep the root dir and it's first set of subdirs close to 678 /* keep the root dir and it's first set of subdirs close to
637 * the start of the disk 679 * the start of the disk
638 */ 680 */
639 if (dirid <= 2) 681 if (dirid <= 2)
640 hash = (hint->inode->i_sb->s_blocksize << 3); 682 hash = (hint->inode->i_sb->s_blocksize << 3);
641 else { 683 else {
642 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid); 684 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
643 bm = bmap_hash_id(hint->inode->i_sb, oid); 685 bm = bmap_hash_id(hint->inode->i_sb, oid);
644 hash = bm * (hint->inode->i_sb->s_blocksize << 3); 686 hash = bm * (hint->inode->i_sb->s_blocksize << 3);
687 }
688 hint->search_start = hash;
645 } 689 }
646 hint->search_start = hash;
647 }
648} 690}
649 691
650/* returns 1 if it finds an indirect item and gets valid hint info 692/* returns 1 if it finds an indirect item and gets valid hint info
651 * from it, otherwise 0 693 * from it, otherwise 0
652 */ 694 */
653static int get_left_neighbor(reiserfs_blocknr_hint_t *hint) 695static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
654{ 696{
655 struct path * path; 697 struct path *path;
656 struct buffer_head * bh; 698 struct buffer_head *bh;
657 struct item_head * ih; 699 struct item_head *ih;
658 int pos_in_item; 700 int pos_in_item;
659 __le32 * item; 701 __le32 *item;
660 int ret = 0; 702 int ret = 0;
661 703
662 if (!hint->path) /* reiserfs code can call this function w/o pointer to path 704 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
663 * structure supplied; then we rely on supplied search_start */ 705 * structure supplied; then we rely on supplied search_start */
664 return 0; 706 return 0;
665 707
666 path = hint->path; 708 path = hint->path;
667 bh = get_last_bh(path); 709 bh = get_last_bh(path);
668 RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor"); 710 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
669 ih = get_ih(path); 711 ih = get_ih(path);
670 pos_in_item = path->pos_in_item; 712 pos_in_item = path->pos_in_item;
671 item = get_item (path); 713 item = get_item(path);
672 714
673 hint->search_start = bh->b_blocknr; 715 hint->search_start = bh->b_blocknr;
674 716
675 if (!hint->formatted_node && is_indirect_le_ih (ih)) { 717 if (!hint->formatted_node && is_indirect_le_ih(ih)) {
676 /* for indirect item: go to left and look for the first non-hole entry 718 /* for indirect item: go to left and look for the first non-hole entry
677 in the indirect item */ 719 in the indirect item */
678 if (pos_in_item == I_UNFM_NUM (ih)) 720 if (pos_in_item == I_UNFM_NUM(ih))
679 pos_in_item--; 721 pos_in_item--;
680// pos_in_item = I_UNFM_NUM (ih) - 1; 722// pos_in_item = I_UNFM_NUM (ih) - 1;
681 while (pos_in_item >= 0) { 723 while (pos_in_item >= 0) {
682 int t=get_block_num(item,pos_in_item); 724 int t = get_block_num(item, pos_in_item);
683 if (t) { 725 if (t) {
684 hint->search_start = t; 726 hint->search_start = t;
685 ret = 1; 727 ret = 1;
686 break; 728 break;
687 } 729 }
688 pos_in_item --; 730 pos_in_item--;
731 }
689 } 732 }
690 }
691 733
692 /* does result value fit into specified region? */ 734 /* does result value fit into specified region? */
693 return ret; 735 return ret;
694} 736}
695 737
696/* should be, if formatted node, then try to put on first part of the device 738/* should be, if formatted node, then try to put on first part of the device
697 specified as number of percent with mount option device, else try to put 739 specified as number of percent with mount option device, else try to put
698 on last of device. This is not to say it is good code to do so, 740 on last of device. This is not to say it is good code to do so,
699 but the effect should be measured. */ 741 but the effect should be measured. */
700static inline void set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint) 742static inline void set_border_in_hint(struct super_block *s,
743 reiserfs_blocknr_hint_t * hint)
701{ 744{
702 b_blocknr_t border = SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border; 745 b_blocknr_t border =
746 SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
703 747
704 if (hint->formatted_node) 748 if (hint->formatted_node)
705 hint->end = border - 1; 749 hint->end = border - 1;
706 else 750 else
707 hint->beg = border; 751 hint->beg = border;
708} 752}
709 753
710static inline void displace_large_file(reiserfs_blocknr_hint_t *hint) 754static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
711{ 755{
712 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 756 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
713 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg); 757 hint->search_start =
714 else 758 hint->beg +
715 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg); 759 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
760 4) % (hint->end - hint->beg);
761 else
762 hint->search_start =
763 hint->beg +
764 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
765 4) % (hint->end - hint->beg);
716} 766}
717 767
718static inline void hash_formatted_node(reiserfs_blocknr_hint_t *hint) 768static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
719{ 769{
720 char * hash_in; 770 char *hash_in;
721 771
722 if (!hint->inode) 772 if (!hint->inode)
723 hash_in = (char*)&hint->key.k_dir_id; 773 hash_in = (char *)&hint->key.k_dir_id;
724 else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super)) 774 else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
725 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id); 775 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
726 else 776 else
727 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid); 777 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
728 778
729 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg); 779 hint->search_start =
780 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
730} 781}
731 782
732static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint) 783static inline int
784this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
785 hint)
733{ 786{
734 return hint->block == REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size; 787 return hint->block ==
788 REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
735} 789}
736 790
737#ifdef DISPLACE_NEW_PACKING_LOCALITIES 791#ifdef DISPLACE_NEW_PACKING_LOCALITIES
738static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint) 792static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
739{ 793{
740 struct in_core_key * key = &hint->key; 794 struct in_core_key *key = &hint->key;
741 795
742 hint->th->displace_new_blocks = 0; 796 hint->th->displace_new_blocks = 0;
743 hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg); 797 hint->search_start =
798 hint->beg + keyed_hash((char *)(&key->k_objectid),
799 4) % (hint->end - hint->beg);
744} 800}
745 #endif 801#endif
746 802
747static inline int old_hashed_relocation (reiserfs_blocknr_hint_t * hint) 803static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
748{ 804{
749 b_blocknr_t border; 805 b_blocknr_t border;
750 u32 hash_in; 806 u32 hash_in;
751
752 if (hint->formatted_node || hint->inode == NULL) {
753 return 0;
754 }
755 807
756 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id); 808 if (hint->formatted_node || hint->inode == NULL) {
757 border = hint->beg + (u32) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1); 809 return 0;
758 if (border > hint->search_start) 810 }
759 hint->search_start = border;
760 811
761 return 1; 812 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
762 } 813 border =
763 814 hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
764static inline int old_way (reiserfs_blocknr_hint_t * hint) 815 4) % (hint->end - hint->beg - 1);
765{ 816 if (border > hint->search_start)
766 b_blocknr_t border; 817 hint->search_start = border;
767
768 if (hint->formatted_node || hint->inode == NULL) {
769 return 0;
770 }
771
772 border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg);
773 if (border > hint->search_start)
774 hint->search_start = border;
775 818
776 return 1; 819 return 1;
777} 820}
778 821
779static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint) 822static inline int old_way(reiserfs_blocknr_hint_t * hint)
780{ 823{
781 struct in_core_key * key = &hint->key; 824 b_blocknr_t border;
782 b_blocknr_t slice_start; 825
826 if (hint->formatted_node || hint->inode == NULL) {
827 return 0;
828 }
829
830 border =
831 hint->beg +
832 le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
833 hint->beg);
834 if (border > hint->search_start)
835 hint->search_start = border;
783 836
784 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100); 837 return 1;
785 if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) { 838}
786 hint->search_start = slice_start; 839
787 } 840static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
841{
842 struct in_core_key *key = &hint->key;
843 b_blocknr_t slice_start;
844
845 slice_start =
846 (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
847 if (slice_start > hint->search_start
848 || slice_start + (hint->end / 100) <= hint->search_start) {
849 hint->search_start = slice_start;
850 }
788} 851}
789 852
790static void determine_search_start(reiserfs_blocknr_hint_t *hint, 853static void determine_search_start(reiserfs_blocknr_hint_t * hint,
791 int amount_needed) 854 int amount_needed)
792{ 855{
793 struct super_block *s = hint->th->t_super; 856 struct super_block *s = hint->th->t_super;
794 int unfm_hint; 857 int unfm_hint;
795 858
796 hint->beg = 0; 859 hint->beg = 0;
797 hint->end = SB_BLOCK_COUNT(s) - 1; 860 hint->end = SB_BLOCK_COUNT(s) - 1;
798 861
799 /* This is former border algorithm. Now with tunable border offset */ 862 /* This is former border algorithm. Now with tunable border offset */
800 if (concentrating_formatted_nodes(s)) 863 if (concentrating_formatted_nodes(s))
801 set_border_in_hint(s, hint); 864 set_border_in_hint(s, hint);
802 865
803#ifdef DISPLACE_NEW_PACKING_LOCALITIES 866#ifdef DISPLACE_NEW_PACKING_LOCALITIES
804 /* whenever we create a new directory, we displace it. At first we will 867 /* whenever we create a new directory, we displace it. At first we will
805 hash for location, later we might look for a moderately empty place for 868 hash for location, later we might look for a moderately empty place for
806 it */ 869 it */
807 if (displacing_new_packing_localities(s) 870 if (displacing_new_packing_localities(s)
808 && hint->th->displace_new_blocks) { 871 && hint->th->displace_new_blocks) {
809 displace_new_packing_locality(hint); 872 displace_new_packing_locality(hint);
810 873
811 /* we do not continue determine_search_start, 874 /* we do not continue determine_search_start,
812 * if new packing locality is being displaced */ 875 * if new packing locality is being displaced */
813 return; 876 return;
814 } 877 }
815#endif 878#endif
816
817 /* all persons should feel encouraged to add more special cases here and
818 * test them */
819 879
820 if (displacing_large_files(s) && !hint->formatted_node 880 /* all persons should feel encouraged to add more special cases here and
821 && this_blocknr_allocation_would_make_it_a_large_file(hint)) { 881 * test them */
822 displace_large_file(hint);
823 return;
824 }
825
826 /* if none of our special cases is relevant, use the left neighbor in the
827 tree order of the new node we are allocating for */
828 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
829 hash_formatted_node(hint);
830 return;
831 }
832 882
833 unfm_hint = get_left_neighbor(hint); 883 if (displacing_large_files(s) && !hint->formatted_node
884 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
885 displace_large_file(hint);
886 return;
887 }
834 888
835 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation, 889 /* if none of our special cases is relevant, use the left neighbor in the
836 new blocks are displaced based on directory ID. Also, if suggested search_start 890 tree order of the new node we are allocating for */
837 is less than last preallocated block, we start searching from it, assuming that 891 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
838 HDD dataflow is faster in forward direction */ 892 hash_formatted_node(hint);
839 if ( TEST_OPTION(old_way, s)) { 893 return;
840 if (!hint->formatted_node) { 894 }
841 if ( !reiserfs_hashed_relocation(s))
842 old_way(hint);
843 else if (!reiserfs_no_unhashed_relocation(s))
844 old_hashed_relocation(hint);
845 895
846 if ( hint->inode && hint->search_start < REISERFS_I(hint->inode)->i_prealloc_block) 896 unfm_hint = get_left_neighbor(hint);
847 hint->search_start = REISERFS_I(hint->inode)->i_prealloc_block; 897
898 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
899 new blocks are displaced based on directory ID. Also, if suggested search_start
900 is less than last preallocated block, we start searching from it, assuming that
901 HDD dataflow is faster in forward direction */
902 if (TEST_OPTION(old_way, s)) {
903 if (!hint->formatted_node) {
904 if (!reiserfs_hashed_relocation(s))
905 old_way(hint);
906 else if (!reiserfs_no_unhashed_relocation(s))
907 old_hashed_relocation(hint);
908
909 if (hint->inode
910 && hint->search_start <
911 REISERFS_I(hint->inode)->i_prealloc_block)
912 hint->search_start =
913 REISERFS_I(hint->inode)->i_prealloc_block;
914 }
915 return;
848 } 916 }
849 return;
850 }
851 917
852 /* This is an approach proposed by Hans */ 918 /* This is an approach proposed by Hans */
853 if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) { 919 if (TEST_OPTION(hundredth_slices, s)
854 hundredth_slices(hint); 920 && !(displacing_large_files(s) && !hint->formatted_node)) {
855 return; 921 hundredth_slices(hint);
856 } 922 return;
857 923 }
858 /* old_hashed_relocation only works on unformatted */
859 if (!unfm_hint && !hint->formatted_node &&
860 TEST_OPTION(old_hashed_relocation, s))
861 {
862 old_hashed_relocation(hint);
863 }
864 /* new_hashed_relocation works with both formatted/unformatted nodes */
865 if ((!unfm_hint || hint->formatted_node) &&
866 TEST_OPTION(new_hashed_relocation, s))
867 {
868 new_hashed_relocation(hint);
869 }
870 /* dirid grouping works only on unformatted nodes */
871 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups,s))
872 {
873 dirid_groups(hint);
874 }
875 924
925 /* old_hashed_relocation only works on unformatted */
926 if (!unfm_hint && !hint->formatted_node &&
927 TEST_OPTION(old_hashed_relocation, s)) {
928 old_hashed_relocation(hint);
929 }
930 /* new_hashed_relocation works with both formatted/unformatted nodes */
931 if ((!unfm_hint || hint->formatted_node) &&
932 TEST_OPTION(new_hashed_relocation, s)) {
933 new_hashed_relocation(hint);
934 }
935 /* dirid grouping works only on unformatted nodes */
936 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
937 dirid_groups(hint);
938 }
876#ifdef DISPLACE_NEW_PACKING_LOCALITIES 939#ifdef DISPLACE_NEW_PACKING_LOCALITIES
877 if (hint->formatted_node && TEST_OPTION(dirid_groups,s)) 940 if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
878 { 941 dirid_groups(hint);
879 dirid_groups(hint); 942 }
880 }
881#endif 943#endif
882 944
883 /* oid grouping works only on unformatted nodes */ 945 /* oid grouping works only on unformatted nodes */
884 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups,s)) 946 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
885 { 947 oid_groups(hint);
886 oid_groups(hint); 948 }
887 } 949 return;
888 return;
889} 950}
890 951
891static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint) 952static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
892{ 953{
893 /* make minimum size a mount option and benchmark both ways */ 954 /* make minimum size a mount option and benchmark both ways */
894 /* we preallocate blocks only for regular files, specific size */ 955 /* we preallocate blocks only for regular files, specific size */
895 /* benchmark preallocating always and see what happens */ 956 /* benchmark preallocating always and see what happens */
896 957
897 hint->prealloc_size = 0; 958 hint->prealloc_size = 0;
898 959
899 if (!hint->formatted_node && hint->preallocate) { 960 if (!hint->formatted_node && hint->preallocate) {
900 if (S_ISREG(hint->inode->i_mode) 961 if (S_ISREG(hint->inode->i_mode)
901 && hint->inode->i_size >= REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize) 962 && hint->inode->i_size >=
902 hint->prealloc_size = REISERFS_SB(hint->th->t_super)->s_alloc_options.preallocsize - 1; 963 REISERFS_SB(hint->th->t_super)->s_alloc_options.
903 } 964 preallocmin * hint->inode->i_sb->s_blocksize)
904 return CARRY_ON; 965 hint->prealloc_size =
966 REISERFS_SB(hint->th->t_super)->s_alloc_options.
967 preallocsize - 1;
968 }
969 return CARRY_ON;
905} 970}
906 971
907/* XXX I know it could be merged with upper-level function; 972/* XXX I know it could be merged with upper-level function;
908 but may be result function would be too complex. */ 973 but may be result function would be too complex. */
909static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint, 974static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
910 b_blocknr_t * new_blocknrs, 975 b_blocknr_t * new_blocknrs,
911 b_blocknr_t start, b_blocknr_t finish, 976 b_blocknr_t start,
912 int min, 977 b_blocknr_t finish, int min,
913 int amount_needed, int prealloc_size) 978 int amount_needed,
979 int prealloc_size)
914{ 980{
915 int rest = amount_needed; 981 int rest = amount_needed;
916 int nr_allocated; 982 int nr_allocated;
917 983
918 while (rest > 0 && start <= finish) { 984 while (rest > 0 && start <= finish) {
919 nr_allocated = scan_bitmap (hint->th, &start, finish, min, 985 nr_allocated = scan_bitmap(hint->th, &start, finish, min,
920 rest + prealloc_size, !hint->formatted_node, 986 rest + prealloc_size,
921 hint->block); 987 !hint->formatted_node, hint->block);
922 988
923 if (nr_allocated == 0) /* no new blocks allocated, return */ 989 if (nr_allocated == 0) /* no new blocks allocated, return */
924 break; 990 break;
925 991
926 /* fill free_blocknrs array first */ 992 /* fill free_blocknrs array first */
927 while (rest > 0 && nr_allocated > 0) { 993 while (rest > 0 && nr_allocated > 0) {
928 * new_blocknrs ++ = start ++; 994 *new_blocknrs++ = start++;
929 rest --; nr_allocated --; 995 rest--;
930 } 996 nr_allocated--;
997 }
931 998
932 /* do we have something to fill prealloc. array also ? */ 999 /* do we have something to fill prealloc. array also ? */
933 if (nr_allocated > 0) { 1000 if (nr_allocated > 0) {
934 /* it means prealloc_size was greater that 0 and we do preallocation */ 1001 /* it means prealloc_size was greater that 0 and we do preallocation */
935 list_add(&REISERFS_I(hint->inode)->i_prealloc_list, 1002 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
936 &SB_JOURNAL(hint->th->t_super)->j_prealloc_list); 1003 &SB_JOURNAL(hint->th->t_super)->
937 REISERFS_I(hint->inode)->i_prealloc_block = start; 1004 j_prealloc_list);
938 REISERFS_I(hint->inode)->i_prealloc_count = nr_allocated; 1005 REISERFS_I(hint->inode)->i_prealloc_block = start;
939 break; 1006 REISERFS_I(hint->inode)->i_prealloc_count =
1007 nr_allocated;
1008 break;
1009 }
940 } 1010 }
941 }
942 1011
943 return (amount_needed - rest); 1012 return (amount_needed - rest);
944} 1013}
945 1014
946static inline int blocknrs_and_prealloc_arrays_from_search_start 1015static inline int blocknrs_and_prealloc_arrays_from_search_start
947 (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed) 1016 (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
948{ 1017 int amount_needed) {
949 struct super_block *s = hint->th->t_super; 1018 struct super_block *s = hint->th->t_super;
950 b_blocknr_t start = hint->search_start; 1019 b_blocknr_t start = hint->search_start;
951 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; 1020 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
952 int passno = 0; 1021 int passno = 0;
953 int nr_allocated = 0; 1022 int nr_allocated = 0;
954 int bigalloc = 0; 1023 int bigalloc = 0;
955 1024
956 determine_prealloc_size(hint); 1025 determine_prealloc_size(hint);
957 if (!hint->formatted_node) { 1026 if (!hint->formatted_node) {
958 int quota_ret; 1027 int quota_ret;
959#ifdef REISERQUOTA_DEBUG 1028#ifdef REISERQUOTA_DEBUG
960 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); 1029 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1030 "reiserquota: allocating %d blocks id=%u",
1031 amount_needed, hint->inode->i_uid);
961#endif 1032#endif
962 quota_ret = DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed); 1033 quota_ret =
963 if (quota_ret) /* Quota exceeded? */ 1034 DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
964 return QUOTA_EXCEEDED; 1035 if (quota_ret) /* Quota exceeded? */
965 if (hint->preallocate && hint->prealloc_size ) { 1036 return QUOTA_EXCEEDED;
1037 if (hint->preallocate && hint->prealloc_size) {
966#ifdef REISERQUOTA_DEBUG 1038#ifdef REISERQUOTA_DEBUG
967 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: allocating (prealloc) %d blocks id=%u", hint->prealloc_size, hint->inode->i_uid); 1039 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1040 "reiserquota: allocating (prealloc) %d blocks id=%u",
1041 hint->prealloc_size, hint->inode->i_uid);
968#endif 1042#endif
969 quota_ret = DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode, hint->prealloc_size); 1043 quota_ret =
970 if (quota_ret) 1044 DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
971 hint->preallocate=hint->prealloc_size=0; 1045 hint->prealloc_size);
1046 if (quota_ret)
1047 hint->preallocate = hint->prealloc_size = 0;
1048 }
1049 /* for unformatted nodes, force large allocations */
1050 bigalloc = amount_needed;
972 } 1051 }
973 /* for unformatted nodes, force large allocations */
974 bigalloc = amount_needed;
975 }
976 1052
977 do { 1053 do {
978 /* in bigalloc mode, nr_allocated should stay zero until 1054 /* in bigalloc mode, nr_allocated should stay zero until
979 * the entire allocation is filled 1055 * the entire allocation is filled
980 */ 1056 */
981 if (unlikely(bigalloc && nr_allocated)) { 1057 if (unlikely(bigalloc && nr_allocated)) {
982 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n", 1058 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
983 bigalloc, nr_allocated); 1059 bigalloc, nr_allocated);
984 /* reset things to a sane value */ 1060 /* reset things to a sane value */
985 bigalloc = amount_needed - nr_allocated; 1061 bigalloc = amount_needed - nr_allocated;
986 } 1062 }
987 /* 1063 /*
988 * try pass 0 and pass 1 looking for a nice big 1064 * try pass 0 and pass 1 looking for a nice big
989 * contiguous allocation. Then reset and look 1065 * contiguous allocation. Then reset and look
990 * for anything you can find. 1066 * for anything you can find.
991 */ 1067 */
992 if (passno == 2 && bigalloc) { 1068 if (passno == 2 && bigalloc) {
993 passno = 0; 1069 passno = 0;
994 bigalloc = 0; 1070 bigalloc = 0;
995 } 1071 }
996 switch (passno++) { 1072 switch (passno++) {
997 case 0: /* Search from hint->search_start to end of disk */ 1073 case 0: /* Search from hint->search_start to end of disk */
998 start = hint->search_start; 1074 start = hint->search_start;
999 finish = SB_BLOCK_COUNT(s) - 1; 1075 finish = SB_BLOCK_COUNT(s) - 1;
1000 break; 1076 break;
1001 case 1: /* Search from hint->beg to hint->search_start */ 1077 case 1: /* Search from hint->beg to hint->search_start */
1002 start = hint->beg; 1078 start = hint->beg;
1003 finish = hint->search_start; 1079 finish = hint->search_start;
1004 break; 1080 break;
1005 case 2: /* Last chance: Search from 0 to hint->beg */ 1081 case 2: /* Last chance: Search from 0 to hint->beg */
1006 start = 0; 1082 start = 0;
1007 finish = hint->beg; 1083 finish = hint->beg;
1008 break; 1084 break;
1009 default: /* We've tried searching everywhere, not enough space */ 1085 default: /* We've tried searching everywhere, not enough space */
1010 /* Free the blocks */ 1086 /* Free the blocks */
1011 if (!hint->formatted_node) { 1087 if (!hint->formatted_node) {
1012#ifdef REISERQUOTA_DEBUG 1088#ifdef REISERQUOTA_DEBUG
1013 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (nospace) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated, hint->inode->i_uid); 1089 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1090 "reiserquota: freeing (nospace) %d blocks id=%u",
1091 amount_needed +
1092 hint->prealloc_size -
1093 nr_allocated,
1094 hint->inode->i_uid);
1014#endif 1095#endif
1015 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */ 1096 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
1016 } 1097 }
1017 while (nr_allocated --) 1098 while (nr_allocated--)
1018 reiserfs_free_block(hint->th, hint->inode, new_blocknrs[nr_allocated], !hint->formatted_node); 1099 reiserfs_free_block(hint->th, hint->inode,
1019 1100 new_blocknrs[nr_allocated],
1020 return NO_DISK_SPACE; 1101 !hint->formatted_node);
1021 } 1102
1022 } while ((nr_allocated += allocate_without_wrapping_disk (hint, 1103 return NO_DISK_SPACE;
1023 new_blocknrs + nr_allocated, start, finish, 1104 }
1024 bigalloc ? bigalloc : 1, 1105 } while ((nr_allocated += allocate_without_wrapping_disk(hint,
1025 amount_needed - nr_allocated, 1106 new_blocknrs +
1026 hint->prealloc_size)) 1107 nr_allocated,
1027 < amount_needed); 1108 start, finish,
1028 if ( !hint->formatted_node && 1109 bigalloc ?
1029 amount_needed + hint->prealloc_size > 1110 bigalloc : 1,
1030 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) { 1111 amount_needed -
1031 /* Some of preallocation blocks were not allocated */ 1112 nr_allocated,
1113 hint->
1114 prealloc_size))
1115 < amount_needed);
1116 if (!hint->formatted_node &&
1117 amount_needed + hint->prealloc_size >
1118 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1119 /* Some of preallocation blocks were not allocated */
1032#ifdef REISERQUOTA_DEBUG 1120#ifdef REISERQUOTA_DEBUG
1033 reiserfs_debug (s, REISERFS_DEBUG_CODE, "reiserquota: freeing (failed prealloc) %d blocks id=%u", amount_needed + hint->prealloc_size - nr_allocated - REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); 1121 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1122 "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1123 amount_needed + hint->prealloc_size -
1124 nr_allocated -
1125 REISERFS_I(hint->inode)->i_prealloc_count,
1126 hint->inode->i_uid);
1034#endif 1127#endif
1035 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + 1128 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
1036 hint->prealloc_size - nr_allocated - 1129 hint->prealloc_size - nr_allocated -
1037 REISERFS_I(hint->inode)->i_prealloc_count); 1130 REISERFS_I(hint->inode)->
1038 } 1131 i_prealloc_count);
1132 }
1039 1133
1040 return CARRY_ON; 1134 return CARRY_ON;
1041} 1135}
1042 1136
1043/* grab new blocknrs from preallocated list */ 1137/* grab new blocknrs from preallocated list */
1044/* return amount still needed after using them */ 1138/* return amount still needed after using them */
1045static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint, 1139static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1046 b_blocknr_t *new_blocknrs, int amount_needed) 1140 b_blocknr_t * new_blocknrs,
1141 int amount_needed)
1047{ 1142{
1048 struct inode * inode = hint->inode; 1143 struct inode *inode = hint->inode;
1049 1144
1050 if (REISERFS_I(inode)->i_prealloc_count > 0) { 1145 if (REISERFS_I(inode)->i_prealloc_count > 0) {
1051 while (amount_needed) { 1146 while (amount_needed) {
1052 1147
1053 *new_blocknrs ++ = REISERFS_I(inode)->i_prealloc_block ++; 1148 *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1054 REISERFS_I(inode)->i_prealloc_count --; 1149 REISERFS_I(inode)->i_prealloc_count--;
1055 1150
1056 amount_needed --; 1151 amount_needed--;
1057 1152
1058 if (REISERFS_I(inode)->i_prealloc_count <= 0) { 1153 if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1059 list_del(&REISERFS_I(inode)->i_prealloc_list); 1154 list_del(&REISERFS_I(inode)->i_prealloc_list);
1060 break; 1155 break;
1061 } 1156 }
1157 }
1062 } 1158 }
1063 } 1159 /* return amount still needed after using preallocated blocks */
1064 /* return amount still needed after using preallocated blocks */ 1160 return amount_needed;
1065 return amount_needed;
1066} 1161}
1067 1162
1068int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint, 1163int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have
1069 b_blocknr_t * new_blocknrs, int amount_needed, 1164 already reserved */ )
1070 int reserved_by_us /* Amount of blocks we have
1071 already reserved */)
1072{ 1165{
1073 int initial_amount_needed = amount_needed; 1166 int initial_amount_needed = amount_needed;
1074 int ret; 1167 int ret;
1075 struct super_block *s = hint->th->t_super; 1168 struct super_block *s = hint->th->t_super;
1076 1169
1077 /* Check if there is enough space, taking into account reserved space */ 1170 /* Check if there is enough space, taking into account reserved space */
1078 if ( SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks < 1171 if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1079 amount_needed - reserved_by_us) 1172 amount_needed - reserved_by_us)
1080 return NO_DISK_SPACE; 1173 return NO_DISK_SPACE;
1081 /* should this be if !hint->inode && hint->preallocate? */ 1174 /* should this be if !hint->inode && hint->preallocate? */
1082 /* do you mean hint->formatted_node can be removed ? - Zam */ 1175 /* do you mean hint->formatted_node can be removed ? - Zam */
1083 /* hint->formatted_node cannot be removed because we try to access 1176 /* hint->formatted_node cannot be removed because we try to access
1084 inode information here, and there is often no inode assotiated with 1177 inode information here, and there is often no inode assotiated with
1085 metadata allocations - green */ 1178 metadata allocations - green */
1086 1179
1087 if (!hint->formatted_node && hint->preallocate) { 1180 if (!hint->formatted_node && hint->preallocate) {
1088 amount_needed = use_preallocated_list_if_available 1181 amount_needed = use_preallocated_list_if_available
1182 (hint, new_blocknrs, amount_needed);
1183 if (amount_needed == 0) /* all blocknrs we need we got from
1184 prealloc. list */
1185 return CARRY_ON;
1186 new_blocknrs += (initial_amount_needed - amount_needed);
1187 }
1188
1189 /* find search start and save it in hint structure */
1190 determine_search_start(hint, amount_needed);
1191 if (hint->search_start >= SB_BLOCK_COUNT(s))
1192 hint->search_start = SB_BLOCK_COUNT(s) - 1;
1193
1194 /* allocation itself; fill new_blocknrs and preallocation arrays */
1195 ret = blocknrs_and_prealloc_arrays_from_search_start
1089 (hint, new_blocknrs, amount_needed); 1196 (hint, new_blocknrs, amount_needed);
1090 if (amount_needed == 0) /* all blocknrs we need we got from 1197
1091 prealloc. list */ 1198 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1092 return CARRY_ON; 1199 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1093 new_blocknrs += (initial_amount_needed - amount_needed); 1200 * variant) */
1094 } 1201
1095 1202 if (ret != CARRY_ON) {
1096 /* find search start and save it in hint structure */ 1203 while (amount_needed++ < initial_amount_needed) {
1097 determine_search_start(hint, amount_needed); 1204 reiserfs_free_block(hint->th, hint->inode,
1098 if (hint->search_start >= SB_BLOCK_COUNT(s)) 1205 *(--new_blocknrs), 1);
1099 hint->search_start = SB_BLOCK_COUNT(s) - 1; 1206 }
1100
1101 /* allocation itself; fill new_blocknrs and preallocation arrays */
1102 ret = blocknrs_and_prealloc_arrays_from_search_start
1103 (hint, new_blocknrs, amount_needed);
1104
1105 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1106 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1107 * variant) */
1108
1109 if (ret != CARRY_ON) {
1110 while (amount_needed ++ < initial_amount_needed) {
1111 reiserfs_free_block(hint->th, hint->inode, *(--new_blocknrs), 1);
1112 } 1207 }
1113 } 1208 return ret;
1114 return ret;
1115} 1209}
1116 1210
1117/* These 2 functions are here to provide blocks reservation to the rest of kernel */ 1211/* These 2 functions are here to provide blocks reservation to the rest of kernel */
1118/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure 1212/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
1119 there are actually this much blocks on the FS available */ 1213 there are actually this much blocks on the FS available */
1120void reiserfs_claim_blocks_to_be_allocated( 1214void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, /* super block of
1121 struct super_block *sb, /* super block of 1215 filesystem where
1122 filesystem where 1216 blocks should be
1123 blocks should be 1217 reserved */
1124 reserved */ 1218 int blocks /* How much to reserve */
1125 int blocks /* How much to reserve */ 1219 )
1126 )
1127{ 1220{
1128 1221
1129 /* Fast case, if reservation is zero - exit immediately. */ 1222 /* Fast case, if reservation is zero - exit immediately. */
1130 if ( !blocks ) 1223 if (!blocks)
1131 return; 1224 return;
1132 1225
1133 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1226 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1134 REISERFS_SB(sb)->reserved_blocks += blocks; 1227 REISERFS_SB(sb)->reserved_blocks += blocks;
1135 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1228 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1136} 1229}
1137 1230
1138/* Unreserve @blocks amount of blocks in fs pointed by @sb */ 1231/* Unreserve @blocks amount of blocks in fs pointed by @sb */
1139void reiserfs_release_claimed_blocks( 1232void reiserfs_release_claimed_blocks(struct super_block *sb, /* super block of
1140 struct super_block *sb, /* super block of 1233 filesystem where
1141 filesystem where 1234 blocks should be
1142 blocks should be 1235 reserved */
1143 reserved */ 1236 int blocks /* How much to unreserve */
1144 int blocks /* How much to unreserve */ 1237 )
1145 )
1146{ 1238{
1147 1239
1148 /* Fast case, if unreservation is zero - exit immediately. */ 1240 /* Fast case, if unreservation is zero - exit immediately. */
1149 if ( !blocks ) 1241 if (!blocks)
1150 return; 1242 return;
1151 1243
1152 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1244 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1153 REISERFS_SB(sb)->reserved_blocks -= blocks; 1245 REISERFS_SB(sb)->reserved_blocks -= blocks;
1154 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1246 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1155 RFALSE( REISERFS_SB(sb)->reserved_blocks < 0, "amount of blocks reserved became zero?"); 1247 RFALSE(REISERFS_SB(sb)->reserved_blocks < 0,
1248 "amount of blocks reserved became zero?");
1156} 1249}
1157 1250
1158/* This function estimates how much pages we will be able to write to FS 1251/* This function estimates how much pages we will be able to write to FS
1159 used for reiserfs_file_write() purposes for now. */ 1252 used for reiserfs_file_write() purposes for now. */
1160int reiserfs_can_fit_pages ( struct super_block *sb /* superblock of filesystem 1253int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem
1161 to estimate space */ ) 1254 to estimate space */ )
1162{ 1255{
1163 int space; 1256 int space;
1164 1257
1165 spin_lock(&REISERFS_SB(sb)->bitmap_lock); 1258 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1166 space = (SB_FREE_BLOCKS(sb) - REISERFS_SB(sb)->reserved_blocks) >> ( PAGE_CACHE_SHIFT - sb->s_blocksize_bits); 1259 space =
1260 (SB_FREE_BLOCKS(sb) -
1261 REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT -
1262 sb->s_blocksize_bits);
1167 spin_unlock(&REISERFS_SB(sb)->bitmap_lock); 1263 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1168 1264
1169 return space>0?space:0; 1265 return space > 0 ? space : 0;
1170} 1266}
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index fbde4b01a32..9dd71e80703 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -12,264 +12,286 @@
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14 14
15extern struct reiserfs_key MIN_KEY; 15extern struct reiserfs_key MIN_KEY;
16 16
17static int reiserfs_readdir (struct file *, void *, filldir_t); 17static int reiserfs_readdir(struct file *, void *, filldir_t);
18static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) ; 18static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
19 int datasync);
19 20
20struct file_operations reiserfs_dir_operations = { 21struct file_operations reiserfs_dir_operations = {
21 .read = generic_read_dir, 22 .read = generic_read_dir,
22 .readdir = reiserfs_readdir, 23 .readdir = reiserfs_readdir,
23 .fsync = reiserfs_dir_fsync, 24 .fsync = reiserfs_dir_fsync,
24 .ioctl = reiserfs_ioctl, 25 .ioctl = reiserfs_ioctl,
25}; 26};
26 27
27static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { 28static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
28 struct inode *inode = dentry->d_inode; 29 int datasync)
29 int err; 30{
30 reiserfs_write_lock(inode->i_sb); 31 struct inode *inode = dentry->d_inode;
31 err = reiserfs_commit_for_inode(inode) ; 32 int err;
32 reiserfs_write_unlock(inode->i_sb) ; 33 reiserfs_write_lock(inode->i_sb);
33 if (err < 0) 34 err = reiserfs_commit_for_inode(inode);
34 return err; 35 reiserfs_write_unlock(inode->i_sb);
35 return 0; 36 if (err < 0)
37 return err;
38 return 0;
36} 39}
37 40
38
39#define store_ih(where,what) copy_item_head (where, what) 41#define store_ih(where,what) copy_item_head (where, what)
40 42
41// 43//
42static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldir) 44static int reiserfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
43{ 45{
44 struct inode *inode = filp->f_dentry->d_inode; 46 struct inode *inode = filp->f_dentry->d_inode;
45 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 47 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
46 INITIALIZE_PATH (path_to_entry); 48 INITIALIZE_PATH(path_to_entry);
47 struct buffer_head * bh; 49 struct buffer_head *bh;
48 int item_num, entry_num; 50 int item_num, entry_num;
49 const struct reiserfs_key * rkey; 51 const struct reiserfs_key *rkey;
50 struct item_head * ih, tmp_ih; 52 struct item_head *ih, tmp_ih;
51 int search_res; 53 int search_res;
52 char * local_buf; 54 char *local_buf;
53 loff_t next_pos; 55 loff_t next_pos;
54 char small_buf[32] ; /* avoid kmalloc if we can */ 56 char small_buf[32]; /* avoid kmalloc if we can */
55 struct reiserfs_dir_entry de; 57 struct reiserfs_dir_entry de;
56 int ret = 0; 58 int ret = 0;
57 59
58 reiserfs_write_lock(inode->i_sb); 60 reiserfs_write_lock(inode->i_sb);
59 61
60 reiserfs_check_lock_depth(inode->i_sb, "readdir") ; 62 reiserfs_check_lock_depth(inode->i_sb, "readdir");
61 63
62 /* form key for search the next directory entry using f_pos field of 64 /* form key for search the next directory entry using f_pos field of
63 file structure */ 65 file structure */
64 make_cpu_key (&pos_key, inode, (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, 66 make_cpu_key(&pos_key, inode,
65 TYPE_DIRENTRY, 3); 67 (filp->f_pos) ? (filp->f_pos) : DOT_OFFSET, TYPE_DIRENTRY,
66 next_pos = cpu_key_k_offset (&pos_key); 68 3);
67 69 next_pos = cpu_key_k_offset(&pos_key);
68 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos);*/ 70
69 71 /* reiserfs_warning (inode->i_sb, "reiserfs_readdir 1: f_pos = %Ld", filp->f_pos); */
70 path_to_entry.reada = PATH_READA; 72
71 while (1) { 73 path_to_entry.reada = PATH_READA;
72 research: 74 while (1) {
73 /* search the directory item, containing entry with specified key */ 75 research:
74 search_res = search_by_entry_key (inode->i_sb, &pos_key, &path_to_entry, &de); 76 /* search the directory item, containing entry with specified key */
75 if (search_res == IO_ERROR) { 77 search_res =
76 // FIXME: we could just skip part of directory which could 78 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
77 // not be read 79 &de);
78 ret = -EIO; 80 if (search_res == IO_ERROR) {
79 goto out; 81 // FIXME: we could just skip part of directory which could
80 } 82 // not be read
81 entry_num = de.de_entry_num; 83 ret = -EIO;
82 bh = de.de_bh;
83 item_num = de.de_item_num;
84 ih = de.de_ih;
85 store_ih (&tmp_ih, ih);
86
87 /* we must have found item, that is item of this directory, */
88 RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key),
89 "vs-9000: found item %h does not match to dir we readdir %K",
90 ih, &pos_key);
91 RFALSE( item_num > B_NR_ITEMS (bh) - 1,
92 "vs-9005 item_num == %d, item amount == %d",
93 item_num, B_NR_ITEMS (bh));
94
95 /* and entry must be not more than number of entries in the item */
96 RFALSE( I_ENTRY_COUNT (ih) < entry_num,
97 "vs-9010: entry number is too big %d (%d)",
98 entry_num, I_ENTRY_COUNT (ih));
99
100 if (search_res == POSITION_FOUND || entry_num < I_ENTRY_COUNT (ih)) {
101 /* go through all entries in the directory item beginning from the entry, that has been found */
102 struct reiserfs_de_head * deh = B_I_DEH (bh, ih) + entry_num;
103
104 for (; entry_num < I_ENTRY_COUNT (ih); entry_num ++, deh ++) {
105 int d_reclen;
106 char * d_name;
107 off_t d_off;
108 ino_t d_ino;
109
110 if (!de_visible (deh))
111 /* it is hidden entry */
112 continue;
113 d_reclen = entry_length (bh, ih, entry_num);
114 d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh);
115 if (!d_name[d_reclen - 1])
116 d_reclen = strlen (d_name);
117
118 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){
119 /* too big to send back to VFS */
120 continue ;
121 }
122
123 /* Ignore the .reiserfs_priv entry */
124 if (reiserfs_xattrs (inode->i_sb) &&
125 !old_format_only(inode->i_sb) &&
126 filp->f_dentry == inode->i_sb->s_root &&
127 REISERFS_SB(inode->i_sb)->priv_root &&
128 REISERFS_SB(inode->i_sb)->priv_root->d_inode &&
129 deh_objectid(deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) {
130 continue;
131 }
132
133 d_off = deh_offset (deh);
134 filp->f_pos = d_off ;
135 d_ino = deh_objectid (deh);
136 if (d_reclen <= 32) {
137 local_buf = small_buf ;
138 } else {
139 local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ;
140 if (!local_buf) {
141 pathrelse (&path_to_entry);
142 ret = -ENOMEM ;
143 goto out; 84 goto out;
144 }
145 if (item_moved (&tmp_ih, &path_to_entry)) {
146 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ;
147 goto research;
148 }
149 }
150 // Note, that we copy name to user space via temporary
151 // buffer (local_buf) because filldir will block if
152 // user space buffer is swapped out. At that time
153 // entry can move to somewhere else
154 memcpy (local_buf, d_name, d_reclen);
155 if (filldir (dirent, local_buf, d_reclen, d_off, d_ino,
156 DT_UNKNOWN) < 0) {
157 if (local_buf != small_buf) {
158 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ;
159 }
160 goto end;
161 } 85 }
162 if (local_buf != small_buf) { 86 entry_num = de.de_entry_num;
163 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 87 bh = de.de_bh;
88 item_num = de.de_item_num;
89 ih = de.de_ih;
90 store_ih(&tmp_ih, ih);
91
92 /* we must have found item, that is item of this directory, */
93 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
94 "vs-9000: found item %h does not match to dir we readdir %K",
95 ih, &pos_key);
96 RFALSE(item_num > B_NR_ITEMS(bh) - 1,
97 "vs-9005 item_num == %d, item amount == %d",
98 item_num, B_NR_ITEMS(bh));
99
100 /* and entry must be not more than number of entries in the item */
101 RFALSE(I_ENTRY_COUNT(ih) < entry_num,
102 "vs-9010: entry number is too big %d (%d)",
103 entry_num, I_ENTRY_COUNT(ih));
104
105 if (search_res == POSITION_FOUND
106 || entry_num < I_ENTRY_COUNT(ih)) {
107 /* go through all entries in the directory item beginning from the entry, that has been found */
108 struct reiserfs_de_head *deh =
109 B_I_DEH(bh, ih) + entry_num;
110
111 for (; entry_num < I_ENTRY_COUNT(ih);
112 entry_num++, deh++) {
113 int d_reclen;
114 char *d_name;
115 off_t d_off;
116 ino_t d_ino;
117
118 if (!de_visible(deh))
119 /* it is hidden entry */
120 continue;
121 d_reclen = entry_length(bh, ih, entry_num);
122 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
123 if (!d_name[d_reclen - 1])
124 d_reclen = strlen(d_name);
125
126 if (d_reclen >
127 REISERFS_MAX_NAME(inode->i_sb->
128 s_blocksize)) {
129 /* too big to send back to VFS */
130 continue;
131 }
132
133 /* Ignore the .reiserfs_priv entry */
134 if (reiserfs_xattrs(inode->i_sb) &&
135 !old_format_only(inode->i_sb) &&
136 filp->f_dentry == inode->i_sb->s_root &&
137 REISERFS_SB(inode->i_sb)->priv_root &&
138 REISERFS_SB(inode->i_sb)->priv_root->d_inode
139 && deh_objectid(deh) ==
140 le32_to_cpu(INODE_PKEY
141 (REISERFS_SB(inode->i_sb)->
142 priv_root->d_inode)->
143 k_objectid)) {
144 continue;
145 }
146
147 d_off = deh_offset(deh);
148 filp->f_pos = d_off;
149 d_ino = deh_objectid(deh);
150 if (d_reclen <= 32) {
151 local_buf = small_buf;
152 } else {
153 local_buf =
154 reiserfs_kmalloc(d_reclen, GFP_NOFS,
155 inode->i_sb);
156 if (!local_buf) {
157 pathrelse(&path_to_entry);
158 ret = -ENOMEM;
159 goto out;
160 }
161 if (item_moved(&tmp_ih, &path_to_entry)) {
162 reiserfs_kfree(local_buf,
163 d_reclen,
164 inode->i_sb);
165 goto research;
166 }
167 }
168 // Note, that we copy name to user space via temporary
169 // buffer (local_buf) because filldir will block if
170 // user space buffer is swapped out. At that time
171 // entry can move to somewhere else
172 memcpy(local_buf, d_name, d_reclen);
173 if (filldir
174 (dirent, local_buf, d_reclen, d_off, d_ino,
175 DT_UNKNOWN) < 0) {
176 if (local_buf != small_buf) {
177 reiserfs_kfree(local_buf,
178 d_reclen,
179 inode->i_sb);
180 }
181 goto end;
182 }
183 if (local_buf != small_buf) {
184 reiserfs_kfree(local_buf, d_reclen,
185 inode->i_sb);
186 }
187 // next entry should be looked for with such offset
188 next_pos = deh_offset(deh) + 1;
189
190 if (item_moved(&tmp_ih, &path_to_entry)) {
191 goto research;
192 }
193 } /* for */
164 } 194 }
165 195
166 // next entry should be looked for with such offset 196 if (item_num != B_NR_ITEMS(bh) - 1)
167 next_pos = deh_offset (deh) + 1; 197 // end of directory has been reached
198 goto end;
199
200 /* item we went through is last item of node. Using right
201 delimiting key check is it directory end */
202 rkey = get_rkey(&path_to_entry, inode->i_sb);
203 if (!comp_le_keys(rkey, &MIN_KEY)) {
204 /* set pos_key to key, that is the smallest and greater
205 that key of the last entry in the item */
206 set_cpu_key_k_offset(&pos_key, next_pos);
207 continue;
208 }
168 209
169 if (item_moved (&tmp_ih, &path_to_entry)) { 210 if (COMP_SHORT_KEYS(rkey, &pos_key)) {
170 goto research; 211 // end of directory has been reached
212 goto end;
171 } 213 }
172 } /* for */ 214
173 } 215 /* directory continues in the right neighboring block */
174 216 set_cpu_key_k_offset(&pos_key,
175 if (item_num != B_NR_ITEMS (bh) - 1) 217 le_key_k_offset(KEY_FORMAT_3_5, rkey));
176 // end of directory has been reached 218
177 goto end; 219 } /* while */
178 220
179 /* item we went through is last item of node. Using right 221 end:
180 delimiting key check is it directory end */ 222 filp->f_pos = next_pos;
181 rkey = get_rkey (&path_to_entry, inode->i_sb); 223 pathrelse(&path_to_entry);
182 if (! comp_le_keys (rkey, &MIN_KEY)) { 224 reiserfs_check_path(&path_to_entry);
183 /* set pos_key to key, that is the smallest and greater 225 out:
184 that key of the last entry in the item */ 226 reiserfs_write_unlock(inode->i_sb);
185 set_cpu_key_k_offset (&pos_key, next_pos); 227 return ret;
186 continue;
187 }
188
189 if ( COMP_SHORT_KEYS (rkey, &pos_key)) {
190 // end of directory has been reached
191 goto end;
192 }
193
194 /* directory continues in the right neighboring block */
195 set_cpu_key_k_offset (&pos_key, le_key_k_offset (KEY_FORMAT_3_5, rkey));
196
197 } /* while */
198
199
200 end:
201 filp->f_pos = next_pos;
202 pathrelse (&path_to_entry);
203 reiserfs_check_path(&path_to_entry) ;
204 out:
205 reiserfs_write_unlock(inode->i_sb);
206 return ret;
207} 228}
208 229
209/* compose directory item containing "." and ".." entries (entries are 230/* compose directory item containing "." and ".." entries (entries are
210 not aligned to 4 byte boundary) */ 231 not aligned to 4 byte boundary) */
211/* the last four params are LE */ 232/* the last four params are LE */
212void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid, 233void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid,
213 __le32 par_dirid, __le32 par_objid) 234 __le32 par_dirid, __le32 par_objid)
214{ 235{
215 struct reiserfs_de_head * deh; 236 struct reiserfs_de_head *deh;
216 237
217 memset (body, 0, EMPTY_DIR_SIZE_V1); 238 memset(body, 0, EMPTY_DIR_SIZE_V1);
218 deh = (struct reiserfs_de_head *)body; 239 deh = (struct reiserfs_de_head *)body;
219 240
220 /* direntry header of "." */ 241 /* direntry header of "." */
221 put_deh_offset( &(deh[0]), DOT_OFFSET ); 242 put_deh_offset(&(deh[0]), DOT_OFFSET);
222 /* these two are from make_le_item_head, and are are LE */ 243 /* these two are from make_le_item_head, and are are LE */
223 deh[0].deh_dir_id = dirid; 244 deh[0].deh_dir_id = dirid;
224 deh[0].deh_objectid = objid; 245 deh[0].deh_objectid = objid;
225 deh[0].deh_state = 0; /* Endian safe if 0 */ 246 deh[0].deh_state = 0; /* Endian safe if 0 */
226 put_deh_location( &(deh[0]), EMPTY_DIR_SIZE_V1 - strlen( "." )); 247 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE_V1 - strlen("."));
227 mark_de_visible(&(deh[0])); 248 mark_de_visible(&(deh[0]));
228 249
229 /* direntry header of ".." */ 250 /* direntry header of ".." */
230 put_deh_offset( &(deh[1]), DOT_DOT_OFFSET); 251 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
231 /* key of ".." for the root directory */ 252 /* key of ".." for the root directory */
232 /* these two are from the inode, and are are LE */ 253 /* these two are from the inode, and are are LE */
233 deh[1].deh_dir_id = par_dirid; 254 deh[1].deh_dir_id = par_dirid;
234 deh[1].deh_objectid = par_objid; 255 deh[1].deh_objectid = par_objid;
235 deh[1].deh_state = 0; /* Endian safe if 0 */ 256 deh[1].deh_state = 0; /* Endian safe if 0 */
236 put_deh_location( &(deh[1]), deh_location( &(deh[0]) ) - strlen( ".." ) ); 257 put_deh_location(&(deh[1]), deh_location(&(deh[0])) - strlen(".."));
237 mark_de_visible(&(deh[1])); 258 mark_de_visible(&(deh[1]));
238 259
239 /* copy ".." and "." */ 260 /* copy ".." and "." */
240 memcpy (body + deh_location( &(deh[0]) ), ".", 1); 261 memcpy(body + deh_location(&(deh[0])), ".", 1);
241 memcpy (body + deh_location( &(deh[1]) ), "..", 2); 262 memcpy(body + deh_location(&(deh[1])), "..", 2);
242} 263}
243 264
244/* compose directory item containing "." and ".." entries */ 265/* compose directory item containing "." and ".." entries */
245void make_empty_dir_item (char * body, __le32 dirid, __le32 objid, 266void make_empty_dir_item(char *body, __le32 dirid, __le32 objid,
246 __le32 par_dirid, __le32 par_objid) 267 __le32 par_dirid, __le32 par_objid)
247{ 268{
248 struct reiserfs_de_head * deh; 269 struct reiserfs_de_head *deh;
249 270
250 memset (body, 0, EMPTY_DIR_SIZE); 271 memset(body, 0, EMPTY_DIR_SIZE);
251 deh = (struct reiserfs_de_head *)body; 272 deh = (struct reiserfs_de_head *)body;
252 273
253 /* direntry header of "." */ 274 /* direntry header of "." */
254 put_deh_offset( &(deh[0]), DOT_OFFSET ); 275 put_deh_offset(&(deh[0]), DOT_OFFSET);
255 /* these two are from make_le_item_head, and are are LE */ 276 /* these two are from make_le_item_head, and are are LE */
256 deh[0].deh_dir_id = dirid; 277 deh[0].deh_dir_id = dirid;
257 deh[0].deh_objectid = objid; 278 deh[0].deh_objectid = objid;
258 deh[0].deh_state = 0; /* Endian safe if 0 */ 279 deh[0].deh_state = 0; /* Endian safe if 0 */
259 put_deh_location( &(deh[0]), EMPTY_DIR_SIZE - ROUND_UP( strlen( "." ) ) ); 280 put_deh_location(&(deh[0]), EMPTY_DIR_SIZE - ROUND_UP(strlen(".")));
260 mark_de_visible(&(deh[0])); 281 mark_de_visible(&(deh[0]));
261 282
262 /* direntry header of ".." */ 283 /* direntry header of ".." */
263 put_deh_offset( &(deh[1]), DOT_DOT_OFFSET ); 284 put_deh_offset(&(deh[1]), DOT_DOT_OFFSET);
264 /* key of ".." for the root directory */ 285 /* key of ".." for the root directory */
265 /* these two are from the inode, and are are LE */ 286 /* these two are from the inode, and are are LE */
266 deh[1].deh_dir_id = par_dirid; 287 deh[1].deh_dir_id = par_dirid;
267 deh[1].deh_objectid = par_objid; 288 deh[1].deh_objectid = par_objid;
268 deh[1].deh_state = 0; /* Endian safe if 0 */ 289 deh[1].deh_state = 0; /* Endian safe if 0 */
269 put_deh_location( &(deh[1]), deh_location( &(deh[0])) - ROUND_UP( strlen( ".." ) ) ); 290 put_deh_location(&(deh[1]),
270 mark_de_visible(&(deh[1])); 291 deh_location(&(deh[0])) - ROUND_UP(strlen("..")));
271 292 mark_de_visible(&(deh[1]));
272 /* copy ".." and "." */ 293
273 memcpy (body + deh_location( &(deh[0]) ), ".", 1); 294 /* copy ".." and "." */
274 memcpy (body + deh_location( &(deh[1]) ), "..", 2); 295 memcpy(body + deh_location(&(deh[0])), ".", 1);
296 memcpy(body + deh_location(&(deh[1])), "..", 2);
275} 297}
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 2118db2896c..b2264ba3cc5 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -8,7 +8,6 @@
8/* balance the tree according to the analysis made before, */ 8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */ 9/* and using buffers obtained after all above. */
10 10
11
12/** 11/**
13 ** balance_leaf_when_delete 12 ** balance_leaf_when_delete
14 ** balance_leaf 13 ** balance_leaf
@@ -24,23 +23,22 @@
24 23
25#ifdef CONFIG_REISERFS_CHECK 24#ifdef CONFIG_REISERFS_CHECK
26 25
27struct tree_balance * cur_tb = NULL; /* detects whether more than one 26struct tree_balance *cur_tb = NULL; /* detects whether more than one
28 copy of tb exists as a means 27 copy of tb exists as a means
29 of checking whether schedule 28 of checking whether schedule
30 is interrupting do_balance */ 29 is interrupting do_balance */
31#endif 30#endif
32 31
33inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, 32inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
34 struct buffer_head * bh, int flag) 33 struct buffer_head *bh, int flag)
35{ 34{
36 journal_mark_dirty(tb->transaction_handle, 35 journal_mark_dirty(tb->transaction_handle,
37 tb->transaction_handle->t_super, bh) ; 36 tb->transaction_handle->t_super, bh);
38} 37}
39 38
40#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
41#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
42 41
43
44/* summary: 42/* summary:
45 if deleting something ( tb->insert_size[0] < 0 ) 43 if deleting something ( tb->insert_size[0] < 0 )
46 return(balance_leaf_when_delete()); (flag d handled here) 44 return(balance_leaf_when_delete()); (flag d handled here)
@@ -64,8 +62,6 @@ be performed by do_balance.
64 62
65-Hans */ 63-Hans */
66 64
67
68
69/* Balance leaf node in case of delete or cut: insert_size[0] < 0 65/* Balance leaf node in case of delete or cut: insert_size[0] < 0
70 * 66 *
71 * lnum, rnum can have values >= -1 67 * lnum, rnum can have values >= -1
@@ -73,1384 +69,1933 @@ be performed by do_balance.
73 * 0 means that nothing should be done with the neighbor 69 * 0 means that nothing should be done with the neighbor
74 * >0 means to shift entirely or partly the specified number of items to the neighbor 70 * >0 means to shift entirely or partly the specified number of items to the neighbor
75 */ 71 */
76static int balance_leaf_when_delete (struct tree_balance * tb, int flag) 72static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
77{ 73{
78 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); 74 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
79 int item_pos = PATH_LAST_POSITION (tb->tb_path); 75 int item_pos = PATH_LAST_POSITION(tb->tb_path);
80 int pos_in_item = tb->tb_path->pos_in_item; 76 int pos_in_item = tb->tb_path->pos_in_item;
81 struct buffer_info bi; 77 struct buffer_info bi;
82 int n; 78 int n;
83 struct item_head * ih; 79 struct item_head *ih;
84 80
85 RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 81 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
86 "vs- 12000: level: wrong FR %z", tb->FR[0]); 82 "vs- 12000: level: wrong FR %z", tb->FR[0]);
87 RFALSE( tb->blknum[0] > 1, 83 RFALSE(tb->blknum[0] > 1,
88 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 84 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
89 RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0), 85 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
90 "PAP-12010: tree can not be empty"); 86 "PAP-12010: tree can not be empty");
91 87
92 ih = B_N_PITEM_HEAD (tbS0, item_pos); 88 ih = B_N_PITEM_HEAD(tbS0, item_pos);
93 89
94 /* Delete or truncate the item */ 90 /* Delete or truncate the item */
95 91
96 switch (flag) { 92 switch (flag) {
97 case M_DELETE: /* delete item in S[0] */ 93 case M_DELETE: /* delete item in S[0] */
94
95 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih);
98
99 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1);
104
105 if (!item_pos && tb->CFL[0]) {
106 if (B_NR_ITEMS(tbS0)) {
107 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
108 0);
109 } else {
110 if (!PATH_H_POSITION(tb->tb_path, 1))
111 replace_key(tb, tb->CFL[0], tb->lkey[0],
112 PATH_H_PPARENT(tb->tb_path,
113 0), 0);
114 }
115 }
98 116
99 RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 117 RFALSE(!item_pos && !tb->CFL[0],
100 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 118 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
101 -tb->insert_size [0], ih); 119 tb->L[0]);
102 120
103 bi.tb = tb; 121 break;
104 bi.bi_bh = tbS0; 122
105 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 123 case M_CUT:{ /* cut item in S[0] */
106 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 124 bi.tb = tb;
107 leaf_delete_items (&bi, 0, item_pos, 1, -1); 125 bi.bi_bh = tbS0;
108 126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
109 if ( ! item_pos && tb->CFL[0] ) { 127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
110 if ( B_NR_ITEMS(tbS0) ) { 128 if (is_direntry_le_ih(ih)) {
111 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); 129
112 } 130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
113 else { 131 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
114 if ( ! PATH_H_POSITION (tb->tb_path, 1) ) 132 tb->insert_size[0] = -1;
115 replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0); 133 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
116 } 134 -tb->insert_size[0]);
117 } 135
118 136 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
119 RFALSE( ! item_pos && !tb->CFL[0], 137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
120 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); 138 tb->CFL[0]);
121 139
122 break; 140 if (!item_pos && !pos_in_item && tb->CFL[0]) {
123 141 replace_key(tb, tb->CFL[0], tb->lkey[0],
124 case M_CUT: { /* cut item in S[0] */ 142 tbS0, 0);
125 bi.tb = tb; 143 }
126 bi.bi_bh = tbS0; 144 } else {
127 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 145 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
128 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 146 -tb->insert_size[0]);
129 if (is_direntry_le_ih (ih)) { 147
130 148 RFALSE(!ih_item_len(ih),
131 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ 149 "PAP-12035: cut must leave non-zero dynamic length of item");
132 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 150 }
133 tb->insert_size[0] = -1; 151 break;
134 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
135
136 RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
139
140 if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
141 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
142 }
143 } else {
144 leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
145
146 RFALSE( ! ih_item_len(ih),
147 "PAP-12035: cut must leave non-zero dynamic length of item");
148 }
149 break;
150 }
151
152 default:
153 print_cur_tb ("12040");
154 reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
155 (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
156 }
157
158 /* the rule is that no shifting occurs unless by shifting a node can be freed */
159 n = B_NR_ITEMS(tbS0);
160 if ( tb->lnum[0] ) /* L[0] takes part in balancing */
161 {
162 if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */
163 {
164 if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */
165 {
166 if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
167 {
168 /* all contents of all the 3 buffers will be in L[0] */
169 if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
170 replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
171
172 leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
173 leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);
174
175 reiserfs_invalidate_buffer (tb, tbS0);
176 reiserfs_invalidate_buffer (tb, tb->R[0]);
177
178 return 0;
179 } 152 }
180 /* all contents of all the 3 buffers will be in R[0] */
181 leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
182 leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);
183 153
184 /* right_delimiting_key is correct in R[0] */ 154 default:
185 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); 155 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb,
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
158 (flag ==
159 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" :
161 "UNKNOWN"), flag);
162 }
186 163
187 reiserfs_invalidate_buffer (tb, tbS0); 164 /* the rule is that no shifting occurs unless by shifting a node can be freed */
188 reiserfs_invalidate_buffer (tb, tb->L[0]); 165 n = B_NR_ITEMS(tbS0);
166 if (tb->lnum[0]) { /* L[0] takes part in balancing */
167 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
168 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
169 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
170 /* all contents of all the 3 buffers will be in L[0] */
171 if (PATH_H_POSITION(tb->tb_path, 1) == 0
172 && 1 < B_NR_ITEMS(tb->FR[0]))
173 replace_key(tb, tb->CFL[0],
174 tb->lkey[0],
175 tb->FR[0], 1);
176
177 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
178 -1, NULL);
179 leaf_move_items(LEAF_FROM_R_TO_L, tb,
180 B_NR_ITEMS(tb->R[0]),
181 -1, NULL);
182
183 reiserfs_invalidate_buffer(tb, tbS0);
184 reiserfs_invalidate_buffer(tb,
185 tb->R[0]);
186
187 return 0;
188 }
189 /* all contents of all the 3 buffers will be in R[0] */
190 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
191 NULL);
192 leaf_move_items(LEAF_FROM_L_TO_R, tb,
193 B_NR_ITEMS(tb->L[0]), -1, NULL);
194
195 /* right_delimiting_key is correct in R[0] */
196 replace_key(tb, tb->CFR[0], tb->rkey[0],
197 tb->R[0], 0);
189 198
190 return -1; 199 reiserfs_invalidate_buffer(tb, tbS0);
191 } 200 reiserfs_invalidate_buffer(tb, tb->L[0]);
192 201
193 RFALSE( tb->rnum[0] != 0, 202 return -1;
194 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 203 }
195 /* all contents of L[0] and S[0] will be in L[0] */
196 leaf_shift_left(tb, n, -1);
197 204
198 reiserfs_invalidate_buffer (tb, tbS0); 205 RFALSE(tb->rnum[0] != 0,
206 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
207 /* all contents of L[0] and S[0] will be in L[0] */
208 leaf_shift_left(tb, n, -1);
199 209
200 return 0; 210 reiserfs_invalidate_buffer(tb, tbS0);
211
212 return 0;
213 }
214 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
215
216 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
217 (tb->lnum[0] + tb->rnum[0] > n + 1),
218 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
219 tb->rnum[0], tb->lnum[0], n);
220 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
221 (tb->lbytes != -1 || tb->rbytes != -1),
222 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
223 tb->rbytes, tb->lbytes);
224 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
225 (tb->lbytes < 1 || tb->rbytes != -1),
226 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
227 tb->rbytes, tb->lbytes);
228
229 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
230 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
231
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
201 } 235 }
202 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
203
204 RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
205 ( tb->lnum[0] + tb->rnum[0] > n+1 ),
206 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
207 tb->rnum[0], tb->lnum[0], n);
208 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
209 (tb->lbytes != -1 || tb->rbytes != -1),
210 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
211 tb->rbytes, tb->lbytes);
212 RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
213 (tb->lbytes < 1 || tb->rbytes != -1),
214 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
215 tb->rbytes, tb->lbytes);
216
217 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
218 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
219
220 reiserfs_invalidate_buffer (tb, tbS0);
221 236
222 return 0; 237 if (tb->rnum[0] == -1) {
223 } 238 /* all contents of R[0] and S[0] will be in R[0] */
239 leaf_shift_right(tb, n, -1);
240 reiserfs_invalidate_buffer(tb, tbS0);
241 return 0;
242 }
224 243
225 if ( tb->rnum[0] == -1 ) { 244 RFALSE(tb->rnum[0],
226 /* all contents of R[0] and S[0] will be in R[0] */ 245 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
227 leaf_shift_right(tb, n, -1);
228 reiserfs_invalidate_buffer (tb, tbS0);
229 return 0; 246 return 0;
230 }
231
232 RFALSE( tb->rnum[0],
233 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
234 return 0;
235} 247}
236 248
237 249static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
238static int balance_leaf (struct tree_balance * tb, 250 const char *body, /* body of inserted item or bytes to paste */
239 struct item_head * ih, /* item header of inserted item (this is on little endian) */ 251 int flag, /* i - insert, d - delete, c - cut, p - paste
240 const char * body, /* body of inserted item or bytes to paste */ 252 (see comment to do_balance) */
241 int flag, /* i - insert, d - delete, c - cut, p - paste 253 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
242 (see comment to do_balance) */ 254 must be inserted into the next higher level. This insertion
243 struct item_head * insert_key, /* in our processing of one level we sometimes determine what 255 consists of a key or two keys and their corresponding
244 must be inserted into the next higher level. This insertion 256 pointers */
245 consists of a key or two keys and their corresponding 257 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
246 pointers */
247 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
248 ) 258 )
249{ 259{
250 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); 260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
251 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0] 261 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
252 of the affected item */ 262 of the affected item */
253 struct buffer_info bi; 263 struct buffer_info bi;
254 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 264 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
255 int snum[2]; /* number of items that will be placed 265 int snum[2]; /* number of items that will be placed
256 into S_new (includes partially shifted 266 into S_new (includes partially shifted
257 items) */ 267 items) */
258 int sbytes[2]; /* if an item is partially shifted into S_new then 268 int sbytes[2]; /* if an item is partially shifted into S_new then
259 if it is a directory item 269 if it is a directory item
260 it is the number of entries from the item that are shifted into S_new 270 it is the number of entries from the item that are shifted into S_new
261 else 271 else
262 it is the number of bytes from the item that are shifted into S_new 272 it is the number of bytes from the item that are shifted into S_new
263 */ 273 */
264 int n, i; 274 int n, i;
265 int ret_val; 275 int ret_val;
266 int pos_in_item; 276 int pos_in_item;
267 int zeros_num; 277 int zeros_num;
268 278
269 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] ); 279 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
270 280
271 /* Make balance in case insert_size[0] < 0 */ 281 /* Make balance in case insert_size[0] < 0 */
272 if ( tb->insert_size[0] < 0 ) 282 if (tb->insert_size[0] < 0)
273 return balance_leaf_when_delete (tb, flag); 283 return balance_leaf_when_delete(tb, flag);
274 284
275 zeros_num = 0; 285 zeros_num = 0;
276 if (flag == M_INSERT && body == 0) 286 if (flag == M_INSERT && body == 0)
277 zeros_num = ih_item_len( ih ); 287 zeros_num = ih_item_len(ih);
278 288
279 pos_in_item = tb->tb_path->pos_in_item; 289 pos_in_item = tb->tb_path->pos_in_item;
280 /* for indirect item pos_in_item is measured in unformatted node 290 /* for indirect item pos_in_item is measured in unformatted node
281 pointers. Recalculate to bytes */ 291 pointers. Recalculate to bytes */
282 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) 292 if (flag != M_INSERT
283 pos_in_item *= UNFM_P_SIZE; 293 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
284 294 pos_in_item *= UNFM_P_SIZE;
285 if ( tb->lnum[0] > 0 ) { 295
286 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 296 if (tb->lnum[0] > 0) {
287 if ( item_pos < tb->lnum[0] ) { 297 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
288 /* new item or it part falls to L[0], shift it too */ 298 if (item_pos < tb->lnum[0]) {
289 n = B_NR_ITEMS(tb->L[0]); 299 /* new item or it part falls to L[0], shift it too */
290 300 n = B_NR_ITEMS(tb->L[0]);
291 switch (flag) { 301
292 case M_INSERT: /* insert item into L[0] */ 302 switch (flag) {
293 303 case M_INSERT: /* insert item into L[0] */
294 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { 304
295 /* part of new item falls into L[0] */ 305 if (item_pos == tb->lnum[0] - 1
296 int new_item_len; 306 && tb->lbytes != -1) {
297 int version; 307 /* part of new item falls into L[0] */
298 308 int new_item_len;
299 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); 309 int version;
300 310
301 /* Calculate item length to insert to S[0] */ 311 ret_val =
302 new_item_len = ih_item_len(ih) - tb->lbytes; 312 leaf_shift_left(tb, tb->lnum[0] - 1,
303 /* Calculate and check item length to insert to L[0] */ 313 -1);
304 put_ih_item_len(ih, ih_item_len(ih) - new_item_len ); 314
305 315 /* Calculate item length to insert to S[0] */
306 RFALSE( ih_item_len(ih) <= 0, 316 new_item_len =
307 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 317 ih_item_len(ih) - tb->lbytes;
308 ih_item_len(ih)); 318 /* Calculate and check item length to insert to L[0] */
309 319 put_ih_item_len(ih,
310 /* Insert new item into L[0] */ 320 ih_item_len(ih) -
311 bi.tb = tb; 321 new_item_len);
312 bi.bi_bh = tb->L[0]; 322
313 bi.bi_parent = tb->FL[0]; 323 RFALSE(ih_item_len(ih) <= 0,
314 bi.bi_position = get_left_neighbor_position (tb, 0); 324 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
315 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, 325 ih_item_len(ih));
316 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); 326
317 327 /* Insert new item into L[0] */
318 version = ih_version (ih); 328 bi.tb = tb;
319 329 bi.bi_bh = tb->L[0];
320 /* Calculate key component, item length and body to insert into S[0] */ 330 bi.bi_parent = tb->FL[0];
321 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) ); 331 bi.bi_position =
322 332 get_left_neighbor_position(tb, 0);
323 put_ih_item_len( ih, new_item_len ); 333 leaf_insert_into_buf(&bi,
324 if ( tb->lbytes > zeros_num ) { 334 n + item_pos -
325 body += (tb->lbytes - zeros_num); 335 ret_val, ih, body,
326 zeros_num = 0; 336 zeros_num >
327 } 337 ih_item_len(ih) ?
328 else 338 ih_item_len(ih) :
329 zeros_num -= tb->lbytes; 339 zeros_num);
330 340
331 RFALSE( ih_item_len(ih) <= 0, 341 version = ih_version(ih);
332 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 342
333 ih_item_len(ih)); 343 /* Calculate key component, item length and body to insert into S[0] */
334 } else { 344 set_le_ih_k_offset(ih,
335 /* new item in whole falls into L[0] */ 345 le_ih_k_offset(ih) +
336 /* Shift lnum[0]-1 items to L[0] */ 346 (tb->
337 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes); 347 lbytes <<
338 /* Insert new item into L[0] */ 348 (is_indirect_le_ih
339 bi.tb = tb; 349 (ih) ? tb->tb_sb->
340 bi.bi_bh = tb->L[0]; 350 s_blocksize_bits -
341 bi.bi_parent = tb->FL[0]; 351 UNFM_P_SHIFT :
342 bi.bi_position = get_left_neighbor_position (tb, 0); 352 0)));
343 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num); 353
344 tb->insert_size[0] = 0; 354 put_ih_item_len(ih, new_item_len);
345 zeros_num = 0; 355 if (tb->lbytes > zeros_num) {
346 } 356 body +=
347 break; 357 (tb->lbytes - zeros_num);
348 358 zeros_num = 0;
349 case M_PASTE: /* append item in L[0] */ 359 } else
350 360 zeros_num -= tb->lbytes;
351 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { 361
352 /* we must shift the part of the appended item */ 362 RFALSE(ih_item_len(ih) <= 0,
353 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) { 363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
354 364 ih_item_len(ih));
355 RFALSE( zeros_num, 365 } else {
356 "PAP-12090: invalid parameter in case of a directory"); 366 /* new item in whole falls into L[0] */
357 /* directory item */ 367 /* Shift lnum[0]-1 items to L[0] */
358 if ( tb->lbytes > pos_in_item ) { 368 ret_val =
359 /* new directory entry falls into L[0] */ 369 leaf_shift_left(tb, tb->lnum[0] - 1,
360 struct item_head * pasted; 370 tb->lbytes);
361 int l_pos_in_item = pos_in_item; 371 /* Insert new item into L[0] */
362 372 bi.tb = tb;
363 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ 373 bi.bi_bh = tb->L[0];
364 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); 374 bi.bi_parent = tb->FL[0];
365 if ( ret_val && ! item_pos ) { 375 bi.bi_position =
366 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1); 376 get_left_neighbor_position(tb, 0);
367 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1); 377 leaf_insert_into_buf(&bi,
368 } 378 n + item_pos -
369 379 ret_val, ih, body,
370 /* Append given directory entry to directory item */ 380 zeros_num);
371 bi.tb = tb; 381 tb->insert_size[0] = 0;
372 bi.bi_bh = tb->L[0]; 382 zeros_num = 0;
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position = get_left_neighbor_position (tb, 0);
375 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
376 tb->insert_size[0], body, zeros_num);
377
378 /* previous string prepared space for pasting new entry, following string pastes this entry */
379
380 /* when we have merge directory item, pos_in_item has been changed too */
381
382 /* paste new directory entry. 1 is entry number */
383 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
384 (struct reiserfs_de_head *)body,
385 body + DEH_SIZE, tb->insert_size[0]
386 );
387 tb->insert_size[0] = 0;
388 } else {
389 /* new directory item doesn't fall into L[0] */
390 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
391 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
392 }
393 /* Calculate new position to append in item body */
394 pos_in_item -= tb->lbytes;
395 }
396 else {
397 /* regular object */
398 RFALSE( tb->lbytes <= 0,
399 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
400 tb->lbytes);
401 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
402 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
404
405 if ( tb->lbytes >= pos_in_item ) {
406 /* appended item will be in L[0] in whole */
407 int l_n;
408
409 /* this bytes number must be appended to the last item of L[h] */
410 l_n = tb->lbytes - pos_in_item;
411
412 /* Calculate new insert_size[0] */
413 tb->insert_size[0] -= l_n;
414
415 RFALSE( tb->insert_size[0] <= 0,
416 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417 tb->insert_size[0]);
418 ret_val = leaf_shift_left(tb,tb->lnum[0],
419 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
420 /* Append to body of item in L[0] */
421 bi.tb = tb;
422 bi.bi_bh = tb->L[0];
423 bi.bi_parent = tb->FL[0];
424 bi.bi_position = get_left_neighbor_position (tb, 0);
425 leaf_paste_in_buffer(
426 &bi,n + item_pos - ret_val,
427 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
428 l_n,body, zeros_num > l_n ? l_n : zeros_num
429 );
430 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
431 {
432 int version;
433 int temp_l = l_n;
434
435 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
436 "PAP-12106: item length must be 0");
437 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
438 B_N_PKEY (tb->L[0],
439 n + item_pos - ret_val)),
440 "PAP-12107: items must be of the same file");
441 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
442 n + item_pos - ret_val))) {
443 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
444 } 383 }
445 /* update key of first item in S0 */ 384 break;
446 version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); 385
447 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), 386 case M_PASTE: /* append item in L[0] */
448 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l); 387
449 /* update left delimiting key */ 388 if (item_pos == tb->lnum[0] - 1
450 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), 389 && tb->lbytes != -1) {
451 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l); 390 /* we must shift the part of the appended item */
452 } 391 if (is_direntry_le_ih
453 392 (B_N_PITEM_HEAD(tbS0, item_pos))) {
454 /* Calculate new body, position in item and insert_size[0] */ 393
455 if ( l_n > zeros_num ) { 394 RFALSE(zeros_num,
456 body += (l_n - zeros_num); 395 "PAP-12090: invalid parameter in case of a directory");
457 zeros_num = 0; 396 /* directory item */
458 } 397 if (tb->lbytes > pos_in_item) {
459 else 398 /* new directory entry falls into L[0] */
460 zeros_num -= l_n; 399 struct item_head
461 pos_in_item = 0; 400 *pasted;
462 401 int l_pos_in_item =
463 RFALSE( comp_short_le_keys 402 pos_in_item;
464 (B_N_PKEY(tbS0,0), 403
465 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) || 404 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
466 405 ret_val =
467 !op_is_left_mergeable 406 leaf_shift_left(tb,
468 (B_N_PKEY (tbS0, 0), tbS0->b_size) || 407 tb->
469 !op_is_left_mergeable 408 lnum
470 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), 409 [0],
471 tbS0->b_size), 410 tb->
472 "PAP-12120: item must be merge-able with left neighboring item"); 411 lbytes
473 } 412 -
474 else /* only part of the appended item will be in L[0] */ 413 1);
475 { 414 if (ret_val
476 /* Calculate position in item for append in S[0] */ 415 && !item_pos) {
477 pos_in_item -= tb->lbytes; 416 pasted =
478 417 B_N_PITEM_HEAD
479 RFALSE( pos_in_item <= 0, 418 (tb->L[0],
480 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); 419 B_NR_ITEMS
481 420 (tb->
482 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 421 L[0]) -
483 leaf_shift_left(tb,tb->lnum[0],tb->lbytes); 422 1);
484 } 423 l_pos_in_item +=
485 } 424 I_ENTRY_COUNT
486 } 425 (pasted) -
487 else /* appended item will be in L[0] in whole */ 426 (tb->
488 { 427 lbytes -
489 struct item_head * pasted; 428 1);
490 429 }
491 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) 430
492 { /* if we paste into first item of S[0] and it is left mergable */ 431 /* Append given directory entry to directory item */
493 /* then increment pos_in_item by the size of the last item in L[0] */ 432 bi.tb = tb;
494 pasted = B_N_PITEM_HEAD(tb->L[0],n-1); 433 bi.bi_bh = tb->L[0];
495 if ( is_direntry_le_ih (pasted) ) 434 bi.bi_parent =
496 pos_in_item += ih_entry_count(pasted); 435 tb->FL[0];
497 else 436 bi.bi_position =
498 pos_in_item += ih_item_len(pasted); 437 get_left_neighbor_position
438 (tb, 0);
439 leaf_paste_in_buffer
440 (&bi,
441 n + item_pos -
442 ret_val,
443 l_pos_in_item,
444 tb->insert_size[0],
445 body, zeros_num);
446
447 /* previous string prepared space for pasting new entry, following string pastes this entry */
448
449 /* when we have merge directory item, pos_in_item has been changed too */
450
451 /* paste new directory entry. 1 is entry number */
452 leaf_paste_entries(bi.
453 bi_bh,
454 n +
455 item_pos
456 -
457 ret_val,
458 l_pos_in_item,
459 1,
460 (struct
461 reiserfs_de_head
462 *)
463 body,
464 body
465 +
466 DEH_SIZE,
467 tb->
468 insert_size
469 [0]
470 );
471 tb->insert_size[0] = 0;
472 } else {
473 /* new directory item doesn't fall into L[0] */
474 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
475 leaf_shift_left(tb,
476 tb->
477 lnum[0],
478 tb->
479 lbytes);
480 }
481 /* Calculate new position to append in item body */
482 pos_in_item -= tb->lbytes;
483 } else {
484 /* regular object */
485 RFALSE(tb->lbytes <= 0,
486 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
487 tb->lbytes);
488 RFALSE(pos_in_item !=
489 ih_item_len
490 (B_N_PITEM_HEAD
491 (tbS0, item_pos)),
492 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
493 ih_item_len
494 (B_N_PITEM_HEAD
495 (tbS0, item_pos)),
496 pos_in_item);
497
498 if (tb->lbytes >= pos_in_item) {
499 /* appended item will be in L[0] in whole */
500 int l_n;
501
502 /* this bytes number must be appended to the last item of L[h] */
503 l_n =
504 tb->lbytes -
505 pos_in_item;
506
507 /* Calculate new insert_size[0] */
508 tb->insert_size[0] -=
509 l_n;
510
511 RFALSE(tb->
512 insert_size[0] <=
513 0,
514 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
515 tb->
516 insert_size[0]);
517 ret_val =
518 leaf_shift_left(tb,
519 tb->
520 lnum
521 [0],
522 ih_item_len
523 (B_N_PITEM_HEAD
524 (tbS0,
525 item_pos)));
526 /* Append to body of item in L[0] */
527 bi.tb = tb;
528 bi.bi_bh = tb->L[0];
529 bi.bi_parent =
530 tb->FL[0];
531 bi.bi_position =
532 get_left_neighbor_position
533 (tb, 0);
534 leaf_paste_in_buffer
535 (&bi,
536 n + item_pos -
537 ret_val,
538 ih_item_len
539 (B_N_PITEM_HEAD
540 (tb->L[0],
541 n + item_pos -
542 ret_val)), l_n,
543 body,
544 zeros_num >
545 l_n ? l_n :
546 zeros_num);
547 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
548 {
549 int version;
550 int temp_l =
551 l_n;
552
553 RFALSE
554 (ih_item_len
555 (B_N_PITEM_HEAD
556 (tbS0,
557 0)),
558 "PAP-12106: item length must be 0");
559 RFALSE
560 (comp_short_le_keys
561 (B_N_PKEY
562 (tbS0, 0),
563 B_N_PKEY
564 (tb->L[0],
565 n +
566 item_pos
567 -
568 ret_val)),
569 "PAP-12107: items must be of the same file");
570 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
571 temp_l =
572 l_n
573 <<
574 (tb->
575 tb_sb->
576 s_blocksize_bits
577 -
578 UNFM_P_SHIFT);
579 }
580 /* update key of first item in S0 */
581 version =
582 ih_version
583 (B_N_PITEM_HEAD
584 (tbS0, 0));
585 set_le_key_k_offset
586 (version,
587 B_N_PKEY
588 (tbS0, 0),
589 le_key_k_offset
590 (version,
591 B_N_PKEY
592 (tbS0,
593 0)) +
594 temp_l);
595 /* update left delimiting key */
596 set_le_key_k_offset
597 (version,
598 B_N_PDELIM_KEY
599 (tb->
600 CFL[0],
601 tb->
602 lkey[0]),
603 le_key_k_offset
604 (version,
605 B_N_PDELIM_KEY
606 (tb->
607 CFL[0],
608 tb->
609 lkey[0]))
610 + temp_l);
611 }
612
613 /* Calculate new body, position in item and insert_size[0] */
614 if (l_n > zeros_num) {
615 body +=
616 (l_n -
617 zeros_num);
618 zeros_num = 0;
619 } else
620 zeros_num -=
621 l_n;
622 pos_in_item = 0;
623
624 RFALSE
625 (comp_short_le_keys
626 (B_N_PKEY(tbS0, 0),
627 B_N_PKEY(tb->L[0],
628 B_NR_ITEMS
629 (tb->
630 L[0]) -
631 1))
632 ||
633 !op_is_left_mergeable
634 (B_N_PKEY(tbS0, 0),
635 tbS0->b_size)
636 ||
637 !op_is_left_mergeable
638 (B_N_PDELIM_KEY
639 (tb->CFL[0],
640 tb->lkey[0]),
641 tbS0->b_size),
642 "PAP-12120: item must be merge-able with left neighboring item");
643 } else { /* only part of the appended item will be in L[0] */
644
645 /* Calculate position in item for append in S[0] */
646 pos_in_item -=
647 tb->lbytes;
648
649 RFALSE(pos_in_item <= 0,
650 "PAP-12125: no place for paste. pos_in_item=%d",
651 pos_in_item);
652
653 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
654 leaf_shift_left(tb,
655 tb->
656 lnum[0],
657 tb->
658 lbytes);
659 }
660 }
661 } else { /* appended item will be in L[0] in whole */
662
663 struct item_head *pasted;
664
665 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
666 /* then increment pos_in_item by the size of the last item in L[0] */
667 pasted =
668 B_N_PITEM_HEAD(tb->L[0],
669 n - 1);
670 if (is_direntry_le_ih(pasted))
671 pos_in_item +=
672 ih_entry_count
673 (pasted);
674 else
675 pos_in_item +=
676 ih_item_len(pasted);
677 }
678
679 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
680 ret_val =
681 leaf_shift_left(tb, tb->lnum[0],
682 tb->lbytes);
683 /* Append to body of item in L[0] */
684 bi.tb = tb;
685 bi.bi_bh = tb->L[0];
686 bi.bi_parent = tb->FL[0];
687 bi.bi_position =
688 get_left_neighbor_position(tb, 0);
689 leaf_paste_in_buffer(&bi,
690 n + item_pos -
691 ret_val,
692 pos_in_item,
693 tb->insert_size[0],
694 body, zeros_num);
695
696 /* if appended item is directory, paste entry */
697 pasted =
698 B_N_PITEM_HEAD(tb->L[0],
699 n + item_pos -
700 ret_val);
701 if (is_direntry_le_ih(pasted))
702 leaf_paste_entries(bi.bi_bh,
703 n +
704 item_pos -
705 ret_val,
706 pos_in_item,
707 1,
708 (struct
709 reiserfs_de_head
710 *)body,
711 body +
712 DEH_SIZE,
713 tb->
714 insert_size
715 [0]
716 );
717 /* if appended item is indirect item, put unformatted node into un list */
718 if (is_indirect_le_ih(pasted))
719 set_ih_free_space(pasted, 0);
720 tb->insert_size[0] = 0;
721 zeros_num = 0;
722 }
723 break;
724 default: /* cases d and t */
725 reiserfs_panic(tb->tb_sb,
726 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
727 (flag ==
728 M_DELETE) ? "DELETE" : ((flag ==
729 M_CUT)
730 ? "CUT"
731 :
732 "UNKNOWN"),
733 flag);
499 } 734 }
500 735 } else {
501 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 736 /* new item doesn't fall into L[0] */
502 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes); 737 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
503 /* Append to body of item in L[0] */
504 bi.tb = tb;
505 bi.bi_bh = tb->L[0];
506 bi.bi_parent = tb->FL[0];
507 bi.bi_position = get_left_neighbor_position (tb, 0);
508 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
509 body, zeros_num);
510
511 /* if appended item is directory, paste entry */
512 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
513 if (is_direntry_le_ih (pasted))
514 leaf_paste_entries (
515 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
516 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
517 );
518 /* if appended item is indirect item, put unformatted node into un list */
519 if (is_indirect_le_ih (pasted))
520 set_ih_free_space (pasted, 0);
521 tb->insert_size[0] = 0;
522 zeros_num = 0;
523 } 738 }
524 break;
525 default: /* cases d and t */
526 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
527 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
528 }
529 } else {
530 /* new item doesn't fall into L[0] */
531 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
532 } 739 }
533 } /* tb->lnum[0] > 0 */
534 740
535 /* Calculate new item position */ 741 /* tb->lnum[0] > 0 */
536 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0)); 742 /* Calculate new item position */
537 743 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
538 if ( tb->rnum[0] > 0 ) { 744
539 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 745 if (tb->rnum[0] > 0) {
540 n = B_NR_ITEMS(tbS0); 746 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
541 switch ( flag ) { 747 n = B_NR_ITEMS(tbS0);
542 748 switch (flag) {
543 case M_INSERT: /* insert item */ 749
544 if ( n - tb->rnum[0] < item_pos ) 750 case M_INSERT: /* insert item */
545 { /* new item or its part falls to R[0] */ 751 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
546 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) 752 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
547 { /* part of new item falls into R[0] */ 753 loff_t old_key_comp, old_len,
548 loff_t old_key_comp, old_len, r_zeros_number; 754 r_zeros_number;
549 const char * r_body; 755 const char *r_body;
550 int version; 756 int version;
551 loff_t offset; 757 loff_t offset;
552 758
553 leaf_shift_right(tb,tb->rnum[0]-1,-1); 759 leaf_shift_right(tb, tb->rnum[0] - 1,
554 760 -1);
555 version = ih_version(ih); 761
556 /* Remember key component and item length */ 762 version = ih_version(ih);
557 old_key_comp = le_ih_k_offset( ih ); 763 /* Remember key component and item length */
558 old_len = ih_item_len(ih); 764 old_key_comp = le_ih_k_offset(ih);
559 765 old_len = ih_item_len(ih);
560 /* Calculate key component and item length to insert into R[0] */ 766
561 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)); 767 /* Calculate key component and item length to insert into R[0] */
562 set_le_ih_k_offset( ih, offset ); 768 offset =
563 put_ih_item_len( ih, tb->rbytes); 769 le_ih_k_offset(ih) +
564 /* Insert part of the item into R[0] */ 770 ((old_len -
565 bi.tb = tb; 771 tb->
566 bi.bi_bh = tb->R[0]; 772 rbytes) << (is_indirect_le_ih(ih)
567 bi.bi_parent = tb->FR[0]; 773 ? tb->tb_sb->
568 bi.bi_position = get_right_neighbor_position (tb, 0); 774 s_blocksize_bits -
569 if ( (old_len - tb->rbytes) > zeros_num ) { 775 UNFM_P_SHIFT : 0));
570 r_zeros_number = 0; 776 set_le_ih_k_offset(ih, offset);
571 r_body = body + (old_len - tb->rbytes) - zeros_num; 777 put_ih_item_len(ih, tb->rbytes);
572 } 778 /* Insert part of the item into R[0] */
573 else { 779 bi.tb = tb;
574 r_body = body; 780 bi.bi_bh = tb->R[0];
575 r_zeros_number = zeros_num - (old_len - tb->rbytes); 781 bi.bi_parent = tb->FR[0];
576 zeros_num -= r_zeros_number; 782 bi.bi_position =
577 } 783 get_right_neighbor_position(tb, 0);
578 784 if ((old_len - tb->rbytes) > zeros_num) {
579 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); 785 r_zeros_number = 0;
580 786 r_body =
581 /* Replace right delimiting key by first key in R[0] */ 787 body + (old_len -
582 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); 788 tb->rbytes) -
583 789 zeros_num;
584 /* Calculate key component and item length to insert into S[0] */ 790 } else {
585 set_le_ih_k_offset( ih, old_key_comp ); 791 r_body = body;
586 put_ih_item_len( ih, old_len - tb->rbytes ); 792 r_zeros_number =
587 793 zeros_num - (old_len -
588 tb->insert_size[0] -= tb->rbytes; 794 tb->rbytes);
795 zeros_num -= r_zeros_number;
796 }
797
798 leaf_insert_into_buf(&bi, 0, ih, r_body,
799 r_zeros_number);
800
801 /* Replace right delimiting key by first key in R[0] */
802 replace_key(tb, tb->CFR[0], tb->rkey[0],
803 tb->R[0], 0);
804
805 /* Calculate key component and item length to insert into S[0] */
806 set_le_ih_k_offset(ih, old_key_comp);
807 put_ih_item_len(ih,
808 old_len - tb->rbytes);
809
810 tb->insert_size[0] -= tb->rbytes;
811
812 } else { /* whole new item falls into R[0] */
813
814 /* Shift rnum[0]-1 items to R[0] */
815 ret_val =
816 leaf_shift_right(tb,
817 tb->rnum[0] - 1,
818 tb->rbytes);
819 /* Insert new item into R[0] */
820 bi.tb = tb;
821 bi.bi_bh = tb->R[0];
822 bi.bi_parent = tb->FR[0];
823 bi.bi_position =
824 get_right_neighbor_position(tb, 0);
825 leaf_insert_into_buf(&bi,
826 item_pos - n +
827 tb->rnum[0] - 1,
828 ih, body,
829 zeros_num);
830
831 if (item_pos - n + tb->rnum[0] - 1 == 0) {
832 replace_key(tb, tb->CFR[0],
833 tb->rkey[0],
834 tb->R[0], 0);
835
836 }
837 zeros_num = tb->insert_size[0] = 0;
838 }
839 } else { /* new item or part of it doesn't fall into R[0] */
589 840
590 } 841 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
591 else /* whole new item falls into R[0] */
592 {
593 /* Shift rnum[0]-1 items to R[0] */
594 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
595 /* Insert new item into R[0] */
596 bi.tb = tb;
597 bi.bi_bh = tb->R[0];
598 bi.bi_parent = tb->FR[0];
599 bi.bi_position = get_right_neighbor_position (tb, 0);
600 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
601
602 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
603 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
604
605 }
606 zeros_num = tb->insert_size[0] = 0;
607 }
608 }
609 else /* new item or part of it doesn't fall into R[0] */
610 {
611 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
612 }
613 break;
614
615 case M_PASTE: /* append item */
616
617 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
618 {
619 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
620 { /* we must shift the part of the appended item */
621 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
622 { /* we append to directory item */
623 int entry_count;
624
625 RFALSE( zeros_num,
626 "PAP-12145: invalid parameter in case of a directory");
627 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
628 if ( entry_count - tb->rbytes < pos_in_item )
629 /* new directory entry falls into R[0] */
630 {
631 int paste_entry_position;
632
633 RFALSE( tb->rbytes - 1 >= entry_count ||
634 ! tb->insert_size[0],
635 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
636 tb->rbytes, entry_count);
637 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
638 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
639 /* Paste given directory entry to directory item */
640 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
641 bi.tb = tb;
642 bi.bi_bh = tb->R[0];
643 bi.bi_parent = tb->FR[0];
644 bi.bi_position = get_right_neighbor_position (tb, 0);
645 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
646 tb->insert_size[0],body,zeros_num);
647 /* paste entry */
648 leaf_paste_entries (
649 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
650 body + DEH_SIZE, tb->insert_size[0]
651 );
652
653 if ( paste_entry_position == 0 ) {
654 /* change delimiting keys */
655 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
656 }
657
658 tb->insert_size[0] = 0;
659 pos_in_item++;
660 }
661 else /* new directory entry doesn't fall into R[0] */
662 {
663 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
664 }
665 }
666 else /* regular object */
667 {
668 int n_shift, n_rem, r_zeros_number;
669 const char * r_body;
670
671 /* Calculate number of bytes which must be shifted from appended item */
672 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
673 n_shift = 0;
674
675 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
676 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
677 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
678
679 leaf_shift_right(tb,tb->rnum[0],n_shift);
680 /* Calculate number of bytes which must remain in body after appending to R[0] */
681 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
682 n_rem = 0;
683
684 {
685 int version;
686 unsigned long temp_rem = n_rem;
687
688 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
689 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
690 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
691 UNFM_P_SHIFT);
692 }
693 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
694 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
695 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
696 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
697 } 842 }
843 break;
844
845 case M_PASTE: /* append item */
846
847 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
848 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
849 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
850 int entry_count;
851
852 RFALSE(zeros_num,
853 "PAP-12145: invalid parameter in case of a directory");
854 entry_count =
855 I_ENTRY_COUNT(B_N_PITEM_HEAD
856 (tbS0,
857 item_pos));
858 if (entry_count - tb->rbytes <
859 pos_in_item)
860 /* new directory entry falls into R[0] */
861 {
862 int paste_entry_position;
863
864 RFALSE(tb->rbytes - 1 >=
865 entry_count
866 || !tb->
867 insert_size[0],
868 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
869 tb->rbytes,
870 entry_count);
871 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
872 leaf_shift_right(tb,
873 tb->
874 rnum
875 [0],
876 tb->
877 rbytes
878 - 1);
879 /* Paste given directory entry to directory item */
880 paste_entry_position =
881 pos_in_item -
882 entry_count +
883 tb->rbytes - 1;
884 bi.tb = tb;
885 bi.bi_bh = tb->R[0];
886 bi.bi_parent =
887 tb->FR[0];
888 bi.bi_position =
889 get_right_neighbor_position
890 (tb, 0);
891 leaf_paste_in_buffer
892 (&bi, 0,
893 paste_entry_position,
894 tb->insert_size[0],
895 body, zeros_num);
896 /* paste entry */
897 leaf_paste_entries(bi.
898 bi_bh,
899 0,
900 paste_entry_position,
901 1,
902 (struct
903 reiserfs_de_head
904 *)
905 body,
906 body
907 +
908 DEH_SIZE,
909 tb->
910 insert_size
911 [0]
912 );
913
914 if (paste_entry_position
915 == 0) {
916 /* change delimiting keys */
917 replace_key(tb,
918 tb->
919 CFR
920 [0],
921 tb->
922 rkey
923 [0],
924 tb->
925 R
926 [0],
927 0);
928 }
929
930 tb->insert_size[0] = 0;
931 pos_in_item++;
932 } else { /* new directory entry doesn't fall into R[0] */
933
934 leaf_shift_right(tb,
935 tb->
936 rnum
937 [0],
938 tb->
939 rbytes);
940 }
941 } else { /* regular object */
942
943 int n_shift, n_rem,
944 r_zeros_number;
945 const char *r_body;
946
947 /* Calculate number of bytes which must be shifted from appended item */
948 if ((n_shift =
949 tb->rbytes -
950 tb->insert_size[0]) < 0)
951 n_shift = 0;
952
953 RFALSE(pos_in_item !=
954 ih_item_len
955 (B_N_PITEM_HEAD
956 (tbS0, item_pos)),
957 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
958 pos_in_item,
959 ih_item_len
960 (B_N_PITEM_HEAD
961 (tbS0, item_pos)));
962
963 leaf_shift_right(tb,
964 tb->rnum[0],
965 n_shift);
966 /* Calculate number of bytes which must remain in body after appending to R[0] */
967 if ((n_rem =
968 tb->insert_size[0] -
969 tb->rbytes) < 0)
970 n_rem = 0;
971
972 {
973 int version;
974 unsigned long temp_rem =
975 n_rem;
976
977 version =
978 ih_version
979 (B_N_PITEM_HEAD
980 (tb->R[0], 0));
981 if (is_indirect_le_key
982 (version,
983 B_N_PKEY(tb->R[0],
984 0))) {
985 temp_rem =
986 n_rem <<
987 (tb->tb_sb->
988 s_blocksize_bits
989 -
990 UNFM_P_SHIFT);
991 }
992 set_le_key_k_offset
993 (version,
994 B_N_PKEY(tb->R[0],
995 0),
996 le_key_k_offset
997 (version,
998 B_N_PKEY(tb->R[0],
999 0)) +
1000 temp_rem);
1001 set_le_key_k_offset
1002 (version,
1003 B_N_PDELIM_KEY(tb->
1004 CFR
1005 [0],
1006 tb->
1007 rkey
1008 [0]),
1009 le_key_k_offset
1010 (version,
1011 B_N_PDELIM_KEY
1012 (tb->CFR[0],
1013 tb->rkey[0])) +
1014 temp_rem);
1015 }
698/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 1016/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
699 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 1017 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
700 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0); 1018 do_balance_mark_internal_dirty
701 1019 (tb, tb->CFR[0], 0);
702 /* Append part of body into R[0] */ 1020
703 bi.tb = tb; 1021 /* Append part of body into R[0] */
704 bi.bi_bh = tb->R[0]; 1022 bi.tb = tb;
705 bi.bi_parent = tb->FR[0]; 1023 bi.bi_bh = tb->R[0];
706 bi.bi_position = get_right_neighbor_position (tb, 0); 1024 bi.bi_parent = tb->FR[0];
707 if ( n_rem > zeros_num ) { 1025 bi.bi_position =
708 r_zeros_number = 0; 1026 get_right_neighbor_position
709 r_body = body + n_rem - zeros_num; 1027 (tb, 0);
710 } 1028 if (n_rem > zeros_num) {
711 else { 1029 r_zeros_number = 0;
712 r_body = body; 1030 r_body =
713 r_zeros_number = zeros_num - n_rem; 1031 body + n_rem -
714 zeros_num -= r_zeros_number; 1032 zeros_num;
715 } 1033 } else {
716 1034 r_body = body;
717 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); 1035 r_zeros_number =
718 1036 zeros_num - n_rem;
719 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { 1037 zeros_num -=
1038 r_zeros_number;
1039 }
1040
1041 leaf_paste_in_buffer(&bi, 0,
1042 n_shift,
1043 tb->
1044 insert_size
1045 [0] -
1046 n_rem,
1047 r_body,
1048 r_zeros_number);
1049
1050 if (is_indirect_le_ih
1051 (B_N_PITEM_HEAD
1052 (tb->R[0], 0))) {
720#if 0 1053#if 0
721 RFALSE( n_rem, 1054 RFALSE(n_rem,
722 "PAP-12160: paste more than one unformatted node pointer"); 1055 "PAP-12160: paste more than one unformatted node pointer");
723#endif 1056#endif
724 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0); 1057 set_ih_free_space
725 } 1058 (B_N_PITEM_HEAD
726 tb->insert_size[0] = n_rem; 1059 (tb->R[0], 0), 0);
727 if ( ! n_rem ) 1060 }
728 pos_in_item ++; 1061 tb->insert_size[0] = n_rem;
729 } 1062 if (!n_rem)
730 } 1063 pos_in_item++;
731 else /* pasted item in whole falls into R[0] */ 1064 }
732 { 1065 } else { /* pasted item in whole falls into R[0] */
733 struct item_head * pasted; 1066
1067 struct item_head *pasted;
1068
1069 ret_val =
1070 leaf_shift_right(tb, tb->rnum[0],
1071 tb->rbytes);
1072 /* append item in R[0] */
1073 if (pos_in_item >= 0) {
1074 bi.tb = tb;
1075 bi.bi_bh = tb->R[0];
1076 bi.bi_parent = tb->FR[0];
1077 bi.bi_position =
1078 get_right_neighbor_position
1079 (tb, 0);
1080 leaf_paste_in_buffer(&bi,
1081 item_pos -
1082 n +
1083 tb->
1084 rnum[0],
1085 pos_in_item,
1086 tb->
1087 insert_size
1088 [0], body,
1089 zeros_num);
1090 }
1091
1092 /* paste new entry, if item is directory item */
1093 pasted =
1094 B_N_PITEM_HEAD(tb->R[0],
1095 item_pos - n +
1096 tb->rnum[0]);
1097 if (is_direntry_le_ih(pasted)
1098 && pos_in_item >= 0) {
1099 leaf_paste_entries(bi.bi_bh,
1100 item_pos -
1101 n +
1102 tb->rnum[0],
1103 pos_in_item,
1104 1,
1105 (struct
1106 reiserfs_de_head
1107 *)body,
1108 body +
1109 DEH_SIZE,
1110 tb->
1111 insert_size
1112 [0]
1113 );
1114 if (!pos_in_item) {
1115
1116 RFALSE(item_pos - n +
1117 tb->rnum[0],
1118 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1119
1120 /* update delimiting keys */
1121 replace_key(tb,
1122 tb->CFR[0],
1123 tb->rkey[0],
1124 tb->R[0],
1125 0);
1126 }
1127 }
1128
1129 if (is_indirect_le_ih(pasted))
1130 set_ih_free_space(pasted, 0);
1131 zeros_num = tb->insert_size[0] = 0;
1132 }
1133 } else { /* new item doesn't fall into R[0] */
734 1134
735 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes); 1135 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
736 /* append item in R[0] */
737 if ( pos_in_item >= 0 ) {
738 bi.tb = tb;
739 bi.bi_bh = tb->R[0];
740 bi.bi_parent = tb->FR[0];
741 bi.bi_position = get_right_neighbor_position (tb, 0);
742 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
743 tb->insert_size[0],body, zeros_num);
744 }
745
746 /* paste new entry, if item is directory item */
747 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
748 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
749 leaf_paste_entries (
750 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
751 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
752 );
753 if ( ! pos_in_item ) {
754
755 RFALSE( item_pos - n + tb->rnum[0],
756 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
757
758 /* update delimiting keys */
759 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
760 } 1136 }
761 } 1137 break;
762 1138 default: /* cases d and t */
763 if (is_indirect_le_ih (pasted)) 1139 reiserfs_panic(tb->tb_sb,
764 set_ih_free_space (pasted, 0); 1140 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
765 zeros_num = tb->insert_size[0] = 0; 1141 (flag ==
1142 M_DELETE) ? "DELETE" : ((flag ==
1143 M_CUT) ? "CUT"
1144 : "UNKNOWN"),
1145 flag);
766 } 1146 }
767 }
768 else /* new item doesn't fall into R[0] */
769 {
770 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
771 }
772 break;
773 default: /* cases d and t */
774 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
775 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
776 }
777
778 } /* tb->rnum[0] > 0 */
779
780
781 RFALSE( tb->blknum[0] > 3,
782 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
783 RFALSE( tb->blknum[0] < 0,
784 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
785
786 /* if while adding to a node we discover that it is possible to split
787 it in two, and merge the left part into the left neighbor and the
788 right part into the right neighbor, eliminating the node */
789 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
790
791 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
792 "PAP-12190: lnum and rnum must not be zero");
793 /* if insertion was done before 0-th position in R[0], right
794 delimiting key of the tb->L[0]'s and left delimiting key are
795 not set correctly */
796 if (tb->CFL[0]) {
797 if (!tb->CFR[0])
798 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
799 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
800 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
801 }
802
803 reiserfs_invalidate_buffer(tb,tbS0);
804 return 0;
805 }
806
807
808 /* Fill new nodes that appear in place of S[0] */
809 1147
810 /* I am told that this copying is because we need an array to enable 1148 }
811 the looping code. -Hans */
812 snum[0] = tb->s1num,
813 snum[1] = tb->s2num;
814 sbytes[0] = tb->s1bytes;
815 sbytes[1] = tb->s2bytes;
816 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
817
818 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
819 1149
820 /* here we shift from S to S_new nodes */ 1150 /* tb->rnum[0] > 0 */
1151 RFALSE(tb->blknum[0] > 3,
1152 "PAP-12180: blknum can not be %d. It must be <= 3",
1153 tb->blknum[0]);
1154 RFALSE(tb->blknum[0] < 0,
1155 "PAP-12185: blknum can not be %d. It must be >= 0",
1156 tb->blknum[0]);
1157
1158 /* if while adding to a node we discover that it is possible to split
1159 it in two, and merge the left part into the left neighbor and the
1160 right part into the right neighbor, eliminating the node */
1161 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1162
1163 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1164 "PAP-12190: lnum and rnum must not be zero");
1165 /* if insertion was done before 0-th position in R[0], right
1166 delimiting key of the tb->L[0]'s and left delimiting key are
1167 not set correctly */
1168 if (tb->CFL[0]) {
1169 if (!tb->CFR[0])
1170 reiserfs_panic(tb->tb_sb,
1171 "vs-12195: balance_leaf: CFR not initialized");
1172 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1173 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1174 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1175 }
821 1176
822 S_new[i] = get_FEB(tb); 1177 reiserfs_invalidate_buffer(tb, tbS0);
1178 return 0;
1179 }
823 1180
824 /* initialized block type and tree level */ 1181 /* Fill new nodes that appear in place of S[0] */
825 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL ); 1182
1183 /* I am told that this copying is because we need an array to enable
1184 the looping code. -Hans */
1185 snum[0] = tb->s1num, snum[1] = tb->s2num;
1186 sbytes[0] = tb->s1bytes;
1187 sbytes[1] = tb->s2bytes;
1188 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1189
1190 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1191 snum[i]);
1192
1193 /* here we shift from S to S_new nodes */
1194
1195 S_new[i] = get_FEB(tb);
1196
1197 /* initialized block type and tree level */
1198 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1199
1200 n = B_NR_ITEMS(tbS0);
1201
1202 switch (flag) {
1203 case M_INSERT: /* insert item */
1204
1205 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1206 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1207 int old_key_comp, old_len,
1208 r_zeros_number;
1209 const char *r_body;
1210 int version;
1211
1212 /* Move snum[i]-1 items from S[0] to S_new[i] */
1213 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1214 snum[i] - 1, -1,
1215 S_new[i]);
1216 /* Remember key component and item length */
1217 version = ih_version(ih);
1218 old_key_comp = le_ih_k_offset(ih);
1219 old_len = ih_item_len(ih);
1220
1221 /* Calculate key component and item length to insert into S_new[i] */
1222 set_le_ih_k_offset(ih,
1223 le_ih_k_offset(ih) +
1224 ((old_len -
1225 sbytes[i]) <<
1226 (is_indirect_le_ih
1227 (ih) ? tb->tb_sb->
1228 s_blocksize_bits -
1229 UNFM_P_SHIFT :
1230 0)));
1231
1232 put_ih_item_len(ih, sbytes[i]);
1233
1234 /* Insert part of the item into S_new[i] before 0-th item */
1235 bi.tb = tb;
1236 bi.bi_bh = S_new[i];
1237 bi.bi_parent = NULL;
1238 bi.bi_position = 0;
1239
1240 if ((old_len - sbytes[i]) > zeros_num) {
1241 r_zeros_number = 0;
1242 r_body =
1243 body + (old_len -
1244 sbytes[i]) -
1245 zeros_num;
1246 } else {
1247 r_body = body;
1248 r_zeros_number =
1249 zeros_num - (old_len -
1250 sbytes[i]);
1251 zeros_num -= r_zeros_number;
1252 }
1253
1254 leaf_insert_into_buf(&bi, 0, ih, r_body,
1255 r_zeros_number);
1256
1257 /* Calculate key component and item length to insert into S[i] */
1258 set_le_ih_k_offset(ih, old_key_comp);
1259 put_ih_item_len(ih,
1260 old_len - sbytes[i]);
1261 tb->insert_size[0] -= sbytes[i];
1262 } else { /* whole new item falls into S_new[i] */
1263
 1264 /* Shift snum[i] - 1 items to S_new[i] (sbytes[i] of split item) */
1265 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1266 snum[i] - 1, sbytes[i],
1267 S_new[i]);
1268
1269 /* Insert new item into S_new[i] */
1270 bi.tb = tb;
1271 bi.bi_bh = S_new[i];
1272 bi.bi_parent = NULL;
1273 bi.bi_position = 0;
1274 leaf_insert_into_buf(&bi,
1275 item_pos - n +
1276 snum[i] - 1, ih,
1277 body, zeros_num);
1278
1279 zeros_num = tb->insert_size[0] = 0;
1280 }
1281 }
826 1282
1283 else { /* new item or it part don't falls into S_new[i] */
827 1284
828 n = B_NR_ITEMS(tbS0); 1285 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
829 1286 snum[i], sbytes[i], S_new[i]);
830 switch (flag) {
831 case M_INSERT: /* insert item */
832
833 if ( n - snum[i] < item_pos )
834 { /* new item or it's part falls to first new node S_new[i]*/
835 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
836 { /* part of new item falls into S_new[i] */
837 int old_key_comp, old_len, r_zeros_number;
838 const char * r_body;
839 int version;
840
841 /* Move snum[i]-1 items from S[0] to S_new[i] */
842 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
843 /* Remember key component and item length */
844 version = ih_version (ih);
845 old_key_comp = le_ih_k_offset( ih );
846 old_len = ih_item_len(ih);
847
848 /* Calculate key component and item length to insert into S_new[i] */
849 set_le_ih_k_offset( ih,
850 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
851
852 put_ih_item_len( ih, sbytes[i] );
853
854 /* Insert part of the item into S_new[i] before 0-th item */
855 bi.tb = tb;
856 bi.bi_bh = S_new[i];
857 bi.bi_parent = NULL;
858 bi.bi_position = 0;
859
860 if ( (old_len - sbytes[i]) > zeros_num ) {
861 r_zeros_number = 0;
862 r_body = body + (old_len - sbytes[i]) - zeros_num;
863 }
864 else {
865 r_body = body;
866 r_zeros_number = zeros_num - (old_len - sbytes[i]);
867 zeros_num -= r_zeros_number;
868 }
869
870 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
871
872 /* Calculate key component and item length to insert into S[i] */
873 set_le_ih_k_offset( ih, old_key_comp );
874 put_ih_item_len( ih, old_len - sbytes[i] );
875 tb->insert_size[0] -= sbytes[i];
876 }
877 else /* whole new item falls into S_new[i] */
878 {
879 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
880 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
881
882 /* Insert new item into S_new[i] */
883 bi.tb = tb;
884 bi.bi_bh = S_new[i];
885 bi.bi_parent = NULL;
886 bi.bi_position = 0;
887 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
888
889 zeros_num = tb->insert_size[0] = 0;
890 }
891 }
892
893 else /* new item or it part don't falls into S_new[i] */
894 {
895 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
896 }
897 break;
898
899 case M_PASTE: /* append item */
900
901 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
902 {
903 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
904 { /* we must shift part of the appended item */
905 struct item_head * aux_ih;
906
907 RFALSE( ih, "PAP-12210: ih must be 0");
908
909 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
910 /* we append to directory item */
911
912 int entry_count;
913
914 entry_count = ih_entry_count(aux_ih);
915
916 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
917 /* new directory entry falls into S_new[i] */
918
919 RFALSE( ! tb->insert_size[0],
920 "PAP-12215: insert_size is already 0");
921 RFALSE( sbytes[i] - 1 >= entry_count,
922 "PAP-12220: there are no so much entries (%d), only %d",
923 sbytes[i] - 1, entry_count);
924
925 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
926 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
927 /* Paste given directory entry to directory item */
928 bi.tb = tb;
929 bi.bi_bh = S_new[i];
930 bi.bi_parent = NULL;
931 bi.bi_position = 0;
932 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933 tb->insert_size[0], body,zeros_num);
934 /* paste new directory entry */
935 leaf_paste_entries (
936 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
937 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
938 tb->insert_size[0]
939 );
940 tb->insert_size[0] = 0;
941 pos_in_item++;
942 } else { /* new directory entry doesn't fall into S_new[i] */
943 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
944 } 1287 }
945 } 1288 break;
946 else /* regular object */ 1289
947 { 1290 case M_PASTE: /* append item */
948 int n_shift, n_rem, r_zeros_number; 1291
949 const char * r_body; 1292 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
950 1293 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
951 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) || 1294 struct item_head *aux_ih;
952 tb->insert_size[0] <= 0, 1295
953 "PAP-12225: item too short or insert_size <= 0"); 1296 RFALSE(ih, "PAP-12210: ih must be 0");
954 1297
955 /* Calculate number of bytes which must be shifted from appended item */ 1298 if (is_direntry_le_ih
956 n_shift = sbytes[i] - tb->insert_size[0]; 1299 (aux_ih =
957 if ( n_shift < 0 ) 1300 B_N_PITEM_HEAD(tbS0, item_pos))) {
958 n_shift = 0; 1301 /* we append to directory item */
959 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); 1302
960 1303 int entry_count;
961 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 1304
962 n_rem = tb->insert_size[0] - sbytes[i]; 1305 entry_count =
963 if ( n_rem < 0 ) 1306 ih_entry_count(aux_ih);
964 n_rem = 0; 1307
965 /* Append part of body into S_new[0] */ 1308 if (entry_count - sbytes[i] <
966 bi.tb = tb; 1309 pos_in_item
967 bi.bi_bh = S_new[i]; 1310 && pos_in_item <=
968 bi.bi_parent = NULL; 1311 entry_count) {
969 bi.bi_position = 0; 1312 /* new directory entry falls into S_new[i] */
1313
1314 RFALSE(!tb->
1315 insert_size[0],
1316 "PAP-12215: insert_size is already 0");
1317 RFALSE(sbytes[i] - 1 >=
1318 entry_count,
1319 "PAP-12220: there are no so much entries (%d), only %d",
1320 sbytes[i] - 1,
1321 entry_count);
1322
 1323 /* Shift snum[i]-1 items in whole. Shift sbytes[i]-1 directory entries from directory item number snum[i] */
1324 leaf_move_items
1325 (LEAF_FROM_S_TO_SNEW,
1326 tb, snum[i],
1327 sbytes[i] - 1,
1328 S_new[i]);
1329 /* Paste given directory entry to directory item */
1330 bi.tb = tb;
1331 bi.bi_bh = S_new[i];
1332 bi.bi_parent = NULL;
1333 bi.bi_position = 0;
1334 leaf_paste_in_buffer
1335 (&bi, 0,
1336 pos_in_item -
1337 entry_count +
1338 sbytes[i] - 1,
1339 tb->insert_size[0],
1340 body, zeros_num);
1341 /* paste new directory entry */
1342 leaf_paste_entries(bi.
1343 bi_bh,
1344 0,
1345 pos_in_item
1346 -
1347 entry_count
1348 +
1349 sbytes
1350 [i] -
1351 1, 1,
1352 (struct
1353 reiserfs_de_head
1354 *)
1355 body,
1356 body
1357 +
1358 DEH_SIZE,
1359 tb->
1360 insert_size
1361 [0]
1362 );
1363 tb->insert_size[0] = 0;
1364 pos_in_item++;
1365 } else { /* new directory entry doesn't fall into S_new[i] */
1366 leaf_move_items
1367 (LEAF_FROM_S_TO_SNEW,
1368 tb, snum[i],
1369 sbytes[i],
1370 S_new[i]);
1371 }
1372 } else { /* regular object */
1373
1374 int n_shift, n_rem,
1375 r_zeros_number;
1376 const char *r_body;
1377
1378 RFALSE(pos_in_item !=
1379 ih_item_len
1380 (B_N_PITEM_HEAD
1381 (tbS0, item_pos))
1382 || tb->insert_size[0] <=
1383 0,
1384 "PAP-12225: item too short or insert_size <= 0");
1385
1386 /* Calculate number of bytes which must be shifted from appended item */
1387 n_shift =
1388 sbytes[i] -
1389 tb->insert_size[0];
1390 if (n_shift < 0)
1391 n_shift = 0;
1392 leaf_move_items
1393 (LEAF_FROM_S_TO_SNEW, tb,
1394 snum[i], n_shift,
1395 S_new[i]);
1396
1397 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1398 n_rem =
1399 tb->insert_size[0] -
1400 sbytes[i];
1401 if (n_rem < 0)
1402 n_rem = 0;
1403 /* Append part of body into S_new[0] */
1404 bi.tb = tb;
1405 bi.bi_bh = S_new[i];
1406 bi.bi_parent = NULL;
1407 bi.bi_position = 0;
1408
1409 if (n_rem > zeros_num) {
1410 r_zeros_number = 0;
1411 r_body =
1412 body + n_rem -
1413 zeros_num;
1414 } else {
1415 r_body = body;
1416 r_zeros_number =
1417 zeros_num - n_rem;
1418 zeros_num -=
1419 r_zeros_number;
1420 }
1421
1422 leaf_paste_in_buffer(&bi, 0,
1423 n_shift,
1424 tb->
1425 insert_size
1426 [0] -
1427 n_rem,
1428 r_body,
1429 r_zeros_number);
1430 {
1431 struct item_head *tmp;
1432
1433 tmp =
1434 B_N_PITEM_HEAD(S_new
1435 [i],
1436 0);
1437 if (is_indirect_le_ih
1438 (tmp)) {
1439 set_ih_free_space
1440 (tmp, 0);
1441 set_le_ih_k_offset
1442 (tmp,
1443 le_ih_k_offset
1444 (tmp) +
1445 (n_rem <<
1446 (tb->
1447 tb_sb->
1448 s_blocksize_bits
1449 -
1450 UNFM_P_SHIFT)));
1451 } else {
1452 set_le_ih_k_offset
1453 (tmp,
1454 le_ih_k_offset
1455 (tmp) +
1456 n_rem);
1457 }
1458 }
1459
1460 tb->insert_size[0] = n_rem;
1461 if (!n_rem)
1462 pos_in_item++;
1463 }
1464 } else
1465 /* item falls wholly into S_new[i] */
1466 {
1467 int ret_val;
1468 struct item_head *pasted;
970 1469
971 if ( n_rem > zeros_num ) { 1470#ifdef CONFIG_REISERFS_CHECK
972 r_zeros_number = 0; 1471 struct item_head *ih =
973 r_body = body + n_rem - zeros_num; 1472 B_N_PITEM_HEAD(tbS0, item_pos);
974 } 1473
975 else { 1474 if (!is_direntry_le_ih(ih)
976 r_body = body; 1475 && (pos_in_item != ih_item_len(ih)
977 r_zeros_number = zeros_num - n_rem; 1476 || tb->insert_size[0] <= 0))
978 zeros_num -= r_zeros_number; 1477 reiserfs_panic(tb->tb_sb,
1478 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1479#endif /* CONFIG_REISERFS_CHECK */
1480
1481 ret_val =
1482 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1483 tb, snum[i],
1484 sbytes[i],
1485 S_new[i]);
1486
1487 RFALSE(ret_val,
1488 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1489 ret_val);
1490
1491 /* paste into item */
1492 bi.tb = tb;
1493 bi.bi_bh = S_new[i];
1494 bi.bi_parent = NULL;
1495 bi.bi_position = 0;
1496 leaf_paste_in_buffer(&bi,
1497 item_pos - n +
1498 snum[i],
1499 pos_in_item,
1500 tb->insert_size[0],
1501 body, zeros_num);
1502
1503 pasted =
1504 B_N_PITEM_HEAD(S_new[i],
1505 item_pos - n +
1506 snum[i]);
1507 if (is_direntry_le_ih(pasted)) {
1508 leaf_paste_entries(bi.bi_bh,
1509 item_pos -
1510 n + snum[i],
1511 pos_in_item,
1512 1,
1513 (struct
1514 reiserfs_de_head
1515 *)body,
1516 body +
1517 DEH_SIZE,
1518 tb->
1519 insert_size
1520 [0]
1521 );
1522 }
1523
1524 /* if we paste to indirect item update ih_free_space */
1525 if (is_indirect_le_ih(pasted))
1526 set_ih_free_space(pasted, 0);
1527 zeros_num = tb->insert_size[0] = 0;
1528 }
979 } 1529 }
980 1530
981 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number); 1531 else { /* pasted item doesn't fall into S_new[i] */
982 {
983 struct item_head * tmp;
984
985 tmp = B_N_PITEM_HEAD(S_new[i],0);
986 if (is_indirect_le_ih (tmp)) {
987 set_ih_free_space (tmp, 0);
988 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
989 (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
990 } else {
991 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
992 n_rem );
993 }
994 }
995 1532
996 tb->insert_size[0] = n_rem; 1533 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
997 if ( ! n_rem ) 1534 snum[i], sbytes[i], S_new[i]);
998 pos_in_item++; 1535 }
999 } 1536 break;
1537 default: /* cases d and t */
1538 reiserfs_panic(tb->tb_sb,
1539 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1540 (flag ==
1541 M_DELETE) ? "DELETE" : ((flag ==
1542 M_CUT) ? "CUT"
1543 : "UNKNOWN"),
1544 flag);
1000 } 1545 }
1001 else
1002 /* item falls wholly into S_new[i] */
1003 {
1004 int ret_val;
1005 struct item_head * pasted;
1006 1546
1007#ifdef CONFIG_REISERFS_CHECK 1547 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1008 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos); 1548 insert_ptr[i] = S_new[i];
1009 1549
1010 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) || 1550 RFALSE(!buffer_journaled(S_new[i])
1011 tb->insert_size[0] <= 0) ) 1551 || buffer_journal_dirty(S_new[i])
1012 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); 1552 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1013#endif /* CONFIG_REISERFS_CHECK */ 1553 i, S_new[i]);
1014
1015 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1016
1017 RFALSE( ret_val,
1018 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019 ret_val);
1020
1021 /* paste into item */
1022 bi.tb = tb;
1023 bi.bi_bh = S_new[i];
1024 bi.bi_parent = NULL;
1025 bi.bi_position = 0;
1026 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1027
1028 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1029 if (is_direntry_le_ih (pasted))
1030 {
1031 leaf_paste_entries (
1032 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1033 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1034 );
1035 }
1036
1037 /* if we paste to indirect item update ih_free_space */
1038 if (is_indirect_le_ih (pasted))
1039 set_ih_free_space (pasted, 0);
1040 zeros_num = tb->insert_size[0] = 0;
1041 }
1042 }
1043
1044 else /* pasted item doesn't fall into S_new[i] */
1045 {
1046 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1047 }
1048 break;
1049 default: /* cases d and t */
1050 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1051 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1052 } 1554 }
1053 1555
1054 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE); 1556 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1055 insert_ptr[i] = S_new[i]; 1557 affected item which remains in S */
1056 1558 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1057 RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) || 1559
1058 buffer_dirty (S_new [i]), 1560 switch (flag) {
1059 "PAP-12247: S_new[%d] : (%b)", i, S_new[i]); 1561 case M_INSERT: /* insert item into S[0] */
1060 } 1562 bi.tb = tb;
1061 1563 bi.bi_bh = tbS0;
1062 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1564 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1063 affected item which remains in S */ 1565 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1064 if ( 0 <= item_pos && item_pos < tb->s0num ) 1566 leaf_insert_into_buf(&bi, item_pos, ih, body,
1065 { /* if we must insert or append into buffer S[0] */ 1567 zeros_num);
1066 1568
1067 switch (flag) 1569 /* If we insert the first key change the delimiting key */
1068 { 1570 if (item_pos == 0) {
1069 case M_INSERT: /* insert item into S[0] */ 1571 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1070 bi.tb = tb; 1572 replace_key(tb, tb->CFL[0], tb->lkey[0],
1071 bi.bi_bh = tbS0; 1573 tbS0, 0);
1072 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1073 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1074 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1075
1076 /* If we insert the first key change the delimiting key */
1077 if( item_pos == 0 ) {
1078 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1080
1081 }
1082 break;
1083
1084 case M_PASTE: { /* append item in S[0] */
1085 struct item_head * pasted;
1086
1087 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1088 /* when directory, may be new entry already pasted */
1089 if (is_direntry_le_ih (pasted)) {
1090 if ( pos_in_item >= 0 &&
1091 pos_in_item <= ih_entry_count(pasted) ) {
1092
1093 RFALSE( ! tb->insert_size[0],
1094 "PAP-12260: insert_size is 0 already");
1095
1096 /* prepare space */
1097 bi.tb = tb;
1098 bi.bi_bh = tbS0;
1099 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1100 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1101 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1102
1103 /* paste entry */
1104 leaf_paste_entries (
1105 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1106 body + DEH_SIZE, tb->insert_size[0]
1107 );
1108 if ( ! item_pos && ! pos_in_item ) {
1109 RFALSE( !tb->CFL[0] || !tb->L[0],
1110 "PAP-12270: CFL[0]/L[0] must be specified");
1111 if (tb->CFL[0]) {
1112 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1113 1574
1114 } 1575 }
1115 } 1576 break;
1116 tb->insert_size[0] = 0; 1577
1117 } 1578 case M_PASTE:{ /* append item in S[0] */
1118 } else { /* regular object */ 1579 struct item_head *pasted;
1119 if ( pos_in_item == ih_item_len(pasted) ) { 1580
1120 1581 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1121 RFALSE( tb->insert_size[0] <= 0, 1582 /* when directory, may be new entry already pasted */
1122 "PAP-12275: insert size must not be %d", 1583 if (is_direntry_le_ih(pasted)) {
1123 tb->insert_size[0]); 1584 if (pos_in_item >= 0 &&
1124 bi.tb = tb; 1585 pos_in_item <=
1125 bi.bi_bh = tbS0; 1586 ih_entry_count(pasted)) {
1126 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 1587
1127 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); 1588 RFALSE(!tb->insert_size[0],
1128 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); 1589 "PAP-12260: insert_size is 0 already");
1129 1590
1130 if (is_indirect_le_ih (pasted)) { 1591 /* prepare space */
1592 bi.tb = tb;
1593 bi.bi_bh = tbS0;
1594 bi.bi_parent =
1595 PATH_H_PPARENT(tb->tb_path,
1596 0);
1597 bi.bi_position =
1598 PATH_H_POSITION(tb->tb_path,
1599 1);
1600 leaf_paste_in_buffer(&bi,
1601 item_pos,
1602 pos_in_item,
1603 tb->
1604 insert_size
1605 [0], body,
1606 zeros_num);
1607
1608 /* paste entry */
1609 leaf_paste_entries(bi.bi_bh,
1610 item_pos,
1611 pos_in_item,
1612 1,
1613 (struct
1614 reiserfs_de_head
1615 *)body,
1616 body +
1617 DEH_SIZE,
1618 tb->
1619 insert_size
1620 [0]
1621 );
1622 if (!item_pos && !pos_in_item) {
1623 RFALSE(!tb->CFL[0]
1624 || !tb->L[0],
1625 "PAP-12270: CFL[0]/L[0] must be specified");
1626 if (tb->CFL[0]) {
1627 replace_key(tb,
1628 tb->
1629 CFL
1630 [0],
1631 tb->
1632 lkey
1633 [0],
1634 tbS0,
1635 0);
1636
1637 }
1638 }
1639 tb->insert_size[0] = 0;
1640 }
1641 } else { /* regular object */
1642 if (pos_in_item == ih_item_len(pasted)) {
1643
1644 RFALSE(tb->insert_size[0] <= 0,
1645 "PAP-12275: insert size must not be %d",
1646 tb->insert_size[0]);
1647 bi.tb = tb;
1648 bi.bi_bh = tbS0;
1649 bi.bi_parent =
1650 PATH_H_PPARENT(tb->tb_path,
1651 0);
1652 bi.bi_position =
1653 PATH_H_POSITION(tb->tb_path,
1654 1);
1655 leaf_paste_in_buffer(&bi,
1656 item_pos,
1657 pos_in_item,
1658 tb->
1659 insert_size
1660 [0], body,
1661 zeros_num);
1662
1663 if (is_indirect_le_ih(pasted)) {
1131#if 0 1664#if 0
1132 RFALSE( tb->insert_size[0] != UNFM_P_SIZE, 1665 RFALSE(tb->
1133 "PAP-12280: insert_size for indirect item must be %d, not %d", 1666 insert_size[0] !=
1134 UNFM_P_SIZE, tb->insert_size[0]); 1667 UNFM_P_SIZE,
1668 "PAP-12280: insert_size for indirect item must be %d, not %d",
1669 UNFM_P_SIZE,
1670 tb->
1671 insert_size[0]);
1135#endif 1672#endif
1136 set_ih_free_space (pasted, 0); 1673 set_ih_free_space
1137 } 1674 (pasted, 0);
1138 tb->insert_size[0] = 0; 1675 }
1139 } 1676 tb->insert_size[0] = 0;
1140 1677 }
1141#ifdef CONFIG_REISERFS_CHECK 1678#ifdef CONFIG_REISERFS_CHECK
1142 else { 1679 else {
1143 if ( tb->insert_size[0] ) { 1680 if (tb->insert_size[0]) {
1144 print_cur_tb ("12285"); 1681 print_cur_tb("12285");
1145 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]); 1682 reiserfs_panic(tb->
1146 } 1683 tb_sb,
1684 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1685 tb->
1686 insert_size
1687 [0]);
1688 }
1689 }
1690#endif /* CONFIG_REISERFS_CHECK */
1691
1692 }
1693 } /* case M_PASTE: */
1147 } 1694 }
1148#endif /* CONFIG_REISERFS_CHECK */
1149
1150 }
1151 } /* case M_PASTE: */
1152 } 1695 }
1153 }
1154
1155#ifdef CONFIG_REISERFS_CHECK 1696#ifdef CONFIG_REISERFS_CHECK
1156 if ( flag == M_PASTE && tb->insert_size[0] ) { 1697 if (flag == M_PASTE && tb->insert_size[0]) {
1157 print_cur_tb ("12290"); 1698 print_cur_tb("12290");
1158 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]); 1699 reiserfs_panic(tb->tb_sb,
1159 } 1700 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1160#endif /* CONFIG_REISERFS_CHECK */ 1701 tb->insert_size[0]);
1161 1702 }
1162 return 0; 1703#endif /* CONFIG_REISERFS_CHECK */
1163} /* Leaf level of the tree is balanced (end of balance_leaf) */
1164
1165 1704
1705 return 0;
1706} /* Leaf level of the tree is balanced (end of balance_leaf) */
1166 1707
1167/* Make empty node */ 1708/* Make empty node */
1168void make_empty_node (struct buffer_info * bi) 1709void make_empty_node(struct buffer_info *bi)
1169{ 1710{
1170 struct block_head * blkh; 1711 struct block_head *blkh;
1171 1712
1172 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL"); 1713 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1173 1714
1174 blkh = B_BLK_HEAD(bi->bi_bh); 1715 blkh = B_BLK_HEAD(bi->bi_bh);
1175 set_blkh_nr_item( blkh, 0 ); 1716 set_blkh_nr_item(blkh, 0);
1176 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) ); 1717 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1177 1718
1178 if (bi->bi_parent) 1719 if (bi->bi_parent)
1179 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */ 1720 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1180} 1721}
1181 1722
1182
1183/* Get first empty buffer */ 1723/* Get first empty buffer */
1184struct buffer_head * get_FEB (struct tree_balance * tb) 1724struct buffer_head *get_FEB(struct tree_balance *tb)
1185{ 1725{
1186 int i; 1726 int i;
1187 struct buffer_head * first_b; 1727 struct buffer_head *first_b;
1188 struct buffer_info bi; 1728 struct buffer_info bi;
1189
1190 for (i = 0; i < MAX_FEB_SIZE; i ++)
1191 if (tb->FEB[i] != 0)
1192 break;
1193
1194 if (i == MAX_FEB_SIZE)
1195 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1196
1197 bi.tb = tb;
1198 bi.bi_bh = first_b = tb->FEB[i];
1199 bi.bi_parent = NULL;
1200 bi.bi_position = 0;
1201 make_empty_node (&bi);
1202 set_buffer_uptodate(first_b);
1203 tb->FEB[i] = NULL;
1204 tb->used[i] = first_b;
1205
1206 return(first_b);
1207}
1208 1729
1730 for (i = 0; i < MAX_FEB_SIZE; i++)
1731 if (tb->FEB[i] != 0)
1732 break;
1733
1734 if (i == MAX_FEB_SIZE)
1735 reiserfs_panic(tb->tb_sb,
1736 "vs-12300: get_FEB: FEB list is empty");
1737
1738 bi.tb = tb;
1739 bi.bi_bh = first_b = tb->FEB[i];
1740 bi.bi_parent = NULL;
1741 bi.bi_position = 0;
1742 make_empty_node(&bi);
1743 set_buffer_uptodate(first_b);
1744 tb->FEB[i] = NULL;
1745 tb->used[i] = first_b;
1746
1747 return (first_b);
1748}
1209 1749
1210/* This is now used because reiserfs_free_block has to be able to 1750/* This is now used because reiserfs_free_block has to be able to
1211** schedule. 1751** schedule.
1212*/ 1752*/
1213static void store_thrown (struct tree_balance * tb, struct buffer_head * bh) 1753static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1214{ 1754{
1215 int i; 1755 int i;
1216 1756
1217 if (buffer_dirty (bh)) 1757 if (buffer_dirty(bh))
1218 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer"); 1758 reiserfs_warning(tb->tb_sb,
1219 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++) 1759 "store_thrown deals with dirty buffer");
1220 if (!tb->thrown[i]) { 1760 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
1221 tb->thrown[i] = bh; 1761 if (!tb->thrown[i]) {
1222 get_bh(bh) ; /* free_thrown puts this */ 1762 tb->thrown[i] = bh;
1223 return; 1763 get_bh(bh); /* free_thrown puts this */
1224 } 1764 return;
1225 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers"); 1765 }
1766 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
1226} 1767}
1227 1768
1228static void free_thrown(struct tree_balance *tb) { 1769static void free_thrown(struct tree_balance *tb)
1229 int i ; 1770{
1230 b_blocknr_t blocknr ; 1771 int i;
1231 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) { 1772 b_blocknr_t blocknr;
1232 if (tb->thrown[i]) { 1773 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
1233 blocknr = tb->thrown[i]->b_blocknr ; 1774 if (tb->thrown[i]) {
1234 if (buffer_dirty (tb->thrown[i])) 1775 blocknr = tb->thrown[i]->b_blocknr;
1235 reiserfs_warning (tb->tb_sb, 1776 if (buffer_dirty(tb->thrown[i]))
1236 "free_thrown deals with dirty buffer %d", 1777 reiserfs_warning(tb->tb_sb,
1237 blocknr); 1778 "free_thrown deals with dirty buffer %d",
1238 brelse(tb->thrown[i]) ; /* incremented in store_thrown */ 1779 blocknr);
1239 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0); 1780 brelse(tb->thrown[i]); /* incremented in store_thrown */
1781 reiserfs_free_block(tb->transaction_handle, NULL,
1782 blocknr, 0);
1783 }
1240 } 1784 }
1241 }
1242} 1785}
1243 1786
1244void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh) 1787void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1245{ 1788{
1246 struct block_head *blkh; 1789 struct block_head *blkh;
1247 blkh = B_BLK_HEAD(bh); 1790 blkh = B_BLK_HEAD(bh);
1248 set_blkh_level( blkh, FREE_LEVEL ); 1791 set_blkh_level(blkh, FREE_LEVEL);
1249 set_blkh_nr_item( blkh, 0 ); 1792 set_blkh_nr_item(blkh, 0);
1250 1793
1251 clear_buffer_dirty(bh); 1794 clear_buffer_dirty(bh);
1252 store_thrown (tb, bh); 1795 store_thrown(tb, bh);
1253} 1796}
1254 1797
1255/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ 1798/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1256void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest, 1799void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1257 struct buffer_head * src, int n_src) 1800 struct buffer_head *src, int n_src)
1258{ 1801{
1259 1802
1260 RFALSE( dest == NULL || src == NULL, 1803 RFALSE(dest == NULL || src == NULL,
1261 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", 1804 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1262 src, dest); 1805 src, dest);
1263 RFALSE( ! B_IS_KEYS_LEVEL (dest), 1806 RFALSE(!B_IS_KEYS_LEVEL(dest),
1264 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf", 1807 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1265 dest); 1808 dest);
1266 RFALSE( n_dest < 0 || n_src < 0, 1809 RFALSE(n_dest < 0 || n_src < 0,
1267 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); 1810 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1268 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), 1811 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1269 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", 1812 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1270 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); 1813 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1271 1814
1272 if (B_IS_ITEMS_LEVEL (src)) 1815 if (B_IS_ITEMS_LEVEL(src))
1273 /* source buffer contains leaf node */ 1816 /* source buffer contains leaf node */
1274 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE); 1817 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1275 else 1818 KEY_SIZE);
1276 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE); 1819 else
1277 1820 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1278 do_balance_mark_internal_dirty (tb, dest, 0); 1821 KEY_SIZE);
1822
1823 do_balance_mark_internal_dirty(tb, dest, 0);
1279} 1824}
1280 1825
1281 1826int get_left_neighbor_position(struct tree_balance *tb, int h)
1282int get_left_neighbor_position (
1283 struct tree_balance * tb,
1284 int h
1285 )
1286{ 1827{
1287 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); 1828 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1288 1829
1289 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0, 1830 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0,
1290 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 1831 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1291 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h)); 1832 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1292 1833
1293 if (Sh_position == 0) 1834 if (Sh_position == 0)
1294 return B_NR_ITEMS (tb->FL[h]); 1835 return B_NR_ITEMS(tb->FL[h]);
1295 else 1836 else
1296 return Sh_position - 1; 1837 return Sh_position - 1;
1297} 1838}
1298 1839
1299 1840int get_right_neighbor_position(struct tree_balance *tb, int h)
1300int get_right_neighbor_position (struct tree_balance * tb, int h)
1301{ 1841{
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); 1842 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1303 1843
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0, 1844 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0,
1305 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 1845 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1306 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]); 1846 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1307 1847
1308 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h))) 1848 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1309 return 0; 1849 return 0;
1310 else 1850 else
1311 return Sh_position + 1; 1851 return Sh_position + 1;
1312} 1852}
1313 1853
1314
1315#ifdef CONFIG_REISERFS_CHECK 1854#ifdef CONFIG_REISERFS_CHECK
1316 1855
1317int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value); 1856int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1318static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes) 1857static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1858 char *mes)
1319{ 1859{
1320 struct disk_child * dc; 1860 struct disk_child *dc;
1321 int i; 1861 int i;
1322
1323 RFALSE( !bh, "PAP-12336: bh == 0");
1324
1325 if (!bh || !B_IS_IN_TREE (bh))
1326 return;
1327
1328 RFALSE( !buffer_dirty (bh) &&
1329 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1330 "PAP-12337: buffer (%b) must be dirty", bh);
1331 dc = B_N_CHILD (bh, 0);
1332
1333 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1334 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1335 print_cur_tb (mes);
1336 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1337 }
1338 }
1339}
1340 1862
1863 RFALSE(!bh, "PAP-12336: bh == 0");
1341 1864
1342static int locked_or_not_in_tree (struct buffer_head * bh, char * which) 1865 if (!bh || !B_IS_IN_TREE(bh))
1343{ 1866 return;
1344 if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
1345 !B_IS_IN_TREE (bh) ) {
1346 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
1347 which, bh);
1348 return 1;
1349 }
1350 return 0;
1351}
1352 1867
1868 RFALSE(!buffer_dirty(bh) &&
1869 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1870 "PAP-12337: buffer (%b) must be dirty", bh);
1871 dc = B_N_CHILD(bh, 0);
1353 1872
1354static int check_before_balancing (struct tree_balance * tb) 1873 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1355{ 1874 if (!is_reusable(s, dc_block_number(dc), 1)) {
1356 int retval = 0; 1875 print_cur_tb(mes);
1357 1876 reiserfs_panic(s,
1358 if ( cur_tb ) { 1877 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1359 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: " 1878 dc, bh);
1360 "suspect that schedule occurred based on cur_tb not being null at this point in code. " 1879 }
1361 "do_balance cannot properly handle schedule occurring while it runs."); 1880 }
1362 }
1363
1364 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1365 prepped all of these for us). */
1366 if ( tb->lnum[0] ) {
1367 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1368 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1369 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1370 check_leaf (tb->L[0]);
1371 }
1372 if ( tb->rnum[0] ) {
1373 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1374 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1375 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1376 check_leaf (tb->R[0]);
1377 }
1378 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1379 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1380
1381 return retval;
1382} 1881}
1383 1882
1883static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1884{
1885 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1886 !B_IS_IN_TREE(bh)) {
1887 reiserfs_warning(NULL,
1888 "vs-12339: locked_or_not_in_tree: %s (%b)",
1889 which, bh);
1890 return 1;
1891 }
1892 return 0;
1893}
1384 1894
1385static void check_after_balance_leaf (struct tree_balance * tb) 1895static int check_before_balancing(struct tree_balance *tb)
1386{ 1896{
1387 if (tb->lnum[0]) { 1897 int retval = 0;
1388 if (B_FREE_SPACE (tb->L[0]) != 1898
1389 MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) { 1899 if (cur_tb) {
1390 print_cur_tb ("12221"); 1900 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1391 reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); 1901 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1902 "do_balance cannot properly handle schedule occurring while it runs.");
1392 } 1903 }
1393 } 1904
1394 if (tb->rnum[0]) { 1905 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1395 if (B_FREE_SPACE (tb->R[0]) != 1906 prepped all of these for us). */
1396 MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) { 1907 if (tb->lnum[0]) {
1397 print_cur_tb ("12222"); 1908 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1398 reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); 1909 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1910 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1911 check_leaf(tb->L[0]);
1399 } 1912 }
1400 } 1913 if (tb->rnum[0]) {
1401 if (PATH_H_PBUFFER(tb->tb_path,1) && 1914 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1402 (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != 1915 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1403 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - 1916 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1404 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), 1917 check_leaf(tb->R[0]);
1405 PATH_H_POSITION (tb->tb_path, 1)))) )) { 1918 }
1406 int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)); 1919 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1407 int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - 1920 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1408 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1409 PATH_H_POSITION (tb->tb_path, 1))));
1410 print_cur_tb ("12223");
1411 reiserfs_warning (tb->tb_sb,
1412 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1413 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1414 left,
1415 MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1416 PATH_H_PBUFFER(tb->tb_path,1),
1417 PATH_H_POSITION (tb->tb_path, 1),
1418 dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1419 right );
1420 reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1421 }
1422}
1423 1921
1922 return retval;
1923}
1424 1924
1425static void check_leaf_level (struct tree_balance * tb) 1925static void check_after_balance_leaf(struct tree_balance *tb)
1426{ 1926{
1427 check_leaf (tb->L[0]); 1927 if (tb->lnum[0]) {
1428 check_leaf (tb->R[0]); 1928 if (B_FREE_SPACE(tb->L[0]) !=
1429 check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); 1929 MAX_CHILD_SIZE(tb->L[0]) -
1930 dc_size(B_N_CHILD
1931 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1932 print_cur_tb("12221");
1933 reiserfs_panic(tb->tb_sb,
1934 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1935 }
1936 }
1937 if (tb->rnum[0]) {
1938 if (B_FREE_SPACE(tb->R[0]) !=
1939 MAX_CHILD_SIZE(tb->R[0]) -
1940 dc_size(B_N_CHILD
1941 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1942 print_cur_tb("12222");
1943 reiserfs_panic(tb->tb_sb,
1944 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1945 }
1946 }
1947 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1948 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1949 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1950 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1951 PATH_H_POSITION(tb->tb_path, 1)))))) {
1952 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1953 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1954 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1955 PATH_H_POSITION(tb->tb_path,
1956 1))));
1957 print_cur_tb("12223");
1958 reiserfs_warning(tb->tb_sb,
1959 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1960 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1961 left,
1962 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1963 PATH_H_PBUFFER(tb->tb_path, 1),
1964 PATH_H_POSITION(tb->tb_path, 1),
1965 dc_size(B_N_CHILD
1966 (PATH_H_PBUFFER(tb->tb_path, 1),
1967 PATH_H_POSITION(tb->tb_path, 1))),
1968 right);
1969 reiserfs_panic(tb->tb_sb,
1970 "PAP-12365: check_after_balance_leaf: S is incorrect");
1971 }
1430} 1972}
1431 1973
1432static void check_internal_levels (struct tree_balance * tb) 1974static void check_leaf_level(struct tree_balance *tb)
1433{ 1975{
1434 int h; 1976 check_leaf(tb->L[0]);
1977 check_leaf(tb->R[0]);
1978 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1979}
1435 1980
1436 /* check all internal nodes */ 1981static void check_internal_levels(struct tree_balance *tb)
1437 for (h = 1; tb->insert_size[h]; h ++) { 1982{
1438 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH"); 1983 int h;
1439 if (tb->lnum[h]) 1984
1440 check_internal_node (tb->tb_sb, tb->L[h], "BAD L"); 1985 /* check all internal nodes */
1441 if (tb->rnum[h]) 1986 for (h = 1; tb->insert_size[h]; h++) {
1442 check_internal_node (tb->tb_sb, tb->R[h], "BAD R"); 1987 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1443 } 1988 "BAD BUFFER ON PATH");
1989 if (tb->lnum[h])
1990 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1991 if (tb->rnum[h])
1992 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1993 }
1444 1994
1445} 1995}
1446 1996
1447#endif 1997#endif
1448 1998
1449
1450
1451
1452
1453
1454/* Now we have all of the buffers that must be used in balancing of 1999/* Now we have all of the buffers that must be used in balancing of
1455 the tree. We rely on the assumption that schedule() will not occur 2000 the tree. We rely on the assumption that schedule() will not occur
1456 while do_balance works. ( Only interrupt handlers are acceptable.) 2001 while do_balance works. ( Only interrupt handlers are acceptable.)
@@ -1484,114 +2029,109 @@ static void check_internal_levels (struct tree_balance * tb)
1484 2029
1485*/ 2030*/
1486 2031
1487static inline void do_balance_starts (struct tree_balance *tb) 2032static inline void do_balance_starts(struct tree_balance *tb)
1488{ 2033{
1489 /* use print_cur_tb() to see initial state of struct 2034 /* use print_cur_tb() to see initial state of struct
1490 tree_balance */ 2035 tree_balance */
1491 2036
1492 /* store_print_tb (tb); */ 2037 /* store_print_tb (tb); */
1493 2038
1494 /* do not delete, just comment it out */ 2039 /* do not delete, just comment it out */
1495/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, 2040/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1496 "check");*/ 2041 "check");*/
1497 RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB"); 2042 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1498#ifdef CONFIG_REISERFS_CHECK 2043#ifdef CONFIG_REISERFS_CHECK
1499 cur_tb = tb; 2044 cur_tb = tb;
1500#endif 2045#endif
1501} 2046}
1502 2047
1503 2048static inline void do_balance_completed(struct tree_balance *tb)
1504static inline void do_balance_completed (struct tree_balance * tb)
1505{ 2049{
1506 2050
1507#ifdef CONFIG_REISERFS_CHECK 2051#ifdef CONFIG_REISERFS_CHECK
1508 check_leaf_level (tb); 2052 check_leaf_level(tb);
1509 check_internal_levels (tb); 2053 check_internal_levels(tb);
1510 cur_tb = NULL; 2054 cur_tb = NULL;
1511#endif 2055#endif
1512 2056
1513 /* reiserfs_free_block is no longer schedule safe. So, we need to 2057 /* reiserfs_free_block is no longer schedule safe. So, we need to
1514 ** put the buffers we want freed on the thrown list during do_balance, 2058 ** put the buffers we want freed on the thrown list during do_balance,
1515 ** and then free them now 2059 ** and then free them now
1516 */ 2060 */
1517
1518 REISERFS_SB(tb->tb_sb)->s_do_balance ++;
1519 2061
2062 REISERFS_SB(tb->tb_sb)->s_do_balance++;
1520 2063
1521 /* release all nodes hold to perform the balancing */ 2064 /* release all nodes hold to perform the balancing */
1522 unfix_nodes(tb); 2065 unfix_nodes(tb);
1523 2066
1524 free_thrown(tb) ; 2067 free_thrown(tb);
1525} 2068}
1526 2069
2070void do_balance(struct tree_balance *tb, /* tree_balance structure */
2071 struct item_head *ih, /* item header of inserted item */
2072 const char *body, /* body of inserted item or bytes to paste */
2073 int flag)
2074{ /* i - insert, d - delete
2075 c - cut, p - paste
2076
2077 Cut means delete part of an item
2078 (includes removing an entry from a
2079 directory).
2080
2081 Delete means delete whole item.
2082
2083 Insert means add a new item into the
2084 tree.
2085
2086 Paste means to append to the end of an
2087 existing file or to insert a directory
2088 entry. */
2089 int child_pos, /* position of a child node in its parent */
2090 h; /* level of the tree being processed */
2091 struct item_head insert_key[2]; /* in our processing of one level
2092 we sometimes determine what
2093 must be inserted into the next
2094 higher level. This insertion
2095 consists of a key or two keys
2096 and their corresponding
2097 pointers */
2098 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2099 level */
2100
2101 tb->tb_mode = flag;
2102 tb->need_balance_dirty = 0;
2103
2104 if (FILESYSTEM_CHANGED_TB(tb)) {
2105 reiserfs_panic(tb->tb_sb,
2106 "clm-6000: do_balance, fs generation has changed\n");
2107 }
2108 /* if we have no real work to do */
2109 if (!tb->insert_size[0]) {
2110 reiserfs_warning(tb->tb_sb,
2111 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2112 flag);
2113 unfix_nodes(tb);
2114 return;
2115 }
1527 2116
2117 atomic_inc(&(fs_generation(tb->tb_sb)));
2118 do_balance_starts(tb);
1528 2119
1529
1530
1531void do_balance (struct tree_balance * tb, /* tree_balance structure */
1532 struct item_head * ih, /* item header of inserted item */
1533 const char * body, /* body of inserted item or bytes to paste */
1534 int flag) /* i - insert, d - delete
1535 c - cut, p - paste
1536
1537 Cut means delete part of an item
1538 (includes removing an entry from a
1539 directory).
1540
1541 Delete means delete whole item.
1542
1543 Insert means add a new item into the
1544 tree.
1545
1546 Paste means to append to the end of an
1547 existing file or to insert a directory
1548 entry. */
1549{
1550 int child_pos, /* position of a child node in its parent */
1551 h; /* level of the tree being processed */
1552 struct item_head insert_key[2]; /* in our processing of one level
1553 we sometimes determine what
1554 must be inserted into the next
1555 higher level. This insertion
1556 consists of a key or two keys
1557 and their corresponding
1558 pointers */
1559 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1560 level */
1561
1562 tb->tb_mode = flag;
1563 tb->need_balance_dirty = 0;
1564
1565 if (FILESYSTEM_CHANGED_TB(tb)) {
1566 reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1567 }
1568 /* if we have no real work to do */
1569 if ( ! tb->insert_size[0] ) {
1570 reiserfs_warning (tb->tb_sb,
1571 "PAP-12350: do_balance: insert_size == 0, mode == %c",
1572 flag);
1573 unfix_nodes(tb);
1574 return;
1575 }
1576
1577 atomic_inc (&(fs_generation (tb->tb_sb)));
1578 do_balance_starts (tb);
1579
1580 /* balance leaf returns 0 except if combining L R and S into 2120 /* balance leaf returns 0 except if combining L R and S into
1581 one node. see balance_internal() for explanation of this 2121 one node. see balance_internal() for explanation of this
1582 line of code.*/ 2122 line of code. */
1583 child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) + 2123 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1584 balance_leaf (tb, ih, body, flag, insert_key, insert_ptr); 2124 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1585 2125
1586#ifdef CONFIG_REISERFS_CHECK 2126#ifdef CONFIG_REISERFS_CHECK
1587 check_after_balance_leaf (tb); 2127 check_after_balance_leaf(tb);
1588#endif 2128#endif
1589 2129
1590 /* Balance internal level of the tree. */ 2130 /* Balance internal level of the tree. */
1591 for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ ) 2131 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1592 child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr); 2132 child_pos =
1593 2133 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1594 2134
1595 do_balance_completed (tb); 2135 do_balance_completed(tb);
1596 2136
1597} 2137}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 12e91209544..c9f178fb494 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -2,7 +2,6 @@
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5
6#include <linux/time.h> 5#include <linux/time.h>
7#include <linux/reiserfs_fs.h> 6#include <linux/reiserfs_fs.h>
8#include <linux/reiserfs_acl.h> 7#include <linux/reiserfs_acl.h>
@@ -31,82 +30,84 @@
31** We use reiserfs_truncate_file to pack the tail, since it already has 30** We use reiserfs_truncate_file to pack the tail, since it already has
32** all the conditions coded. 31** all the conditions coded.
33*/ 32*/
34static int reiserfs_file_release (struct inode * inode, struct file * filp) 33static int reiserfs_file_release(struct inode *inode, struct file *filp)
35{ 34{
36 35
37 struct reiserfs_transaction_handle th ; 36 struct reiserfs_transaction_handle th;
38 int err; 37 int err;
39 int jbegin_failure = 0; 38 int jbegin_failure = 0;
40 39
41 if (!S_ISREG (inode->i_mode)) 40 if (!S_ISREG(inode->i_mode))
42 BUG (); 41 BUG();
43 42
44 /* fast out for when nothing needs to be done */ 43 /* fast out for when nothing needs to be done */
45 if ((atomic_read(&inode->i_count) > 1 || 44 if ((atomic_read(&inode->i_count) > 1 ||
46 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || 45 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
47 !tail_has_to_be_packed(inode)) && 46 !tail_has_to_be_packed(inode)) &&
48 REISERFS_I(inode)->i_prealloc_count <= 0) { 47 REISERFS_I(inode)->i_prealloc_count <= 0) {
49 return 0; 48 return 0;
50 } 49 }
51
52 reiserfs_write_lock(inode->i_sb);
53 down (&inode->i_sem);
54 /* freeing preallocation only involves relogging blocks that
55 * are already in the current transaction. preallocation gets
56 * freed at the end of each transaction, so it is impossible for
57 * us to log any additional blocks (including quota blocks)
58 */
59 err = journal_begin(&th, inode->i_sb, 1);
60 if (err) {
61 /* uh oh, we can't allow the inode to go away while there
62 * is still preallocation blocks pending. Try to join the
63 * aborted transaction
64 */
65 jbegin_failure = err;
66 err = journal_join_abort(&th, inode->i_sb, 1);
67 50
51 reiserfs_write_lock(inode->i_sb);
52 down(&inode->i_sem);
53 /* freeing preallocation only involves relogging blocks that
54 * are already in the current transaction. preallocation gets
55 * freed at the end of each transaction, so it is impossible for
56 * us to log any additional blocks (including quota blocks)
57 */
58 err = journal_begin(&th, inode->i_sb, 1);
68 if (err) { 59 if (err) {
69 /* hmpf, our choices here aren't good. We can pin the inode 60 /* uh oh, we can't allow the inode to go away while there
70 * which will disallow unmount from every happening, we can 61 * is still preallocation blocks pending. Try to join the
71 * do nothing, which will corrupt random memory on unmount, 62 * aborted transaction
72 * or we can forcibly remove the file from the preallocation 63 */
73 * list, which will leak blocks on disk. Lets pin the inode 64 jbegin_failure = err;
74 * and let the admin know what is going on. 65 err = journal_join_abort(&th, inode->i_sb, 1);
75 */ 66
76 igrab(inode); 67 if (err) {
77 reiserfs_warning(inode->i_sb, "pinning inode %lu because the " 68 /* hmpf, our choices here aren't good. We can pin the inode
78 "preallocation can't be freed"); 69 * which will disallow unmount from every happening, we can
79 goto out; 70 * do nothing, which will corrupt random memory on unmount,
71 * or we can forcibly remove the file from the preallocation
72 * list, which will leak blocks on disk. Lets pin the inode
73 * and let the admin know what is going on.
74 */
75 igrab(inode);
76 reiserfs_warning(inode->i_sb,
77 "pinning inode %lu because the "
78 "preallocation can't be freed");
79 goto out;
80 }
80 } 81 }
81 } 82 reiserfs_update_inode_transaction(inode);
82 reiserfs_update_inode_transaction(inode) ;
83 83
84#ifdef REISERFS_PREALLOCATE 84#ifdef REISERFS_PREALLOCATE
85 reiserfs_discard_prealloc (&th, inode); 85 reiserfs_discard_prealloc(&th, inode);
86#endif 86#endif
87 err = journal_end(&th, inode->i_sb, 1); 87 err = journal_end(&th, inode->i_sb, 1);
88 88
89 /* copy back the error code from journal_begin */ 89 /* copy back the error code from journal_begin */
90 if (!err) 90 if (!err)
91 err = jbegin_failure; 91 err = jbegin_failure;
92 92
93 if (!err && atomic_read(&inode->i_count) <= 1 && 93 if (!err && atomic_read(&inode->i_count) <= 1 &&
94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
95 tail_has_to_be_packed (inode)) { 95 tail_has_to_be_packed(inode)) {
96 /* if regular file is released by last holder and it has been 96 /* if regular file is released by last holder and it has been
97 appended (we append by unformatted node only) or its direct 97 appended (we append by unformatted node only) or its direct
98 item(s) had to be converted, then it may have to be 98 item(s) had to be converted, then it may have to be
99 indirect2direct converted */ 99 indirect2direct converted */
100 err = reiserfs_truncate_file(inode, 0) ; 100 err = reiserfs_truncate_file(inode, 0);
101 } 101 }
102out: 102 out:
103 up (&inode->i_sem); 103 up(&inode->i_sem);
104 reiserfs_write_unlock(inode->i_sb); 104 reiserfs_write_unlock(inode->i_sb);
105 return err; 105 return err;
106} 106}
107 107
108static void reiserfs_vfs_truncate_file(struct inode *inode) { 108static void reiserfs_vfs_truncate_file(struct inode *inode)
109 reiserfs_truncate_file(inode, 1) ; 109{
110 reiserfs_truncate_file(inode, 1);
110} 111}
111 112
112/* Sync a reiserfs file. */ 113/* Sync a reiserfs file. */
@@ -116,26 +117,24 @@ static void reiserfs_vfs_truncate_file(struct inode *inode) {
116 * be removed... 117 * be removed...
117 */ 118 */
118 119
119static int reiserfs_sync_file( 120static int reiserfs_sync_file(struct file *p_s_filp,
120 struct file * p_s_filp, 121 struct dentry *p_s_dentry, int datasync)
121 struct dentry * p_s_dentry, 122{
122 int datasync 123 struct inode *p_s_inode = p_s_dentry->d_inode;
123 ) { 124 int n_err;
124 struct inode * p_s_inode = p_s_dentry->d_inode; 125 int barrier_done;
125 int n_err; 126
126 int barrier_done; 127 if (!S_ISREG(p_s_inode->i_mode))
127 128 BUG();
128 if (!S_ISREG(p_s_inode->i_mode)) 129 n_err = sync_mapping_buffers(p_s_inode->i_mapping);
129 BUG (); 130 reiserfs_write_lock(p_s_inode->i_sb);
130 n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; 131 barrier_done = reiserfs_commit_for_inode(p_s_inode);
131 reiserfs_write_lock(p_s_inode->i_sb); 132 reiserfs_write_unlock(p_s_inode->i_sb);
132 barrier_done = reiserfs_commit_for_inode(p_s_inode); 133 if (barrier_done != 1)
133 reiserfs_write_unlock(p_s_inode->i_sb); 134 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
134 if (barrier_done != 1) 135 if (barrier_done < 0)
135 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); 136 return barrier_done;
136 if (barrier_done < 0) 137 return (n_err < 0) ? -EIO : 0;
137 return barrier_done;
138 return ( n_err < 0 ) ? -EIO : 0;
139} 138}
140 139
141/* I really do not want to play with memory shortage right now, so 140/* I really do not want to play with memory shortage right now, so
@@ -147,700 +146,797 @@ static int reiserfs_sync_file(
147/* Allocates blocks for a file to fulfil write request. 146/* Allocates blocks for a file to fulfil write request.
148 Maps all unmapped but prepared pages from the list. 147 Maps all unmapped but prepared pages from the list.
149 Updates metadata with newly allocated blocknumbers as needed */ 148 Updates metadata with newly allocated blocknumbers as needed */
150static int reiserfs_allocate_blocks_for_region( 149static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */
151 struct reiserfs_transaction_handle *th, 150 loff_t pos, /* Writing position */
152 struct inode *inode, /* Inode we work with */ 151 int num_pages, /* number of pages write going
153 loff_t pos, /* Writing position */ 152 to touch */
154 int num_pages, /* number of pages write going 153 int write_bytes, /* amount of bytes to write */
155 to touch */ 154 struct page **prepared_pages, /* array of
156 int write_bytes, /* amount of bytes to write */ 155 prepared pages
157 struct page **prepared_pages, /* array of 156 */
158 prepared pages 157 int blocks_to_allocate /* Amount of blocks we
159 */ 158 need to allocate to
160 int blocks_to_allocate /* Amount of blocks we 159 fit the data into file
161 need to allocate to 160 */
162 fit the data into file 161 )
163 */
164 )
165{ 162{
166 struct cpu_key key; // cpu key of item that we are going to deal with 163 struct cpu_key key; // cpu key of item that we are going to deal with
167 struct item_head *ih; // pointer to item head that we are going to deal with 164 struct item_head *ih; // pointer to item head that we are going to deal with
168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with 165 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
169 __le32 * item; // pointer to item we are going to deal with 166 __le32 *item; // pointer to item we are going to deal with
170 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 167 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. 168 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator. 169 reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
173 size_t res; // return value of various functions that we call. 170 size_t res; // return value of various functions that we call.
174 int curr_block; // current block used to keep track of unmapped blocks. 171 int curr_block; // current block used to keep track of unmapped blocks.
175 int i; // loop counter 172 int i; // loop counter
176 int itempos; // position in item 173 int itempos; // position in item
177 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in 174 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
178 // first page 175 // first page
179 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ 176 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
180 __u64 hole_size ; // amount of blocks for a file hole, if it needed to be created. 177 __u64 hole_size; // amount of blocks for a file hole, if it needed to be created.
181 int modifying_this_item = 0; // Flag for items traversal code to keep track 178 int modifying_this_item = 0; // Flag for items traversal code to keep track
182 // of the fact that we already prepared 179 // of the fact that we already prepared
183 // current block for journal 180 // current block for journal
184 int will_prealloc = 0; 181 int will_prealloc = 0;
185 RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?"); 182 RFALSE(!blocks_to_allocate,
186 183 "green-9004: tried to allocate zero blocks?");
187 /* only preallocate if this is a small write */ 184
188 if (REISERFS_I(inode)->i_prealloc_count || 185 /* only preallocate if this is a small write */
189 (!(write_bytes & (inode->i_sb->s_blocksize -1)) && 186 if (REISERFS_I(inode)->i_prealloc_count ||
190 blocks_to_allocate < 187 (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) 188 blocks_to_allocate <
192 will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; 189 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
193 190 will_prealloc =
194 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * 191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
195 sizeof(b_blocknr_t), GFP_NOFS); 192
196 193 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
197 /* First we compose a key to point at the writing position, we want to do 194 sizeof(b_blocknr_t), GFP_NOFS);
198 that outside of any locking region. */ 195
199 make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/*key length*/); 196 /* First we compose a key to point at the writing position, we want to do
200 197 that outside of any locking region. */
201 /* If we came here, it means we absolutely need to open a transaction, 198 make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
202 since we need to allocate some blocks */ 199
203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. 200 /* If we came here, it means we absolutely need to open a transaction,
204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough 201 since we need to allocate some blocks */
205 if (res) 202 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
206 goto error_exit; 203 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
207 reiserfs_update_inode_transaction(inode) ; 204 if (res)
208
209 /* Look for the in-tree position of our write, need path for block allocator */
210 res = search_for_position_by_key(inode->i_sb, &key, &path);
211 if ( res == IO_ERROR ) {
212 res = -EIO;
213 goto error_exit;
214 }
215
216 /* Allocate blocks */
217 /* First fill in "hint" structure for block allocator */
218 hint.th = th; // transaction handle.
219 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
220 hint.inode = inode; // Inode is needed by block allocator too.
221 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
222 hint.key = key.on_disk_key; // on disk key of file.
223 hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already.
224 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
225 hint.preallocate = will_prealloc;
226
227 /* Call block allocator to allocate blocks */
228 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
229 if ( res != CARRY_ON ) {
230 if ( res == NO_DISK_SPACE ) {
231 /* We flush the transaction in case of no space. This way some
232 blocks might become free */
233 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
234 res = restart_transaction(th, inode, &path);
235 if (res)
236 goto error_exit;
237
238 /* We might have scheduled, so search again */
239 res = search_for_position_by_key(inode->i_sb, &key, &path);
240 if ( res == IO_ERROR ) {
241 res = -EIO;
242 goto error_exit; 205 goto error_exit;
243 } 206 reiserfs_update_inode_transaction(inode);
244 207
245 /* update changed info for hint structure. */ 208 /* Look for the in-tree position of our write, need path for block allocator */
246 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); 209 res = search_for_position_by_key(inode->i_sb, &key, &path);
247 if ( res != CARRY_ON ) { 210 if (res == IO_ERROR) {
248 res = -ENOSPC; 211 res = -EIO;
249 pathrelse(&path);
250 goto error_exit; 212 goto error_exit;
251 }
252 } else {
253 res = -ENOSPC;
254 pathrelse(&path);
255 goto error_exit;
256 } 213 }
257 }
258 214
259#ifdef __BIG_ENDIAN 215 /* Allocate blocks */
260 // Too bad, I have not found any way to convert a given region from 216 /* First fill in "hint" structure for block allocator */
261 // cpu format to little endian format 217 hint.th = th; // transaction handle.
262 { 218 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
263 int i; 219 hint.inode = inode; // Inode is needed by block allocator too.
264 for ( i = 0; i < blocks_to_allocate ; i++) 220 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
265 allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]); 221 hint.key = key.on_disk_key; // on disk key of file.
266 } 222 hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already.
267#endif 223 hint.formatted_node = 0; // We are allocating blocks for unformatted node.
268 224 hint.preallocate = will_prealloc;
269 /* Blocks allocating well might have scheduled and tree might have changed, 225
270 let's search the tree again */ 226 /* Call block allocator to allocate blocks */
271 /* find where in the tree our write should go */ 227 res =
272 res = search_for_position_by_key(inode->i_sb, &key, &path); 228 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
273 if ( res == IO_ERROR ) { 229 blocks_to_allocate, blocks_to_allocate);
274 res = -EIO; 230 if (res != CARRY_ON) {
275 goto error_exit_free_blocks; 231 if (res == NO_DISK_SPACE) {
276 } 232 /* We flush the transaction in case of no space. This way some
277 233 blocks might become free */
278 bh = get_last_bh( &path ); // Get a bufferhead for last element in path. 234 SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
279 ih = get_ih( &path ); // Get a pointer to last item head in path. 235 res = restart_transaction(th, inode, &path);
280 item = get_item( &path ); // Get a pointer to last item in path 236 if (res)
281 237 goto error_exit;
282 /* Let's see what we have found */ 238
283 if ( res != POSITION_FOUND ) { /* position not found, this means that we 239 /* We might have scheduled, so search again */
284 might need to append file with holes 240 res =
285 first */ 241 search_for_position_by_key(inode->i_sb, &key,
286 // Since we are writing past the file's end, we need to find out if 242 &path);
287 // there is a hole that needs to be inserted before our writing 243 if (res == IO_ERROR) {
288 // position, and how many blocks it is going to cover (we need to 244 res = -EIO;
289 // populate pointers to file blocks representing the hole with zeros) 245 goto error_exit;
246 }
290 247
248 /* update changed info for hint structure. */
249 res =
250 reiserfs_allocate_blocknrs(&hint, allocated_blocks,
251 blocks_to_allocate,
252 blocks_to_allocate);
253 if (res != CARRY_ON) {
254 res = -ENOSPC;
255 pathrelse(&path);
256 goto error_exit;
257 }
258 } else {
259 res = -ENOSPC;
260 pathrelse(&path);
261 goto error_exit;
262 }
263 }
264#ifdef __BIG_ENDIAN
265 // Too bad, I have not found any way to convert a given region from
266 // cpu format to little endian format
291 { 267 {
292 int item_offset = 1; 268 int i;
293 /* 269 for (i = 0; i < blocks_to_allocate; i++)
294 * if ih is stat data, its offset is 0 and we don't want to 270 allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
295 * add 1 to pos in the hole_size calculation
296 */
297 if (is_statdata_le_ih(ih))
298 item_offset = 0;
299 hole_size = (pos + item_offset -
300 (le_key_k_offset( get_inode_item_key_version(inode),
301 &(ih->ih_key)) +
302 op_bytes_number(ih, inode->i_sb->s_blocksize))) >>
303 inode->i_sb->s_blocksize_bits;
304 } 271 }
272#endif
305 273
306 if ( hole_size > 0 ) { 274 /* Blocks allocating well might have scheduled and tree might have changed,
307 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time. 275 let's search the tree again */
308 /* area filled with zeroes, to supply as list of zero blocknumbers 276 /* find where in the tree our write should go */
309 We allocate it outside of loop just in case loop would spin for 277 res = search_for_position_by_key(inode->i_sb, &key, &path);
310 several iterations. */ 278 if (res == IO_ERROR) {
311 char *zeros = kmalloc(to_paste*UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. 279 res = -EIO;
312 if ( !zeros ) {
313 res = -ENOMEM;
314 goto error_exit_free_blocks; 280 goto error_exit_free_blocks;
315 } 281 }
316 memset ( zeros, 0, to_paste*UNFM_P_SIZE); 282
317 do { 283 bh = get_last_bh(&path); // Get a bufferhead for last element in path.
318 to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); 284 ih = get_ih(&path); // Get a pointer to last item head in path.
319 if ( is_indirect_le_ih(ih) ) { 285 item = get_item(&path); // Get a pointer to last item in path
320 /* Ok, there is existing indirect item already. Need to append it */ 286
321 /* Calculate position past inserted item */ 287 /* Let's see what we have found */
322 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 288 if (res != POSITION_FOUND) { /* position not found, this means that we
323 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)zeros, UNFM_P_SIZE*to_paste); 289 might need to append file with holes
324 if ( res ) { 290 first */
325 kfree(zeros); 291 // Since we are writing past the file's end, we need to find out if
326 goto error_exit_free_blocks; 292 // there is a hole that needs to be inserted before our writing
327 } 293 // position, and how many blocks it is going to cover (we need to
328 } else if ( is_statdata_le_ih(ih) ) { 294 // populate pointers to file blocks representing the hole with zeros)
329 /* No existing item, create it */ 295
330 /* item head for new item */ 296 {
331 struct item_head ins_ih; 297 int item_offset = 1;
332 298 /*
333 /* create a key for our new item */ 299 * if ih is stat data, its offset is 0 and we don't want to
334 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); 300 * add 1 to pos in the hole_size calculation
335 301 */
336 /* Create new item head for our new item */ 302 if (is_statdata_le_ih(ih))
337 make_le_item_head (&ins_ih, &key, key.version, 1, 303 item_offset = 0;
338 TYPE_INDIRECT, to_paste*UNFM_P_SIZE, 304 hole_size = (pos + item_offset -
339 0 /* free space */); 305 (le_key_k_offset
340 306 (get_inode_item_key_version(inode),
341 /* Find where such item should live in the tree */ 307 &(ih->ih_key)) + op_bytes_number(ih,
342 res = search_item (inode->i_sb, &key, &path); 308 inode->
343 if ( res != ITEM_NOT_FOUND ) { 309 i_sb->
344 /* item should not exist, otherwise we have error */ 310 s_blocksize)))
345 if ( res != -ENOSPC ) { 311 >> inode->i_sb->s_blocksize_bits;
346 reiserfs_warning (inode->i_sb, 312 }
347 "green-9008: search_by_key (%K) returned %d", 313
348 &key, res); 314 if (hole_size > 0) {
315 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time.
316 /* area filled with zeroes, to supply as list of zero blocknumbers
317 We allocate it outside of loop just in case loop would spin for
318 several iterations. */
319 char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
320 if (!zeros) {
321 res = -ENOMEM;
322 goto error_exit_free_blocks;
349 } 323 }
350 res = -EIO; 324 memset(zeros, 0, to_paste * UNFM_P_SIZE);
351 kfree(zeros); 325 do {
352 goto error_exit_free_blocks; 326 to_paste =
353 } 327 min_t(__u64, hole_size,
354 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros); 328 MAX_ITEM_LEN(inode->i_sb->
355 } else { 329 s_blocksize) /
356 reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key); 330 UNFM_P_SIZE);
331 if (is_indirect_le_ih(ih)) {
332 /* Ok, there is existing indirect item already. Need to append it */
333 /* Calculate position past inserted item */
334 make_cpu_key(&key, inode,
335 le_key_k_offset
336 (get_inode_item_key_version
337 (inode),
338 &(ih->ih_key)) +
339 op_bytes_number(ih,
340 inode->
341 i_sb->
342 s_blocksize),
343 TYPE_INDIRECT, 3);
344 res =
345 reiserfs_paste_into_item(th, &path,
346 &key,
347 inode,
348 (char *)
349 zeros,
350 UNFM_P_SIZE
351 *
352 to_paste);
353 if (res) {
354 kfree(zeros);
355 goto error_exit_free_blocks;
356 }
357 } else if (is_statdata_le_ih(ih)) {
358 /* No existing item, create it */
359 /* item head for new item */
360 struct item_head ins_ih;
361
362 /* create a key for our new item */
363 make_cpu_key(&key, inode, 1,
364 TYPE_INDIRECT, 3);
365
366 /* Create new item head for our new item */
367 make_le_item_head(&ins_ih, &key,
368 key.version, 1,
369 TYPE_INDIRECT,
370 to_paste *
371 UNFM_P_SIZE,
372 0 /* free space */ );
373
374 /* Find where such item should live in the tree */
375 res =
376 search_item(inode->i_sb, &key,
377 &path);
378 if (res != ITEM_NOT_FOUND) {
379 /* item should not exist, otherwise we have error */
380 if (res != -ENOSPC) {
381 reiserfs_warning(inode->
382 i_sb,
383 "green-9008: search_by_key (%K) returned %d",
384 &key,
385 res);
386 }
387 res = -EIO;
388 kfree(zeros);
389 goto error_exit_free_blocks;
390 }
391 res =
392 reiserfs_insert_item(th, &path,
393 &key, &ins_ih,
394 inode,
395 (char *)zeros);
396 } else {
397 reiserfs_panic(inode->i_sb,
398 "green-9011: Unexpected key type %K\n",
399 &key);
400 }
401 if (res) {
402 kfree(zeros);
403 goto error_exit_free_blocks;
404 }
405 /* Now we want to check if transaction is too full, and if it is
406 we restart it. This will also free the path. */
407 if (journal_transaction_should_end
408 (th, th->t_blocks_allocated)) {
409 res =
410 restart_transaction(th, inode,
411 &path);
412 if (res) {
413 pathrelse(&path);
414 kfree(zeros);
415 goto error_exit;
416 }
417 }
418
419 /* Well, need to recalculate path and stuff */
420 set_cpu_key_k_offset(&key,
421 cpu_key_k_offset(&key) +
422 (to_paste << inode->
423 i_blkbits));
424 res =
425 search_for_position_by_key(inode->i_sb,
426 &key, &path);
427 if (res == IO_ERROR) {
428 res = -EIO;
429 kfree(zeros);
430 goto error_exit_free_blocks;
431 }
432 bh = get_last_bh(&path);
433 ih = get_ih(&path);
434 item = get_item(&path);
435 hole_size -= to_paste;
436 } while (hole_size);
437 kfree(zeros);
357 } 438 }
358 if ( res ) { 439 }
359 kfree(zeros); 440 // Go through existing indirect items first
360 goto error_exit_free_blocks; 441 // replace all zeroes with blocknumbers from list
442 // Note that if no corresponding item was found, by previous search,
443 // it means there are no existing in-tree representation for file area
444 // we are going to overwrite, so there is nothing to scan through for holes.
445 for (curr_block = 0, itempos = path.pos_in_item;
446 curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
447 retry:
448
449 if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
450 /* We run out of data in this indirect item, let's look for another
451 one. */
452 /* First if we are already modifying current item, log it */
453 if (modifying_this_item) {
454 journal_mark_dirty(th, inode->i_sb, bh);
455 modifying_this_item = 0;
456 }
457 /* Then set the key to look for a new indirect item (offset of old
458 item is added to old item length */
459 set_cpu_key_k_offset(&key,
460 le_key_k_offset
461 (get_inode_item_key_version(inode),
462 &(ih->ih_key)) +
463 op_bytes_number(ih,
464 inode->i_sb->
465 s_blocksize));
466 /* Search ofor position of new key in the tree. */
467 res =
468 search_for_position_by_key(inode->i_sb, &key,
469 &path);
470 if (res == IO_ERROR) {
471 res = -EIO;
472 goto error_exit_free_blocks;
473 }
474 bh = get_last_bh(&path);
475 ih = get_ih(&path);
476 item = get_item(&path);
477 itempos = path.pos_in_item;
478 continue; // loop to check all kinds of conditions and so on.
361 } 479 }
362 /* Now we want to check if transaction is too full, and if it is 480 /* Ok, we have correct position in item now, so let's see if it is
363 we restart it. This will also free the path. */ 481 representing file hole (blocknumber is zero) and fill it if needed */
364 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 482 if (!item[itempos]) {
365 res = restart_transaction(th, inode, &path); 483 /* Ok, a hole. Now we need to check if we already prepared this
366 if (res) { 484 block to be journaled */
367 pathrelse (&path); 485 while (!modifying_this_item) { // loop until succeed
368 kfree(zeros); 486 /* Well, this item is not journaled yet, so we must prepare
369 goto error_exit; 487 it for journal first, before we can change it */
370 } 488 struct item_head tmp_ih; // We copy item head of found item,
371 } 489 // here to detect if fs changed under
372 490 // us while we were preparing for
373 /* Well, need to recalculate path and stuff */ 491 // journal.
374 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits)); 492 int fs_gen; // We store fs generation here to find if someone
375 res = search_for_position_by_key(inode->i_sb, &key, &path); 493 // changes fs under our feet
376 if ( res == IO_ERROR ) { 494
377 res = -EIO; 495 copy_item_head(&tmp_ih, ih); // Remember itemhead
378 kfree(zeros); 496 fs_gen = get_generation(inode->i_sb); // remember fs generation
379 goto error_exit_free_blocks; 497 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
498 if (fs_changed(fs_gen, inode->i_sb)
499 && item_moved(&tmp_ih, &path)) {
500 // Sigh, fs was changed under us, we need to look for new
501 // location of item we are working with
502
503 /* unmark prepaerd area as journaled and search for it's
504 new position */
505 reiserfs_restore_prepared_buffer(inode->
506 i_sb,
507 bh);
508 res =
509 search_for_position_by_key(inode->
510 i_sb,
511 &key,
512 &path);
513 if (res == IO_ERROR) {
514 res = -EIO;
515 goto error_exit_free_blocks;
516 }
517 bh = get_last_bh(&path);
518 ih = get_ih(&path);
519 item = get_item(&path);
520 itempos = path.pos_in_item;
521 goto retry;
522 }
523 modifying_this_item = 1;
524 }
525 item[itempos] = allocated_blocks[curr_block]; // Assign new block
526 curr_block++;
380 } 527 }
381 bh=get_last_bh(&path); 528 itempos++;
382 ih=get_ih(&path);
383 item = get_item(&path);
384 hole_size -= to_paste;
385 } while ( hole_size );
386 kfree(zeros);
387 } 529 }
388 } 530
389 531 if (modifying_this_item) { // We need to log last-accessed block, if it
390 // Go through existing indirect items first 532 // was modified, but not logged yet.
391 // replace all zeroes with blocknumbers from list 533 journal_mark_dirty(th, inode->i_sb, bh);
392 // Note that if no corresponding item was found, by previous search,
393 // it means there are no existing in-tree representation for file area
394 // we are going to overwrite, so there is nothing to scan through for holes.
395 for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) {
396retry:
397
398 if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) {
399 /* We run out of data in this indirect item, let's look for another
400 one. */
401 /* First if we are already modifying current item, log it */
402 if ( modifying_this_item ) {
403 journal_mark_dirty (th, inode->i_sb, bh);
404 modifying_this_item = 0;
405 }
406 /* Then set the key to look for a new indirect item (offset of old
407 item is added to old item length */
408 set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize));
409 /* Search ofor position of new key in the tree. */
410 res = search_for_position_by_key(inode->i_sb, &key, &path);
411 if ( res == IO_ERROR) {
412 res = -EIO;
413 goto error_exit_free_blocks;
414 }
415 bh=get_last_bh(&path);
416 ih=get_ih(&path);
417 item = get_item(&path);
418 itempos = path.pos_in_item;
419 continue; // loop to check all kinds of conditions and so on.
420 } 534 }
421 /* Ok, we have correct position in item now, so let's see if it is 535
422 representing file hole (blocknumber is zero) and fill it if needed */ 536 if (curr_block < blocks_to_allocate) {
423 if ( !item[itempos] ) { 537 // Oh, well need to append to indirect item, or to create indirect item
424 /* Ok, a hole. Now we need to check if we already prepared this 538 // if there weren't any
425 block to be journaled */ 539 if (is_indirect_le_ih(ih)) {
426 while ( !modifying_this_item ) { // loop until succeed 540 // Existing indirect item - append. First calculate key for append
427 /* Well, this item is not journaled yet, so we must prepare 541 // position. We do not need to recalculate path as it should
428 it for journal first, before we can change it */ 542 // already point to correct place.
429 struct item_head tmp_ih; // We copy item head of found item, 543 make_cpu_key(&key, inode,
430 // here to detect if fs changed under 544 le_key_k_offset(get_inode_item_key_version
431 // us while we were preparing for 545 (inode),
432 // journal. 546 &(ih->ih_key)) +
433 int fs_gen; // We store fs generation here to find if someone 547 op_bytes_number(ih,
434 // changes fs under our feet 548 inode->i_sb->s_blocksize),
435 549 TYPE_INDIRECT, 3);
436 copy_item_head (&tmp_ih, ih); // Remember itemhead 550 res =
437 fs_gen = get_generation (inode->i_sb); // remember fs generation 551 reiserfs_paste_into_item(th, &path, &key, inode,
438 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. 552 (char *)(allocated_blocks +
439 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { 553 curr_block),
440 // Sigh, fs was changed under us, we need to look for new 554 UNFM_P_SIZE *
441 // location of item we are working with 555 (blocks_to_allocate -
442 556 curr_block));
443 /* unmark prepaerd area as journaled and search for it's 557 if (res) {
444 new position */ 558 goto error_exit_free_blocks;
445 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 559 }
446 res = search_for_position_by_key(inode->i_sb, &key, &path); 560 } else if (is_statdata_le_ih(ih)) {
447 if ( res == IO_ERROR) { 561 // Last found item was statdata. That means we need to create indirect item.
448 res = -EIO; 562 struct item_head ins_ih; /* itemhead for new item */
449 goto error_exit_free_blocks; 563
450 } 564 /* create a key for our new item */
451 bh=get_last_bh(&path); 565 make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one,
452 ih=get_ih(&path); 566 // because that's
453 item = get_item(&path); 567 // where first
454 itempos = path.pos_in_item; 568 // indirect item
455 goto retry; 569 // begins
570 /* Create new item head for our new item */
571 make_le_item_head(&ins_ih, &key, key.version, 1,
572 TYPE_INDIRECT,
573 (blocks_to_allocate -
574 curr_block) * UNFM_P_SIZE,
575 0 /* free space */ );
576 /* Find where such item should live in the tree */
577 res = search_item(inode->i_sb, &key, &path);
578 if (res != ITEM_NOT_FOUND) {
579 /* Well, if we have found such item already, or some error
580 occured, we need to warn user and return error */
581 if (res != -ENOSPC) {
582 reiserfs_warning(inode->i_sb,
583 "green-9009: search_by_key (%K) "
584 "returned %d", &key,
585 res);
586 }
587 res = -EIO;
588 goto error_exit_free_blocks;
589 }
590 /* Insert item into the tree with the data as its body */
591 res =
592 reiserfs_insert_item(th, &path, &key, &ins_ih,
593 inode,
594 (char *)(allocated_blocks +
595 curr_block));
596 } else {
597 reiserfs_panic(inode->i_sb,
598 "green-9010: unexpected item type for key %K\n",
599 &key);
456 } 600 }
457 modifying_this_item = 1;
458 }
459 item[itempos] = allocated_blocks[curr_block]; // Assign new block
460 curr_block++;
461 } 601 }
462 itempos++; 602 // the caller is responsible for closing the transaction
463 } 603 // unless we return an error, they are also responsible for logging
464 604 // the inode.
465 if ( modifying_this_item ) { // We need to log last-accessed block, if it 605 //
466 // was modified, but not logged yet. 606 pathrelse(&path);
467 journal_mark_dirty (th, inode->i_sb, bh); 607 /*
468 } 608 * cleanup prellocation from previous writes
469 609 * if this is a partial block write
470 if ( curr_block < blocks_to_allocate ) { 610 */
471 // Oh, well need to append to indirect item, or to create indirect item 611 if (write_bytes & (inode->i_sb->s_blocksize - 1))
472 // if there weren't any 612 reiserfs_discard_prealloc(th, inode);
473 if ( is_indirect_le_ih(ih) ) { 613 reiserfs_write_unlock(inode->i_sb);
474 // Existing indirect item - append. First calculate key for append 614
475 // position. We do not need to recalculate path as it should 615 // go through all the pages/buffers and map the buffers to newly allocated
476 // already point to correct place. 616 // blocks (so that system knows where to write these pages later).
477 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 617 curr_block = 0;
478 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)(allocated_blocks+curr_block), UNFM_P_SIZE*(blocks_to_allocate-curr_block)); 618 for (i = 0; i < num_pages; i++) {
479 if ( res ) { 619 struct page *page = prepared_pages[i]; //current page
480 goto error_exit_free_blocks; 620 struct buffer_head *head = page_buffers(page); // first buffer for a page
481 } 621 int block_start, block_end; // in-page offsets for buffers.
482 } else if (is_statdata_le_ih(ih) ) { 622
483 // Last found item was statdata. That means we need to create indirect item. 623 if (!page_buffers(page))
484 struct item_head ins_ih; /* itemhead for new item */ 624 reiserfs_panic(inode->i_sb,
485 625 "green-9005: No buffers for prepared page???");
486 /* create a key for our new item */ 626
487 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one, 627 /* For each buffer in page */
488 // because that's 628 for (bh = head, block_start = 0; bh != head || !block_start;
489 // where first 629 block_start = block_end, bh = bh->b_this_page) {
490 // indirect item 630 if (!bh)
491 // begins 631 reiserfs_panic(inode->i_sb,
492 /* Create new item head for our new item */ 632 "green-9006: Allocated but absent buffer for a page?");
493 make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT, 633 block_end = block_start + inode->i_sb->s_blocksize;
494 (blocks_to_allocate-curr_block)*UNFM_P_SIZE, 634 if (i == 0 && block_end <= from)
495 0 /* free space */); 635 /* if this buffer is before requested data to map, skip it */
496 /* Find where such item should live in the tree */ 636 continue;
497 res = search_item (inode->i_sb, &key, &path); 637 if (i == num_pages - 1 && block_start >= to)
498 if ( res != ITEM_NOT_FOUND ) { 638 /* If this buffer is after requested data to map, abort
499 /* Well, if we have found such item already, or some error 639 processing of current page */
500 occured, we need to warn user and return error */ 640 break;
501 if ( res != -ENOSPC ) { 641
502 reiserfs_warning (inode->i_sb, 642 if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it
503 "green-9009: search_by_key (%K) " 643 map_bh(bh, inode->i_sb,
504 "returned %d", &key, res); 644 le32_to_cpu(allocated_blocks
645 [curr_block]));
646 curr_block++;
647 set_buffer_new(bh);
648 }
505 } 649 }
506 res = -EIO;
507 goto error_exit_free_blocks;
508 }
509 /* Insert item into the tree with the data as its body */
510 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block));
511 } else {
512 reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key);
513 }
514 }
515
516 // the caller is responsible for closing the transaction
517 // unless we return an error, they are also responsible for logging
518 // the inode.
519 //
520 pathrelse(&path);
521 /*
522 * cleanup prellocation from previous writes
523 * if this is a partial block write
524 */
525 if (write_bytes & (inode->i_sb->s_blocksize -1))
526 reiserfs_discard_prealloc(th, inode);
527 reiserfs_write_unlock(inode->i_sb);
528
529 // go through all the pages/buffers and map the buffers to newly allocated
530 // blocks (so that system knows where to write these pages later).
531 curr_block = 0;
532 for ( i = 0; i < num_pages ; i++ ) {
533 struct page *page=prepared_pages[i]; //current page
534 struct buffer_head *head = page_buffers(page);// first buffer for a page
535 int block_start, block_end; // in-page offsets for buffers.
536
537 if (!page_buffers(page))
538 reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???");
539
540 /* For each buffer in page */
541 for(bh = head, block_start = 0; bh != head || !block_start;
542 block_start=block_end, bh = bh->b_this_page) {
543 if (!bh)
544 reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?");
545 block_end = block_start+inode->i_sb->s_blocksize;
546 if (i == 0 && block_end <= from )
547 /* if this buffer is before requested data to map, skip it */
548 continue;
549 if (i == num_pages - 1 && block_start >= to)
550 /* If this buffer is after requested data to map, abort
551 processing of current page */
552 break;
553
554 if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it
555 map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block]));
556 curr_block++;
557 set_buffer_new(bh);
558 }
559 } 650 }
560 }
561 651
562 RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird"); 652 RFALSE(curr_block > blocks_to_allocate,
653 "green-9007: Used too many blocks? weird");
563 654
564 kfree(allocated_blocks); 655 kfree(allocated_blocks);
565 return 0; 656 return 0;
566 657
567// Need to deal with transaction here. 658// Need to deal with transaction here.
568error_exit_free_blocks: 659 error_exit_free_blocks:
569 pathrelse(&path); 660 pathrelse(&path);
570 // free blocks 661 // free blocks
571 for( i = 0; i < blocks_to_allocate; i++ ) 662 for (i = 0; i < blocks_to_allocate; i++)
572 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1); 663 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
573 664 1);
574error_exit: 665
575 if (th->t_trans_id) { 666 error_exit:
576 int err; 667 if (th->t_trans_id) {
577 // update any changes we made to blk count 668 int err;
578 reiserfs_update_sd(th, inode); 669 // update any changes we made to blk count
579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); 670 reiserfs_update_sd(th, inode);
580 if (err) 671 err =
581 res = err; 672 journal_end(th, inode->i_sb,
582 } 673 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
583 reiserfs_write_unlock(inode->i_sb); 674 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
584 kfree(allocated_blocks); 675 if (err)
585 676 res = err;
586 return res; 677 }
678 reiserfs_write_unlock(inode->i_sb);
679 kfree(allocated_blocks);
680
681 return res;
587} 682}
588 683
589/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ 684/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
590static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ 685static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */
591 size_t num_pages /* amount of pages */) { 686 size_t num_pages /* amount of pages */ )
592 int i; // loop counter 687{
688 int i; // loop counter
593 689
594 for (i=0; i < num_pages ; i++) { 690 for (i = 0; i < num_pages; i++) {
595 struct page *page = prepared_pages[i]; 691 struct page *page = prepared_pages[i];
596 692
597 try_to_free_buffers(page); 693 try_to_free_buffers(page);
598 unlock_page(page); 694 unlock_page(page);
599 page_cache_release(page); 695 page_cache_release(page);
600 } 696 }
601} 697}
602 698
603/* This function will copy data from userspace to specified pages within 699/* This function will copy data from userspace to specified pages within
604 supplied byte range */ 700 supplied byte range */
605static int reiserfs_copy_from_user_to_file_region( 701static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */
606 loff_t pos, /* In-file position */ 702 int num_pages, /* Number of pages affected */
607 int num_pages, /* Number of pages affected */ 703 int write_bytes, /* Amount of bytes to write */
608 int write_bytes, /* Amount of bytes to write */ 704 struct page **prepared_pages, /* pointer to
609 struct page **prepared_pages, /* pointer to 705 array to
610 array to 706 prepared pages
611 prepared pages 707 */
612 */ 708 const char __user * buf /* Pointer to user-supplied
613 const char __user *buf /* Pointer to user-supplied 709 data */
614 data*/ 710 )
615 )
616{ 711{
617 long page_fault=0; // status of copy_from_user. 712 long page_fault = 0; // status of copy_from_user.
618 int i; // loop counter. 713 int i; // loop counter.
619 int offset; // offset in page 714 int offset; // offset in page
620 715
621 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 716 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
622 size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 717 i++, offset = 0) {
623 struct page *page=prepared_pages[i]; // Current page we process. 718 size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
624 719 struct page *page = prepared_pages[i]; // Current page we process.
625 fault_in_pages_readable( buf, count); 720
626 721 fault_in_pages_readable(buf, count);
627 /* Copy data from userspace to the current page */ 722
628 kmap(page); 723 /* Copy data from userspace to the current page */
629 page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data. 724 kmap(page);
630 /* Flush processor's dcache for this page */ 725 page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data.
631 flush_dcache_page(page); 726 /* Flush processor's dcache for this page */
632 kunmap(page); 727 flush_dcache_page(page);
633 buf+=count; 728 kunmap(page);
634 write_bytes-=count; 729 buf += count;
635 730 write_bytes -= count;
636 if (page_fault) 731
637 break; // Was there a fault? abort. 732 if (page_fault)
638 } 733 break; // Was there a fault? abort.
639 734 }
640 return page_fault?-EFAULT:0; 735
736 return page_fault ? -EFAULT : 0;
641} 737}
642 738
643/* taken fs/buffer.c:__block_commit_write */ 739/* taken fs/buffer.c:__block_commit_write */
644int reiserfs_commit_page(struct inode *inode, struct page *page, 740int reiserfs_commit_page(struct inode *inode, struct page *page,
645 unsigned from, unsigned to) 741 unsigned from, unsigned to)
646{ 742{
647 unsigned block_start, block_end; 743 unsigned block_start, block_end;
648 int partial = 0; 744 int partial = 0;
649 unsigned blocksize; 745 unsigned blocksize;
650 struct buffer_head *bh, *head; 746 struct buffer_head *bh, *head;
651 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; 747 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
652 int new; 748 int new;
653 int logit = reiserfs_file_data_log(inode); 749 int logit = reiserfs_file_data_log(inode);
654 struct super_block *s = inode->i_sb; 750 struct super_block *s = inode->i_sb;
655 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 751 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
656 struct reiserfs_transaction_handle th; 752 struct reiserfs_transaction_handle th;
657 int ret = 0; 753 int ret = 0;
658 754
659 th.t_trans_id = 0; 755 th.t_trans_id = 0;
660 blocksize = 1 << inode->i_blkbits; 756 blocksize = 1 << inode->i_blkbits;
661 757
662 if (logit) { 758 if (logit) {
663 reiserfs_write_lock(s); 759 reiserfs_write_lock(s);
664 ret = journal_begin(&th, s, bh_per_page + 1); 760 ret = journal_begin(&th, s, bh_per_page + 1);
665 if (ret) 761 if (ret)
666 goto drop_write_lock; 762 goto drop_write_lock;
667 reiserfs_update_inode_transaction(inode); 763 reiserfs_update_inode_transaction(inode);
668 } 764 }
669 for(bh = head = page_buffers(page), block_start = 0; 765 for (bh = head = page_buffers(page), block_start = 0;
670 bh != head || !block_start; 766 bh != head || !block_start;
671 block_start=block_end, bh = bh->b_this_page) 767 block_start = block_end, bh = bh->b_this_page) {
672 { 768
673 769 new = buffer_new(bh);
674 new = buffer_new(bh); 770 clear_buffer_new(bh);
675 clear_buffer_new(bh); 771 block_end = block_start + blocksize;
676 block_end = block_start + blocksize; 772 if (block_end <= from || block_start >= to) {
677 if (block_end <= from || block_start >= to) { 773 if (!buffer_uptodate(bh))
678 if (!buffer_uptodate(bh)) 774 partial = 1;
679 partial = 1; 775 } else {
680 } else { 776 set_buffer_uptodate(bh);
681 set_buffer_uptodate(bh); 777 if (logit) {
682 if (logit) { 778 reiserfs_prepare_for_journal(s, bh, 1);
683 reiserfs_prepare_for_journal(s, bh, 1); 779 journal_mark_dirty(&th, s, bh);
684 journal_mark_dirty(&th, s, bh); 780 } else if (!buffer_dirty(bh)) {
685 } else if (!buffer_dirty(bh)) { 781 mark_buffer_dirty(bh);
686 mark_buffer_dirty(bh); 782 /* do data=ordered on any page past the end
687 /* do data=ordered on any page past the end 783 * of file and any buffer marked BH_New.
688 * of file and any buffer marked BH_New. 784 */
689 */ 785 if (reiserfs_data_ordered(inode->i_sb) &&
690 if (reiserfs_data_ordered(inode->i_sb) && 786 (new || page->index >= i_size_index)) {
691 (new || page->index >= i_size_index)) { 787 reiserfs_add_ordered_list(inode, bh);
692 reiserfs_add_ordered_list(inode, bh); 788 }
693 } 789 }
694 } 790 }
695 } 791 }
696 } 792 if (logit) {
697 if (logit) { 793 ret = journal_end(&th, s, bh_per_page + 1);
698 ret = journal_end(&th, s, bh_per_page + 1); 794 drop_write_lock:
699drop_write_lock: 795 reiserfs_write_unlock(s);
700 reiserfs_write_unlock(s); 796 }
701 } 797 /*
702 /* 798 * If this is a partial write which happened to make all buffers
703 * If this is a partial write which happened to make all buffers 799 * uptodate then we can optimize away a bogus readpage() for
704 * uptodate then we can optimize away a bogus readpage() for 800 * the next read(). Here we 'discover' whether the page went
705 * the next read(). Here we 'discover' whether the page went 801 * uptodate as a result of this (potentially partial) write.
706 * uptodate as a result of this (potentially partial) write. 802 */
707 */ 803 if (!partial)
708 if (!partial) 804 SetPageUptodate(page);
709 SetPageUptodate(page); 805 return ret;
710 return ret;
711} 806}
712 807
713
714/* Submit pages for write. This was separated from actual file copying 808/* Submit pages for write. This was separated from actual file copying
715 because we might want to allocate block numbers in-between. 809 because we might want to allocate block numbers in-between.
716 This function assumes that caller will adjust file size to correct value. */ 810 This function assumes that caller will adjust file size to correct value. */
717static int reiserfs_submit_file_region_for_write( 811static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */
718 struct reiserfs_transaction_handle *th, 812 size_t num_pages, /* Number of pages to write */
719 struct inode *inode, 813 size_t write_bytes, /* number of bytes to write */
720 loff_t pos, /* Writing position offset */ 814 struct page **prepared_pages /* list of pages */
721 size_t num_pages, /* Number of pages to write */ 815 )
722 size_t write_bytes, /* number of bytes to write */
723 struct page **prepared_pages /* list of pages */
724 )
725{ 816{
726 int status; // return status of block_commit_write. 817 int status; // return status of block_commit_write.
727 int retval = 0; // Return value we are going to return. 818 int retval = 0; // Return value we are going to return.
728 int i; // loop counter 819 int i; // loop counter
729 int offset; // Writing offset in page. 820 int offset; // Writing offset in page.
730 int orig_write_bytes = write_bytes; 821 int orig_write_bytes = write_bytes;
731 int sd_update = 0; 822 int sd_update = 0;
732 823
733 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 824 for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
734 int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 825 i++, offset = 0) {
735 struct page *page=prepared_pages[i]; // Current page we process. 826 int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page
736 827 struct page *page = prepared_pages[i]; // Current page we process.
737 status = reiserfs_commit_page(inode, page, offset, offset+count); 828
738 if ( status ) 829 status =
739 retval = status; // To not overcomplicate matters We are going to 830 reiserfs_commit_page(inode, page, offset, offset + count);
740 // submit all the pages even if there was error. 831 if (status)
741 // we only remember error status to report it on 832 retval = status; // To not overcomplicate matters We are going to
742 // exit. 833 // submit all the pages even if there was error.
743 write_bytes-=count; 834 // we only remember error status to report it on
744 } 835 // exit.
745 /* now that we've gotten all the ordered buffers marked dirty, 836 write_bytes -= count;
746 * we can safely update i_size and close any running transaction 837 }
747 */ 838 /* now that we've gotten all the ordered buffers marked dirty,
748 if ( pos + orig_write_bytes > inode->i_size) { 839 * we can safely update i_size and close any running transaction
749 inode->i_size = pos + orig_write_bytes; // Set new size 840 */
750 /* If the file have grown so much that tail packing is no 841 if (pos + orig_write_bytes > inode->i_size) {
751 * longer possible, reset "need to pack" flag */ 842 inode->i_size = pos + orig_write_bytes; // Set new size
752 if ( (have_large_tails (inode->i_sb) && 843 /* If the file have grown so much that tail packing is no
753 inode->i_size > i_block_size (inode)*4) || 844 * longer possible, reset "need to pack" flag */
754 (have_small_tails (inode->i_sb) && 845 if ((have_large_tails(inode->i_sb) &&
755 inode->i_size > i_block_size(inode)) ) 846 inode->i_size > i_block_size(inode) * 4) ||
756 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ; 847 (have_small_tails(inode->i_sb) &&
757 else if ( (have_large_tails (inode->i_sb) && 848 inode->i_size > i_block_size(inode)))
758 inode->i_size < i_block_size (inode)*4) || 849 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
759 (have_small_tails (inode->i_sb) && 850 else if ((have_large_tails(inode->i_sb) &&
760 inode->i_size < i_block_size(inode)) ) 851 inode->i_size < i_block_size(inode) * 4) ||
761 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; 852 (have_small_tails(inode->i_sb) &&
762 853 inode->i_size < i_block_size(inode)))
854 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
855
856 if (th->t_trans_id) {
857 reiserfs_write_lock(inode->i_sb);
858 reiserfs_update_sd(th, inode); // And update on-disk metadata
859 reiserfs_write_unlock(inode->i_sb);
860 } else
861 inode->i_sb->s_op->dirty_inode(inode);
862
863 sd_update = 1;
864 }
763 if (th->t_trans_id) { 865 if (th->t_trans_id) {
764 reiserfs_write_lock(inode->i_sb); 866 reiserfs_write_lock(inode->i_sb);
765 reiserfs_update_sd(th, inode); // And update on-disk metadata 867 if (!sd_update)
766 reiserfs_write_unlock(inode->i_sb); 868 reiserfs_update_sd(th, inode);
767 } else 869 status = journal_end(th, th->t_super, th->t_blocks_allocated);
768 inode->i_sb->s_op->dirty_inode(inode); 870 if (status)
871 retval = status;
872 reiserfs_write_unlock(inode->i_sb);
873 }
874 th->t_trans_id = 0;
769 875
770 sd_update = 1; 876 /*
771 } 877 * we have to unlock the pages after updating i_size, otherwise
772 if (th->t_trans_id) { 878 * we race with writepage
773 reiserfs_write_lock(inode->i_sb); 879 */
774 if (!sd_update) 880 for (i = 0; i < num_pages; i++) {
775 reiserfs_update_sd(th, inode); 881 struct page *page = prepared_pages[i];
776 status = journal_end(th, th->t_super, th->t_blocks_allocated); 882 unlock_page(page);
777 if (status) 883 mark_page_accessed(page);
778 retval = status; 884 page_cache_release(page);
779 reiserfs_write_unlock(inode->i_sb); 885 }
780 } 886 return retval;
781 th->t_trans_id = 0;
782
783 /*
784 * we have to unlock the pages after updating i_size, otherwise
785 * we race with writepage
786 */
787 for ( i = 0; i < num_pages ; i++) {
788 struct page *page=prepared_pages[i];
789 unlock_page(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792 }
793 return retval;
794} 887}
795 888
796/* Look if passed writing region is going to touch file's tail 889/* Look if passed writing region is going to touch file's tail
797 (if it is present). And if it is, convert the tail to unformatted node */ 890 (if it is present). And if it is, convert the tail to unformatted node */
798static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal with */ 891static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */
799 loff_t pos, /* Writing position */ 892 loff_t pos, /* Writing position */
800 int write_bytes /* amount of bytes to write */ 893 int write_bytes /* amount of bytes to write */
801 ) 894 )
802{ 895{
803 INITIALIZE_PATH(path); // needed for search_for_position 896 INITIALIZE_PATH(path); // needed for search_for_position
804 struct cpu_key key; // Key that would represent last touched writing byte. 897 struct cpu_key key; // Key that would represent last touched writing byte.
805 struct item_head *ih; // item header of found block; 898 struct item_head *ih; // item header of found block;
806 int res; // Return value of various functions we call. 899 int res; // Return value of various functions we call.
807 int cont_expand_offset; // We will put offset for generic_cont_expand here 900 int cont_expand_offset; // We will put offset for generic_cont_expand here
808 // This can be int just because tails are created 901 // This can be int just because tails are created
809 // only for small files. 902 // only for small files.
810 903
811/* this embodies a dependency on a particular tail policy */ 904/* this embodies a dependency on a particular tail policy */
812 if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) { 905 if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
813 /* such a big files do not have tails, so we won't bother ourselves 906 /* such a big files do not have tails, so we won't bother ourselves
814 to look for tails, simply return */ 907 to look for tails, simply return */
815 return 0; 908 return 0;
816 } 909 }
817
818 reiserfs_write_lock(inode->i_sb);
819 /* find the item containing the last byte to be written, or if
820 * writing past the end of the file then the last item of the
821 * file (and then we check its type). */
822 make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/*key length*/);
823 res = search_for_position_by_key(inode->i_sb, &key, &path);
824 if ( res == IO_ERROR ) {
825 reiserfs_write_unlock(inode->i_sb);
826 return -EIO;
827 }
828 ih = get_ih(&path);
829 res = 0;
830 if ( is_direct_le_ih(ih) ) {
831 /* Ok, closest item is file tail (tails are stored in "direct"
832 * items), so we need to unpack it. */
833 /* To not overcomplicate matters, we just call generic_cont_expand
834 which will in turn call other stuff and finally will boil down to
835 reiserfs_get_block() that would do necessary conversion. */
836 cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key));
837 pathrelse(&path);
838 res = generic_cont_expand( inode, cont_expand_offset);
839 } else
840 pathrelse(&path);
841 910
842 reiserfs_write_unlock(inode->i_sb); 911 reiserfs_write_lock(inode->i_sb);
843 return res; 912 /* find the item containing the last byte to be written, or if
913 * writing past the end of the file then the last item of the
914 * file (and then we check its type). */
915 make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
916 3 /*key length */ );
917 res = search_for_position_by_key(inode->i_sb, &key, &path);
918 if (res == IO_ERROR) {
919 reiserfs_write_unlock(inode->i_sb);
920 return -EIO;
921 }
922 ih = get_ih(&path);
923 res = 0;
924 if (is_direct_le_ih(ih)) {
925 /* Ok, closest item is file tail (tails are stored in "direct"
926 * items), so we need to unpack it. */
927 /* To not overcomplicate matters, we just call generic_cont_expand
928 which will in turn call other stuff and finally will boil down to
929 reiserfs_get_block() that would do necessary conversion. */
930 cont_expand_offset =
931 le_key_k_offset(get_inode_item_key_version(inode),
932 &(ih->ih_key));
933 pathrelse(&path);
934 res = generic_cont_expand(inode, cont_expand_offset);
935 } else
936 pathrelse(&path);
937
938 reiserfs_write_unlock(inode->i_sb);
939 return res;
844} 940}
845 941
846/* This function locks pages starting from @pos for @inode. 942/* This function locks pages starting from @pos for @inode.
@@ -851,275 +947,296 @@ static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to
851 append), it is zeroed, then. 947 append), it is zeroed, then.
852 Returns number of unallocated blocks that should be allocated to cover 948 Returns number of unallocated blocks that should be allocated to cover
853 new file data.*/ 949 new file data.*/
854static int reiserfs_prepare_file_region_for_write( 950static int reiserfs_prepare_file_region_for_write(struct inode *inode
855 struct inode *inode /* Inode of the file */, 951 /* Inode of the file */ ,
856 loff_t pos, /* position in the file */ 952 loff_t pos, /* position in the file */
857 size_t num_pages, /* number of pages to 953 size_t num_pages, /* number of pages to
858 prepare */ 954 prepare */
859 size_t write_bytes, /* Amount of bytes to be 955 size_t write_bytes, /* Amount of bytes to be
860 overwritten from 956 overwritten from
861 @pos */ 957 @pos */
862 struct page **prepared_pages /* pointer to array 958 struct page **prepared_pages /* pointer to array
863 where to store 959 where to store
864 prepared pages */ 960 prepared pages */
865 ) 961 )
866{ 962{
867 int res=0; // Return values of different functions we call. 963 int res = 0; // Return values of different functions we call.
868 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. 964 unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
869 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page 965 int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
870 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; 966 int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
871 /* offset of last modified byte in last 967 /* offset of last modified byte in last
872 page */ 968 page */
873 struct address_space *mapping = inode->i_mapping; // Pages are mapped here. 969 struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
874 int i; // Simple counter 970 int i; // Simple counter
875 int blocks = 0; /* Return value (blocks that should be allocated) */ 971 int blocks = 0; /* Return value (blocks that should be allocated) */
876 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead 972 struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
877 // of a page. 973 // of a page.
878 unsigned block_start, block_end; // Starting and ending offsets of current 974 unsigned block_start, block_end; // Starting and ending offsets of current
879 // buffer in the page. 975 // buffer in the page.
880 struct buffer_head *wait[2], **wait_bh=wait; // Buffers for page, if 976 struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if
881 // Page appeared to be not up 977 // Page appeared to be not up
882 // to date. Note how we have 978 // to date. Note how we have
883 // at most 2 buffers, this is 979 // at most 2 buffers, this is
884 // because we at most may 980 // because we at most may
885 // partially overwrite two 981 // partially overwrite two
886 // buffers for one page. One at // the beginning of write area 982 // buffers for one page. One at // the beginning of write area
887 // and one at the end. 983 // and one at the end.
888 // Everything inthe middle gets // overwritten totally. 984 // Everything inthe middle gets // overwritten totally.
889 985
890 struct cpu_key key; // cpu key of item that we are going to deal with 986 struct cpu_key key; // cpu key of item that we are going to deal with
891 struct item_head *ih = NULL; // pointer to item head that we are going to deal with 987 struct item_head *ih = NULL; // pointer to item head that we are going to deal with
892 struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with 988 struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with
893 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 989 INITIALIZE_PATH(path); // path to item, that we are going to deal with.
894 __le32 * item=NULL; // pointer to item we are going to deal with 990 __le32 *item = NULL; // pointer to item we are going to deal with
895 int item_pos=-1; /* Position in indirect item */ 991 int item_pos = -1; /* Position in indirect item */
896 992
897 993 if (num_pages < 1) {
898 if ( num_pages < 1 ) { 994 reiserfs_warning(inode->i_sb,
899 reiserfs_warning (inode->i_sb, 995 "green-9001: reiserfs_prepare_file_region_for_write "
900 "green-9001: reiserfs_prepare_file_region_for_write " 996 "called with zero number of pages to process");
901 "called with zero number of pages to process"); 997 return -EFAULT;
902 return -EFAULT;
903 }
904
905 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
906 that nobody would touch these until we release the pages. Then
907 we'd start to deal with mapping buffers to blocks. */
908 for ( i = 0; i < num_pages; i++) {
909 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
910 if ( !prepared_pages[i]) {
911 res = -ENOMEM;
912 goto failed_page_grabbing;
913 }
914 if (!page_has_buffers(prepared_pages[i]))
915 create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0);
916 }
917
918 /* Let's count amount of blocks for a case where all the blocks
919 overwritten are new (we will substract already allocated blocks later)*/
920 if ( num_pages > 2 )
921 /* These are full-overwritten pages so we count all the blocks in
922 these pages are counted as needed to be allocated */
923 blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
924
925 /* count blocks needed for first page (possibly partially written) */
926 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) +
927 !!(from & (inode->i_sb->s_blocksize-1)); /* roundup */
928
929 /* Now we account for last page. If last page == first page (we
930 overwrite only one page), we substract all the blocks past the
931 last writing position in a page out of already calculated number
932 of blocks */
933 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) -
934 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
935 /* Note how we do not roundup here since partial blocks still
936 should be allocated */
937
938 /* Now if all the write area lies past the file end, no point in
939 maping blocks, since there is none, so we just zero out remaining
940 parts of first and last pages in write area (if needed) */
941 if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) {
942 if ( from != 0 ) {/* First page needs to be partially zeroed */
943 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
944 memset(kaddr, 0, from);
945 kunmap_atomic( kaddr, KM_USER0);
946 }
947 if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
948 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
949 memset(kaddr+to, 0, PAGE_CACHE_SIZE - to);
950 kunmap_atomic( kaddr, KM_USER0);
951 } 998 }
952 999
953 /* Since all blocks are new - use already calculated value */ 1000 /* We have 2 loops for pages. In first loop we grab and lock the pages, so
954 return blocks; 1001 that nobody would touch these until we release the pages. Then
955 } 1002 we'd start to deal with mapping buffers to blocks. */
956 1003 for (i = 0; i < num_pages; i++) {
957 /* Well, since we write somewhere into the middle of a file, there is 1004 prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
958 possibility we are writing over some already allocated blocks, so 1005 if (!prepared_pages[i]) {
959 let's map these blocks and substract number of such blocks out of blocks 1006 res = -ENOMEM;
960 we need to allocate (calculated above) */ 1007 goto failed_page_grabbing;
961 /* Mask write position to start on blocksize, we do it out of the
962 loop for performance reasons */
963 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
964 /* Set cpu key to the starting position in a file (on left block boundary)*/
965 make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/*key length*/);
966
967 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
968 for ( i = 0; i < num_pages ; i++ ) {
969
970 head = page_buffers(prepared_pages[i]);
971 /* For each buffer in the page */
972 for(bh = head, block_start = 0; bh != head || !block_start;
973 block_start=block_end, bh = bh->b_this_page) {
974 if (!bh)
975 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
976 /* Find where this buffer ends */
977 block_end = block_start+inode->i_sb->s_blocksize;
978 if (i == 0 && block_end <= from )
979 /* if this buffer is before requested data to map, skip it*/
980 continue;
981
982 if (i == num_pages - 1 && block_start >= to) {
983 /* If this buffer is after requested data to map, abort
984 processing of current page */
985 break;
986 } 1008 }
1009 if (!page_has_buffers(prepared_pages[i]))
1010 create_empty_buffers(prepared_pages[i],
1011 inode->i_sb->s_blocksize, 0);
1012 }
987 1013
988 if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) { 1014 /* Let's count amount of blocks for a case where all the blocks
989 /* This is optimisation for a case where buffer is mapped 1015 overwritten are new (we will substract already allocated blocks later) */
990 and have blocknumber assigned. In case significant amount 1016 if (num_pages > 2)
991 of such buffers are present, we may avoid some amount 1017 /* These are full-overwritten pages so we count all the blocks in
992 of search_by_key calls. 1018 these pages are counted as needed to be allocated */
993 Probably it would be possible to move parts of this code 1019 blocks =
994 out of BKL, but I afraid that would overcomplicate code 1020 (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
995 without any noticeable benefit. 1021
996 */ 1022 /* count blocks needed for first page (possibly partially written) */
997 item_pos++; 1023 blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */
998 /* Update the key */ 1024
999 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize); 1025 /* Now we account for last page. If last page == first page (we
1000 blocks--; // Decrease the amount of blocks that need to be 1026 overwrite only one page), we substract all the blocks past the
1001 // allocated 1027 last writing position in a page out of already calculated number
1002 continue; // Go to the next buffer 1028 of blocks */
1029 blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
1030 ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
1031 /* Note how we do not roundup here since partial blocks still
1032 should be allocated */
1033
1034 /* Now if all the write area lies past the file end, no point in
1035 maping blocks, since there is none, so we just zero out remaining
1036 parts of first and last pages in write area (if needed) */
1037 if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
1038 if (from != 0) { /* First page needs to be partially zeroed */
1039 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
1040 memset(kaddr, 0, from);
1041 kunmap_atomic(kaddr, KM_USER0);
1042 }
1043 if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */
1044 char *kaddr =
1045 kmap_atomic(prepared_pages[num_pages - 1],
1046 KM_USER0);
1047 memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
1048 kunmap_atomic(kaddr, KM_USER0);
1003 } 1049 }
1004 1050
1005 if ( !itembuf || /* if first iteration */ 1051 /* Since all blocks are new - use already calculated value */
1006 item_pos >= ih_item_len(ih)/UNFM_P_SIZE) 1052 return blocks;
1007 { /* or if we progressed past the 1053 }
1008 current unformatted_item */ 1054
1009 /* Try to find next item */ 1055 /* Well, since we write somewhere into the middle of a file, there is
1010 res = search_for_position_by_key(inode->i_sb, &key, &path); 1056 possibility we are writing over some already allocated blocks, so
1011 /* Abort if no more items */ 1057 let's map these blocks and substract number of such blocks out of blocks
1012 if ( res != POSITION_FOUND ) { 1058 we need to allocate (calculated above) */
1013 /* make sure later loops don't use this item */ 1059 /* Mask write position to start on blocksize, we do it out of the
1014 itembuf = NULL; 1060 loop for performance reasons */
1015 item = NULL; 1061 pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
1016 break; 1062 /* Set cpu key to the starting position in a file (on left block boundary) */
1063 make_cpu_key(&key, inode,
1064 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
1065 TYPE_ANY, 3 /*key length */ );
1066
1067 reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
1068 for (i = 0; i < num_pages; i++) {
1069
1070 head = page_buffers(prepared_pages[i]);
1071 /* For each buffer in the page */
1072 for (bh = head, block_start = 0; bh != head || !block_start;
1073 block_start = block_end, bh = bh->b_this_page) {
1074 if (!bh)
1075 reiserfs_panic(inode->i_sb,
1076 "green-9002: Allocated but absent buffer for a page?");
1077 /* Find where this buffer ends */
1078 block_end = block_start + inode->i_sb->s_blocksize;
1079 if (i == 0 && block_end <= from)
1080 /* if this buffer is before requested data to map, skip it */
1081 continue;
1082
1083 if (i == num_pages - 1 && block_start >= to) {
1084 /* If this buffer is after requested data to map, abort
1085 processing of current page */
1086 break;
1017 } 1087 }
1018 1088
1019 /* Update information about current indirect item */ 1089 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1020 itembuf = get_last_bh( &path ); 1090 /* This is optimisation for a case where buffer is mapped
1021 ih = get_ih( &path ); 1091 and have blocknumber assigned. In case significant amount
1022 item = get_item( &path ); 1092 of such buffers are present, we may avoid some amount
1023 item_pos = path.pos_in_item; 1093 of search_by_key calls.
1094 Probably it would be possible to move parts of this code
1095 out of BKL, but I afraid that would overcomplicate code
1096 without any noticeable benefit.
1097 */
1098 item_pos++;
1099 /* Update the key */
1100 set_cpu_key_k_offset(&key,
1101 cpu_key_k_offset(&key) +
1102 inode->i_sb->s_blocksize);
1103 blocks--; // Decrease the amount of blocks that need to be
1104 // allocated
1105 continue; // Go to the next buffer
1106 }
1024 1107
1025 RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected"); 1108 if (!itembuf || /* if first iteration */
1026 } 1109 item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the
1110 current unformatted_item */
1111 /* Try to find next item */
1112 res =
1113 search_for_position_by_key(inode->i_sb,
1114 &key, &path);
1115 /* Abort if no more items */
1116 if (res != POSITION_FOUND) {
1117 /* make sure later loops don't use this item */
1118 itembuf = NULL;
1119 item = NULL;
1120 break;
1121 }
1122
1123 /* Update information about current indirect item */
1124 itembuf = get_last_bh(&path);
1125 ih = get_ih(&path);
1126 item = get_item(&path);
1127 item_pos = path.pos_in_item;
1128
1129 RFALSE(!is_indirect_le_ih(ih),
1130 "green-9003: indirect item expected");
1131 }
1027 1132
1028 /* See if there is some block associated with the file 1133 /* See if there is some block associated with the file
1029 at that position, map the buffer to this block */ 1134 at that position, map the buffer to this block */
1030 if ( get_block_num(item,item_pos) ) { 1135 if (get_block_num(item, item_pos)) {
1031 map_bh(bh, inode->i_sb, get_block_num(item,item_pos)); 1136 map_bh(bh, inode->i_sb,
1032 blocks--; // Decrease the amount of blocks that need to be 1137 get_block_num(item, item_pos));
1033 // allocated 1138 blocks--; // Decrease the amount of blocks that need to be
1139 // allocated
1140 }
1141 item_pos++;
1142 /* Update the key */
1143 set_cpu_key_k_offset(&key,
1144 cpu_key_k_offset(&key) +
1145 inode->i_sb->s_blocksize);
1034 } 1146 }
1035 item_pos++;
1036 /* Update the key */
1037 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
1038 } 1147 }
1039 } 1148 pathrelse(&path); // Free the path
1040 pathrelse(&path); // Free the path 1149 reiserfs_write_unlock(inode->i_sb);
1041 reiserfs_write_unlock(inode->i_sb);
1042 1150
1043 /* Now zero out unmappend buffers for the first and last pages of 1151 /* Now zero out unmappend buffers for the first and last pages of
1044 write area or issue read requests if page is mapped. */ 1152 write area or issue read requests if page is mapped. */
1045 /* First page, see if it is not uptodate */ 1153 /* First page, see if it is not uptodate */
1046 if ( !PageUptodate(prepared_pages[0]) ) { 1154 if (!PageUptodate(prepared_pages[0])) {
1047 head = page_buffers(prepared_pages[0]); 1155 head = page_buffers(prepared_pages[0]);
1048 1156
1049 /* For each buffer in page */ 1157 /* For each buffer in page */
1050 for(bh = head, block_start = 0; bh != head || !block_start; 1158 for (bh = head, block_start = 0; bh != head || !block_start;
1051 block_start=block_end, bh = bh->b_this_page) { 1159 block_start = block_end, bh = bh->b_this_page) {
1052 1160
1053 if (!bh) 1161 if (!bh)
1054 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); 1162 reiserfs_panic(inode->i_sb,
1055 /* Find where this buffer ends */ 1163 "green-9002: Allocated but absent buffer for a page?");
1056 block_end = block_start+inode->i_sb->s_blocksize; 1164 /* Find where this buffer ends */
1057 if ( block_end <= from ) 1165 block_end = block_start + inode->i_sb->s_blocksize;
1058 /* if this buffer is before requested data to map, skip it*/ 1166 if (block_end <= from)
1059 continue; 1167 /* if this buffer is before requested data to map, skip it */
1060 if ( block_start < from ) { /* Aha, our partial buffer */ 1168 continue;
1061 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to 1169 if (block_start < from) { /* Aha, our partial buffer */
1062 issue READ request for it to 1170 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1063 not loose data */ 1171 issue READ request for it to
1064 ll_rw_block(READ, 1, &bh); 1172 not loose data */
1065 *wait_bh++=bh; 1173 ll_rw_block(READ, 1, &bh);
1066 } else { /* Not mapped, zero it */ 1174 *wait_bh++ = bh;
1067 char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0); 1175 } else { /* Not mapped, zero it */
1068 memset(kaddr+block_start, 0, from-block_start); 1176 char *kaddr =
1069 kunmap_atomic( kaddr, KM_USER0); 1177 kmap_atomic(prepared_pages[0],
1070 set_buffer_uptodate(bh); 1178 KM_USER0);
1071 } 1179 memset(kaddr + block_start, 0,
1180 from - block_start);
1181 kunmap_atomic(kaddr, KM_USER0);
1182 set_buffer_uptodate(bh);
1183 }
1184 }
1072 } 1185 }
1073 }
1074 } 1186 }
1075 1187
1076 /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */ 1188 /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
1077 if ( !PageUptodate(prepared_pages[num_pages-1]) || 1189 if (!PageUptodate(prepared_pages[num_pages - 1]) ||
1078 ((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) { 1190 ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
1079 head = page_buffers(prepared_pages[num_pages-1]); 1191 (inode->i_size >> PAGE_CACHE_SHIFT)) {
1080 1192 head = page_buffers(prepared_pages[num_pages - 1]);
1081 /* for each buffer in page */ 1193
1082 for(bh = head, block_start = 0; bh != head || !block_start; 1194 /* for each buffer in page */
1083 block_start=block_end, bh = bh->b_this_page) { 1195 for (bh = head, block_start = 0; bh != head || !block_start;
1084 1196 block_start = block_end, bh = bh->b_this_page) {
1085 if (!bh) 1197
1086 reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?"); 1198 if (!bh)
1087 /* Find where this buffer ends */ 1199 reiserfs_panic(inode->i_sb,
1088 block_end = block_start+inode->i_sb->s_blocksize; 1200 "green-9002: Allocated but absent buffer for a page?");
1089 if ( block_start >= to ) 1201 /* Find where this buffer ends */
1090 /* if this buffer is after requested data to map, skip it*/ 1202 block_end = block_start + inode->i_sb->s_blocksize;
1091 break; 1203 if (block_start >= to)
1092 if ( block_end > to ) { /* Aha, our partial buffer */ 1204 /* if this buffer is after requested data to map, skip it */
1093 if ( buffer_mapped(bh) ) { /* If it is mapped, we need to 1205 break;
1094 issue READ request for it to 1206 if (block_end > to) { /* Aha, our partial buffer */
1095 not loose data */ 1207 if (buffer_mapped(bh)) { /* If it is mapped, we need to
1096 ll_rw_block(READ, 1, &bh); 1208 issue READ request for it to
1097 *wait_bh++=bh; 1209 not loose data */
1098 } else { /* Not mapped, zero it */ 1210 ll_rw_block(READ, 1, &bh);
1099 char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0); 1211 *wait_bh++ = bh;
1100 memset(kaddr+to, 0, block_end-to); 1212 } else { /* Not mapped, zero it */
1101 kunmap_atomic( kaddr, KM_USER0); 1213 char *kaddr =
1102 set_buffer_uptodate(bh); 1214 kmap_atomic(prepared_pages
1103 } 1215 [num_pages - 1],
1216 KM_USER0);
1217 memset(kaddr + to, 0, block_end - to);
1218 kunmap_atomic(kaddr, KM_USER0);
1219 set_buffer_uptodate(bh);
1220 }
1221 }
1104 } 1222 }
1105 }
1106 } 1223 }
1107 1224
1108 /* Wait for read requests we made to happen, if necessary */ 1225 /* Wait for read requests we made to happen, if necessary */
1109 while(wait_bh > wait) { 1226 while (wait_bh > wait) {
1110 wait_on_buffer(*--wait_bh); 1227 wait_on_buffer(*--wait_bh);
1111 if (!buffer_uptodate(*wait_bh)) { 1228 if (!buffer_uptodate(*wait_bh)) {
1112 res = -EIO; 1229 res = -EIO;
1113 goto failed_read; 1230 goto failed_read;
1231 }
1114 } 1232 }
1115 } 1233
1116 1234 return blocks;
1117 return blocks; 1235 failed_page_grabbing:
1118failed_page_grabbing: 1236 num_pages = i;
1119 num_pages = i; 1237 failed_read:
1120failed_read: 1238 reiserfs_unprepare_pages(prepared_pages, num_pages);
1121 reiserfs_unprepare_pages(prepared_pages, num_pages); 1239 return res;
1122 return res;
1123} 1240}
1124 1241
1125/* Write @count bytes at position @ppos in a file indicated by @file 1242/* Write @count bytes at position @ppos in a file indicated by @file
@@ -1148,262 +1265,305 @@ failed_read:
1148 Future Features: providing search_by_key with hints. 1265 Future Features: providing search_by_key with hints.
1149 1266
1150*/ 1267*/
1151static ssize_t reiserfs_file_write( struct file *file, /* the file we are going to write into */ 1268static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */
1152 const char __user *buf, /* pointer to user supplied data 1269 const char __user * buf, /* pointer to user supplied data
1153(in userspace) */ 1270 (in userspace) */
1154 size_t count, /* amount of bytes to write */ 1271 size_t count, /* amount of bytes to write */
1155 loff_t *ppos /* pointer to position in file that we start writing at. Should be updated to 1272 loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to
1156 * new current position before returning. */ ) 1273 * new current position before returning. */
1274 )
1157{ 1275{
1158 size_t already_written = 0; // Number of bytes already written to the file. 1276 size_t already_written = 0; // Number of bytes already written to the file.
1159 loff_t pos; // Current position in the file. 1277 loff_t pos; // Current position in the file.
1160 ssize_t res; // return value of various functions that we call. 1278 ssize_t res; // return value of various functions that we call.
1161 int err = 0; 1279 int err = 0;
1162 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to. 1280 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to.
1163 /* To simplify coding at this time, we store 1281 /* To simplify coding at this time, we store
1164 locked pages in array for now */ 1282 locked pages in array for now */
1165 struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; 1283 struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
1166 struct reiserfs_transaction_handle th; 1284 struct reiserfs_transaction_handle th;
1167 th.t_trans_id = 0; 1285 th.t_trans_id = 0;
1168 1286
1169 if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment 1287 if (file->f_flags & O_DIRECT) { // Direct IO needs treatment
1170 ssize_t result, after_file_end = 0; 1288 ssize_t result, after_file_end = 0;
1171 if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) { 1289 if ((*ppos + count >= inode->i_size)
1172 /* If we are appending a file, we need to put this savelink in here. 1290 || (file->f_flags & O_APPEND)) {
1173 If we will crash while doing direct io, finish_unfinished will 1291 /* If we are appending a file, we need to put this savelink in here.
1174 cut the garbage from the file end. */ 1292 If we will crash while doing direct io, finish_unfinished will
1175 reiserfs_write_lock(inode->i_sb); 1293 cut the garbage from the file end. */
1176 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1294 reiserfs_write_lock(inode->i_sb);
1177 if (err) { 1295 err =
1178 reiserfs_write_unlock (inode->i_sb); 1296 journal_begin(&th, inode->i_sb,
1179 return err; 1297 JOURNAL_PER_BALANCE_CNT);
1180 } 1298 if (err) {
1181 reiserfs_update_inode_transaction(inode); 1299 reiserfs_write_unlock(inode->i_sb);
1182 add_save_link (&th, inode, 1 /* Truncate */); 1300 return err;
1183 after_file_end = 1; 1301 }
1184 err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1302 reiserfs_update_inode_transaction(inode);
1185 reiserfs_write_unlock(inode->i_sb); 1303 add_save_link(&th, inode, 1 /* Truncate */ );
1186 if (err) 1304 after_file_end = 1;
1187 return err; 1305 err =
1188 } 1306 journal_end(&th, inode->i_sb,
1189 result = generic_file_write(file, buf, count, ppos); 1307 JOURNAL_PER_BALANCE_CNT);
1190 1308 reiserfs_write_unlock(inode->i_sb);
1191 if ( after_file_end ) { /* Now update i_size and remove the savelink */ 1309 if (err)
1192 struct reiserfs_transaction_handle th; 1310 return err;
1193 reiserfs_write_lock(inode->i_sb); 1311 }
1194 err = journal_begin(&th, inode->i_sb, 1); 1312 result = generic_file_write(file, buf, count, ppos);
1195 if (err) { 1313
1196 reiserfs_write_unlock (inode->i_sb); 1314 if (after_file_end) { /* Now update i_size and remove the savelink */
1197 return err; 1315 struct reiserfs_transaction_handle th;
1198 } 1316 reiserfs_write_lock(inode->i_sb);
1199 reiserfs_update_inode_transaction(inode); 1317 err = journal_begin(&th, inode->i_sb, 1);
1200 reiserfs_update_sd(&th, inode); 1318 if (err) {
1201 err = journal_end(&th, inode->i_sb, 1); 1319 reiserfs_write_unlock(inode->i_sb);
1202 if (err) { 1320 return err;
1203 reiserfs_write_unlock (inode->i_sb); 1321 }
1204 return err; 1322 reiserfs_update_inode_transaction(inode);
1205 } 1323 reiserfs_update_sd(&th, inode);
1206 err = remove_save_link (inode, 1/* truncate */); 1324 err = journal_end(&th, inode->i_sb, 1);
1207 reiserfs_write_unlock(inode->i_sb); 1325 if (err) {
1208 if (err) 1326 reiserfs_write_unlock(inode->i_sb);
1209 return err; 1327 return err;
1210 } 1328 }
1211 1329 err = remove_save_link(inode, 1 /* truncate */ );
1212 return result; 1330 reiserfs_write_unlock(inode->i_sb);
1213 } 1331 if (err)
1214 1332 return err;
1215 if ( unlikely((ssize_t) count < 0 )) 1333 }
1216 return -EINVAL;
1217
1218 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1219 return -EFAULT;
1220
1221 down(&inode->i_sem); // locks the entire file for just us
1222
1223 pos = *ppos;
1224
1225 /* Check if we can write to specified region of file, file
1226 is not overly big and this kind of stuff. Adjust pos and
1227 count, if needed */
1228 res = generic_write_checks(file, &pos, &count, 0);
1229 if (res)
1230 goto out;
1231
1232 if ( count == 0 )
1233 goto out;
1234
1235 res = remove_suid(file->f_dentry);
1236 if (res)
1237 goto out;
1238
1239 inode_update_time(inode, 1); /* Both mtime and ctime */
1240
1241 // Ok, we are done with all the checks.
1242 1334
1243 // Now we should start real work 1335 return result;
1336 }
1244 1337
1245 /* If we are going to write past the file's packed tail or if we are going 1338 if (unlikely((ssize_t) count < 0))
1246 to overwrite part of the tail, we need that tail to be converted into 1339 return -EINVAL;
1247 unformatted node */ 1340
1248 res = reiserfs_check_for_tail_and_convert( inode, pos, count); 1341 if (unlikely(!access_ok(VERIFY_READ, buf, count)))
1249 if (res) 1342 return -EFAULT;
1250 goto out; 1343
1344 down(&inode->i_sem); // locks the entire file for just us
1345
1346 pos = *ppos;
1347
1348 /* Check if we can write to specified region of file, file
1349 is not overly big and this kind of stuff. Adjust pos and
1350 count, if needed */
1351 res = generic_write_checks(file, &pos, &count, 0);
1352 if (res)
1353 goto out;
1354
1355 if (count == 0)
1356 goto out;
1357
1358 res = remove_suid(file->f_dentry);
1359 if (res)
1360 goto out;
1361
1362 inode_update_time(inode, 1); /* Both mtime and ctime */
1363
1364 // Ok, we are done with all the checks.
1365
1366 // Now we should start real work
1367
1368 /* If we are going to write past the file's packed tail or if we are going
1369 to overwrite part of the tail, we need that tail to be converted into
1370 unformatted node */
1371 res = reiserfs_check_for_tail_and_convert(inode, pos, count);
1372 if (res)
1373 goto out;
1374
1375 while (count > 0) {
1376 /* This is the main loop in which we running until some error occures
1377 or until we write all of the data. */
1378 size_t num_pages; /* amount of pages we are going to write this iteration */
1379 size_t write_bytes; /* amount of bytes to write during this iteration */
1380 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
1381
1382 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
1383 num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
1384 pages */
1385 ((count +
1386 (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
1387 /* convert size to amount of
1388 pages */
1389 reiserfs_write_lock(inode->i_sb);
1390 if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
1391 || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
1392 /* If we were asked to write more data than we want to or if there
1393 is not that much space, then we shorten amount of data to write
1394 for this iteration. */
1395 num_pages =
1396 min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
1397 reiserfs_can_fit_pages(inode->i_sb));
1398 /* Also we should not forget to set size in bytes accordingly */
1399 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1400 (pos & (PAGE_CACHE_SIZE - 1));
1401 /* If position is not on the
1402 start of the page, we need
1403 to substract the offset
1404 within page */
1405 } else
1406 write_bytes = count;
1407
1408 /* reserve the blocks to be allocated later, so that later on
1409 we still have the space to write the blocks to */
1410 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1411 num_pages <<
1412 (PAGE_CACHE_SHIFT -
1413 inode->i_blkbits));
1414 reiserfs_write_unlock(inode->i_sb);
1415
1416 if (!num_pages) { /* If we do not have enough space even for a single page... */
1417 if (pos >
1418 inode->i_size + inode->i_sb->s_blocksize -
1419 (pos & (inode->i_sb->s_blocksize - 1))) {
1420 res = -ENOSPC;
1421 break; // In case we are writing past the end of the last file block, break.
1422 }
1423 // Otherwise we are possibly overwriting the file, so
1424 // let's set write size to be equal or less than blocksize.
1425 // This way we get it correctly for file holes.
1426 // But overwriting files on absolutelly full volumes would not
1427 // be very efficient. Well, people are not supposed to fill
1428 // 100% of disk space anyway.
1429 write_bytes =
1430 min_t(size_t, count,
1431 inode->i_sb->s_blocksize -
1432 (pos & (inode->i_sb->s_blocksize - 1)));
1433 num_pages = 1;
1434 // No blocks were claimed before, so do it now.
1435 reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1436 1 <<
1437 (PAGE_CACHE_SHIFT
1438 -
1439 inode->
1440 i_blkbits));
1441 }
1251 1442
1252 while ( count > 0) { 1443 /* Prepare for writing into the region, read in all the
1253 /* This is the main loop in which we running until some error occures 1444 partially overwritten pages, if needed. And lock the pages,
1254 or until we write all of the data. */ 1445 so that nobody else can access these until we are done.
1255 size_t num_pages;/* amount of pages we are going to write this iteration */ 1446 We get number of actual blocks needed as a result. */
1256 size_t write_bytes; /* amount of bytes to write during this iteration */ 1447 blocks_to_allocate =
1257 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ 1448 reiserfs_prepare_file_region_for_write(inode, pos,
1258 1449 num_pages,
1259 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/ 1450 write_bytes,
1260 num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial 1451 prepared_pages);
1261 pages */ 1452 if (blocks_to_allocate < 0) {
1262 ((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT); 1453 res = blocks_to_allocate;
1263 /* convert size to amount of 1454 reiserfs_release_claimed_blocks(inode->i_sb,
1264 pages */ 1455 num_pages <<
1265 reiserfs_write_lock(inode->i_sb); 1456 (PAGE_CACHE_SHIFT -
1266 if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME 1457 inode->i_blkbits));
1267 || num_pages > reiserfs_can_fit_pages(inode->i_sb) ) { 1458 break;
1268 /* If we were asked to write more data than we want to or if there 1459 }
1269 is not that much space, then we shorten amount of data to write
1270 for this iteration. */
1271 num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
1272 /* Also we should not forget to set size in bytes accordingly */
1273 write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
1274 (pos & (PAGE_CACHE_SIZE-1));
1275 /* If position is not on the
1276 start of the page, we need
1277 to substract the offset
1278 within page */
1279 } else
1280 write_bytes = count;
1281 1460
1282 /* reserve the blocks to be allocated later, so that later on 1461 /* First we correct our estimate of how many blocks we need */
1283 we still have the space to write the blocks to */ 1462 reiserfs_release_claimed_blocks(inode->i_sb,
1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1463 (num_pages <<
1285 reiserfs_write_unlock(inode->i_sb); 1464 (PAGE_CACHE_SHIFT -
1465 inode->i_sb->
1466 s_blocksize_bits)) -
1467 blocks_to_allocate);
1468
1469 if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */
1470 /* Fill in all the possible holes and append the file if needed */
1471 res =
1472 reiserfs_allocate_blocks_for_region(&th, inode, pos,
1473 num_pages,
1474 write_bytes,
1475 prepared_pages,
1476 blocks_to_allocate);
1477 }
1286 1478
1287 if ( !num_pages ) { /* If we do not have enough space even for a single page... */ 1479 /* well, we have allocated the blocks, so it is time to free
1288 if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) { 1480 the reservation we made earlier. */
1289 res = -ENOSPC; 1481 reiserfs_release_claimed_blocks(inode->i_sb,
1290 break; // In case we are writing past the end of the last file block, break. 1482 blocks_to_allocate);
1291 } 1483 if (res) {
1292 // Otherwise we are possibly overwriting the file, so 1484 reiserfs_unprepare_pages(prepared_pages, num_pages);
1293 // let's set write size to be equal or less than blocksize. 1485 break;
1294 // This way we get it correctly for file holes. 1486 }
1295 // But overwriting files on absolutelly full volumes would not
1296 // be very efficient. Well, people are not supposed to fill
1297 // 100% of disk space anyway.
1298 write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
1299 num_pages = 1;
1300 // No blocks were claimed before, so do it now.
1301 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1302 }
1303 1487
1304 /* Prepare for writing into the region, read in all the 1488/* NOTE that allocating blocks and filling blocks can be done in reverse order
1305 partially overwritten pages, if needed. And lock the pages, 1489 and probably we would do that just to get rid of garbage in files after a
1306 so that nobody else can access these until we are done. 1490 crash */
1307 We get number of actual blocks needed as a result.*/
1308 blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages);
1309 if ( blocks_to_allocate < 0 ) {
1310 res = blocks_to_allocate;
1311 reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
1312 break;
1313 }
1314 1491
1315 /* First we correct our estimate of how many blocks we need */ 1492 /* Copy data from user-supplied buffer to file's pages */
1316 reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate ); 1493 res =
1494 reiserfs_copy_from_user_to_file_region(pos, num_pages,
1495 write_bytes,
1496 prepared_pages, buf);
1497 if (res) {
1498 reiserfs_unprepare_pages(prepared_pages, num_pages);
1499 break;
1500 }
1317 1501
1318 if ( blocks_to_allocate > 0) {/*We only allocate blocks if we need to*/ 1502 /* Send the pages to disk and unlock them. */
1319 /* Fill in all the possible holes and append the file if needed */ 1503 res =
1320 res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate); 1504 reiserfs_submit_file_region_for_write(&th, inode, pos,
1505 num_pages,
1506 write_bytes,
1507 prepared_pages);
1508 if (res)
1509 break;
1510
1511 already_written += write_bytes;
1512 buf += write_bytes;
1513 *ppos = pos += write_bytes;
1514 count -= write_bytes;
1515 balance_dirty_pages_ratelimited(inode->i_mapping);
1321 } 1516 }
1322 1517
1323 /* well, we have allocated the blocks, so it is time to free 1518 /* this is only true on error */
1324 the reservation we made earlier. */ 1519 if (th.t_trans_id) {
1325 reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate); 1520 reiserfs_write_lock(inode->i_sb);
1326 if ( res ) { 1521 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1327 reiserfs_unprepare_pages(prepared_pages, num_pages); 1522 reiserfs_write_unlock(inode->i_sb);
1328 break; 1523 if (err) {
1524 res = err;
1525 goto out;
1526 }
1329 } 1527 }
1330 1528
1331/* NOTE that allocating blocks and filling blocks can be done in reverse order 1529 if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
1332 and probably we would do that just to get rid of garbage in files after a 1530 res =
1333 crash */ 1531 generic_osync_inode(inode, file->f_mapping,
1532 OSYNC_METADATA | OSYNC_DATA);
1334 1533
1335 /* Copy data from user-supplied buffer to file's pages */ 1534 up(&inode->i_sem);
1336 res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf); 1535 reiserfs_async_progress_wait(inode->i_sb);
1337 if ( res ) { 1536 return (already_written != 0) ? already_written : res;
1338 reiserfs_unprepare_pages(prepared_pages, num_pages);
1339 break;
1340 }
1341 1537
1342 /* Send the pages to disk and unlock them. */ 1538 out:
1343 res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages, 1539 up(&inode->i_sem); // unlock the file on exit.
1344 write_bytes,prepared_pages); 1540 return res;
1345 if ( res )
1346 break;
1347
1348 already_written += write_bytes;
1349 buf += write_bytes;
1350 *ppos = pos += write_bytes;
1351 count -= write_bytes;
1352 balance_dirty_pages_ratelimited(inode->i_mapping);
1353 }
1354
1355 /* this is only true on error */
1356 if (th.t_trans_id) {
1357 reiserfs_write_lock(inode->i_sb);
1358 err = journal_end(&th, th.t_super, th.t_blocks_allocated);
1359 reiserfs_write_unlock(inode->i_sb);
1360 if (err) {
1361 res = err;
1362 goto out;
1363 }
1364 }
1365
1366 if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
1367 res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA|OSYNC_DATA);
1368
1369 up(&inode->i_sem);
1370 reiserfs_async_progress_wait(inode->i_sb);
1371 return (already_written != 0)?already_written:res;
1372
1373out:
1374 up(&inode->i_sem); // unlock the file on exit.
1375 return res;
1376} 1541}
1377 1542
1378static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user *buf, 1543static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
1379 size_t count, loff_t pos) 1544 size_t count, loff_t pos)
1380{ 1545{
1381 return generic_file_aio_write(iocb, buf, count, pos); 1546 return generic_file_aio_write(iocb, buf, count, pos);
1382} 1547}
1383 1548
1384
1385
1386struct file_operations reiserfs_file_operations = { 1549struct file_operations reiserfs_file_operations = {
1387 .read = generic_file_read, 1550 .read = generic_file_read,
1388 .write = reiserfs_file_write, 1551 .write = reiserfs_file_write,
1389 .ioctl = reiserfs_ioctl, 1552 .ioctl = reiserfs_ioctl,
1390 .mmap = generic_file_mmap, 1553 .mmap = generic_file_mmap,
1391 .release = reiserfs_file_release, 1554 .release = reiserfs_file_release,
1392 .fsync = reiserfs_sync_file, 1555 .fsync = reiserfs_sync_file,
1393 .sendfile = generic_file_sendfile, 1556 .sendfile = generic_file_sendfile,
1394 .aio_read = generic_file_aio_read, 1557 .aio_read = generic_file_aio_read,
1395 .aio_write = reiserfs_aio_write, 1558 .aio_write = reiserfs_aio_write,
1396}; 1559};
1397 1560
1398 1561struct inode_operations reiserfs_file_inode_operations = {
1399struct inode_operations reiserfs_file_inode_operations = { 1562 .truncate = reiserfs_vfs_truncate_file,
1400 .truncate = reiserfs_vfs_truncate_file, 1563 .setattr = reiserfs_setattr,
1401 .setattr = reiserfs_setattr, 1564 .setxattr = reiserfs_setxattr,
1402 .setxattr = reiserfs_setxattr, 1565 .getxattr = reiserfs_getxattr,
1403 .getxattr = reiserfs_getxattr, 1566 .listxattr = reiserfs_listxattr,
1404 .listxattr = reiserfs_listxattr, 1567 .removexattr = reiserfs_removexattr,
1405 .removexattr = reiserfs_removexattr, 1568 .permission = reiserfs_permission,
1406 .permission = reiserfs_permission,
1407}; 1569};
1408
1409
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index e4f64be9e15..2706e2adffa 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -34,14 +34,12 @@
34 ** 34 **
35 **/ 35 **/
36 36
37
38#include <linux/config.h> 37#include <linux/config.h>
39#include <linux/time.h> 38#include <linux/time.h>
40#include <linux/string.h> 39#include <linux/string.h>
41#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
42#include <linux/buffer_head.h> 41#include <linux/buffer_head.h>
43 42
44
45/* To make any changes in the tree we find a node, that contains item 43/* To make any changes in the tree we find a node, that contains item
46 to be changed/deleted or position in the node we insert a new item 44 to be changed/deleted or position in the node we insert a new item
47 to. We call this node S. To do balancing we need to decide what we 45 to. We call this node S. To do balancing we need to decide what we
@@ -56,490 +54,522 @@
56 have to have if we do not any shiftings, if we shift to left/right 54 have to have if we do not any shiftings, if we shift to left/right
57 neighbor or to both. */ 55 neighbor or to both. */
58 56
59
60/* taking item number in virtual node, returns number of item, that it has in source buffer */ 57/* taking item number in virtual node, returns number of item, that it has in source buffer */
61static inline int old_item_num (int new_num, int affected_item_num, int mode) 58static inline int old_item_num(int new_num, int affected_item_num, int mode)
62{ 59{
63 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 60 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
64 return new_num; 61 return new_num;
65 62
66 if (mode == M_INSERT) { 63 if (mode == M_INSERT) {
67 64
68 RFALSE( new_num == 0, 65 RFALSE(new_num == 0,
69 "vs-8005: for INSERT mode and item number of inserted item"); 66 "vs-8005: for INSERT mode and item number of inserted item");
70 67
71 return new_num - 1; 68 return new_num - 1;
72 } 69 }
73 70
74 RFALSE( mode != M_DELETE, 71 RFALSE(mode != M_DELETE,
75 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode); 72 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
76 /* delete mode */ 73 mode);
77 return new_num + 1; 74 /* delete mode */
75 return new_num + 1;
78} 76}
79 77
80static void create_virtual_node (struct tree_balance * tb, int h) 78static void create_virtual_node(struct tree_balance *tb, int h)
81{ 79{
82 struct item_head * ih; 80 struct item_head *ih;
83 struct virtual_node * vn = tb->tb_vn; 81 struct virtual_node *vn = tb->tb_vn;
84 int new_num; 82 int new_num;
85 struct buffer_head * Sh; /* this comes from tb->S[h] */ 83 struct buffer_head *Sh; /* this comes from tb->S[h] */
86 84
87 Sh = PATH_H_PBUFFER (tb->tb_path, h); 85 Sh = PATH_H_PBUFFER(tb->tb_path, h);
88 86
89 /* size of changed node */ 87 /* size of changed node */
90 vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h]; 88 vn->vn_size =
89 MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];
91 90
92 /* for internal nodes array if virtual items is not created */ 91 /* for internal nodes array if virtual items is not created */
93 if (h) { 92 if (h) {
94 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); 93 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
95 return; 94 return;
96 }
97
98 /* number of items in virtual node */
99 vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0);
100
101 /* first virtual item */
102 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
103 memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item));
104 vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item);
105
106
107 /* first item in the node */
108 ih = B_N_PITEM_HEAD (Sh, 0);
109
110 /* define the mergeability for 0-th item (if it is not being deleted) */
111 if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
112 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
113
114 /* go through all items those remain in the virtual node (except for the new (inserted) one) */
115 for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) {
116 int j;
117 struct virtual_item * vi = vn->vn_vi + new_num;
118 int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1);
119
120
121 if (is_affected && vn->vn_mode == M_INSERT)
122 continue;
123
124 /* get item number in source node */
125 j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode);
126
127 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
128 vi->vi_ih = ih + j;
129 vi->vi_item = B_I_PITEM (Sh, ih + j);
130 vi->vi_uarea = vn->vn_free_ptr;
131
132 // FIXME: there is no check, that item operation did not
133 // consume too much memory
134 vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]);
135 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
136 reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: "
137 "virtual node space consumed");
138
139 if (!is_affected)
140 /* this is not being changed */
141 continue;
142
143 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
144 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
145 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
146 } 95 }
147 }
148
149
150 /* virtual inserted item is not defined yet */
151 if (vn->vn_mode == M_INSERT) {
152 struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num;
153
154 RFALSE( vn->vn_ins_ih == 0,
155 "vs-8040: item header of inserted item is not specified");
156 vi->vi_item_len = tb->insert_size[0];
157 vi->vi_ih = vn->vn_ins_ih;
158 vi->vi_item = vn->vn_data;
159 vi->vi_uarea = vn->vn_free_ptr;
160
161 op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]);
162 }
163
164 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
165 if (tb->CFR[0]) {
166 struct reiserfs_key * key;
167
168 key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]);
169 if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE ||
170 vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1))
171 vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE;
172 96
173#ifdef CONFIG_REISERFS_CHECK 97 /* number of items in virtual node */
174 if (op_is_left_mergeable (key, Sh->b_size) && 98 vn->vn_nr_item =
175 !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) { 99 B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
176 /* we delete last item and it could be merged with right neighbor's first item */ 100 ((vn->vn_mode == M_DELETE) ? 1 : 0);
177 if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) && 101
178 I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) { 102 /* first virtual item */
179 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ 103 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
180 print_block (Sh, 0, -1, -1); 104 memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
181 reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", 105 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);
182 key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); 106
183 } else 107 /* first item in the node */
184 /* we can delete directory item, that has only one directory entry in it */ 108 ih = B_N_PITEM_HEAD(Sh, 0);
185 ; 109
110 /* define the mergeability for 0-th item (if it is not being deleted) */
111 if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
112 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
113 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;
114
115 /* go through all items those remain in the virtual node (except for the new (inserted) one) */
116 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
117 int j;
118 struct virtual_item *vi = vn->vn_vi + new_num;
119 int is_affected =
120 ((new_num != vn->vn_affected_item_num) ? 0 : 1);
121
122 if (is_affected && vn->vn_mode == M_INSERT)
123 continue;
124
125 /* get item number in source node */
126 j = old_item_num(new_num, vn->vn_affected_item_num,
127 vn->vn_mode);
128
129 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
130 vi->vi_ih = ih + j;
131 vi->vi_item = B_I_PITEM(Sh, ih + j);
132 vi->vi_uarea = vn->vn_free_ptr;
133
134 // FIXME: there is no check, that item operation did not
135 // consume too much memory
136 vn->vn_free_ptr +=
137 op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
138 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
139 reiserfs_panic(tb->tb_sb,
140 "vs-8030: create_virtual_node: "
141 "virtual node space consumed");
142
143 if (!is_affected)
144 /* this is not being changed */
145 continue;
146
147 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
148 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
149 vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted
150 }
186 } 151 }
152
153 /* virtual inserted item is not defined yet */
154 if (vn->vn_mode == M_INSERT) {
155 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;
156
157 RFALSE(vn->vn_ins_ih == 0,
158 "vs-8040: item header of inserted item is not specified");
159 vi->vi_item_len = tb->insert_size[0];
160 vi->vi_ih = vn->vn_ins_ih;
161 vi->vi_item = vn->vn_data;
162 vi->vi_uarea = vn->vn_free_ptr;
163
164 op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
165 tb->insert_size[0]);
166 }
167
168 /* set right merge flag we take right delimiting key and check whether it is a mergeable item */
169 if (tb->CFR[0]) {
170 struct reiserfs_key *key;
171
172 key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
173 if (op_is_left_mergeable(key, Sh->b_size)
174 && (vn->vn_mode != M_DELETE
175 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
176 vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
177 VI_TYPE_RIGHT_MERGEABLE;
178
179#ifdef CONFIG_REISERFS_CHECK
180 if (op_is_left_mergeable(key, Sh->b_size) &&
181 !(vn->vn_mode != M_DELETE
182 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
183 /* we delete last item and it could be merged with right neighbor's first item */
184 if (!
185 (B_NR_ITEMS(Sh) == 1
186 && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
187 && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
188 /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
189 print_block(Sh, 0, -1, -1);
190 reiserfs_panic(tb->tb_sb,
191 "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
192 key, vn->vn_affected_item_num,
193 vn->vn_mode, M_DELETE);
194 } else
195 /* we can delete directory item, that has only one directory entry in it */
196 ;
197 }
187#endif 198#endif
188
189 }
190}
191 199
200 }
201}
192 202
193/* using virtual node check, how many items can be shifted to left 203/* using virtual node check, how many items can be shifted to left
194 neighbor */ 204 neighbor */
195static void check_left (struct tree_balance * tb, int h, int cur_free) 205static void check_left(struct tree_balance *tb, int h, int cur_free)
196{ 206{
197 int i; 207 int i;
198 struct virtual_node * vn = tb->tb_vn; 208 struct virtual_node *vn = tb->tb_vn;
199 struct virtual_item * vi; 209 struct virtual_item *vi;
200 int d_size, ih_size; 210 int d_size, ih_size;
201 211
202 RFALSE( cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); 212 RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);
203 213
204 /* internal level */ 214 /* internal level */
205 if (h > 0) { 215 if (h > 0) {
206 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 216 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
207 return; 217 return;
208 } 218 }
209 219
210 /* leaf level */ 220 /* leaf level */
211 221
212 if (!cur_free || !vn->vn_nr_item) { 222 if (!cur_free || !vn->vn_nr_item) {
213 /* no free space or nothing to move */ 223 /* no free space or nothing to move */
214 tb->lnum[h] = 0; 224 tb->lnum[h] = 0;
215 tb->lbytes = -1; 225 tb->lbytes = -1;
216 return; 226 return;
217 } 227 }
218 228
219 RFALSE( !PATH_H_PPARENT (tb->tb_path, 0), 229 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
220 "vs-8055: parent does not exist or invalid"); 230 "vs-8055: parent does not exist or invalid");
221 231
222 vi = vn->vn_vi; 232 vi = vn->vn_vi;
223 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { 233 if ((unsigned int)cur_free >=
224 /* all contents of S[0] fits into L[0] */ 234 (vn->vn_size -
235 ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
236 /* all contents of S[0] fits into L[0] */
225 237
226 RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 238 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
227 "vs-8055: invalid mode or balance condition failed"); 239 "vs-8055: invalid mode or balance condition failed");
228 240
229 tb->lnum[0] = vn->vn_nr_item; 241 tb->lnum[0] = vn->vn_nr_item;
230 tb->lbytes = -1; 242 tb->lbytes = -1;
231 return; 243 return;
232 }
233
234
235 d_size = 0, ih_size = IH_SIZE;
236
237 /* first item may be merge with last item in left neighbor */
238 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
239 d_size = -((int)IH_SIZE), ih_size = 0;
240
241 tb->lnum[0] = 0;
242 for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) {
243 d_size += vi->vi_item_len;
244 if (cur_free >= d_size) {
245 /* the item can be shifted entirely */
246 cur_free -= d_size;
247 tb->lnum[0] ++;
248 continue;
249 } 244 }
250 245
251 /* the item cannot be shifted entirely, try to split it */ 246 d_size = 0, ih_size = IH_SIZE;
252 /* check whether L[0] can hold ih and at least one byte of the item body */ 247
253 if (cur_free <= ih_size) { 248 /* first item may be merge with last item in left neighbor */
254 /* cannot shift even a part of the current item */ 249 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
255 tb->lbytes = -1; 250 d_size = -((int)IH_SIZE), ih_size = 0;
256 return; 251
252 tb->lnum[0] = 0;
253 for (i = 0; i < vn->vn_nr_item;
254 i++, ih_size = IH_SIZE, d_size = 0, vi++) {
255 d_size += vi->vi_item_len;
256 if (cur_free >= d_size) {
257 /* the item can be shifted entirely */
258 cur_free -= d_size;
259 tb->lnum[0]++;
260 continue;
261 }
262
263 /* the item cannot be shifted entirely, try to split it */
264 /* check whether L[0] can hold ih and at least one byte of the item body */
265 if (cur_free <= ih_size) {
266 /* cannot shift even a part of the current item */
267 tb->lbytes = -1;
268 return;
269 }
270 cur_free -= ih_size;
271
272 tb->lbytes = op_check_left(vi, cur_free, 0, 0);
273 if (tb->lbytes != -1)
274 /* count partially shifted item */
275 tb->lnum[0]++;
276
277 break;
257 } 278 }
258 cur_free -= ih_size;
259
260 tb->lbytes = op_check_left (vi, cur_free, 0, 0);
261 if (tb->lbytes != -1)
262 /* count partially shifted item */
263 tb->lnum[0] ++;
264
265 break;
266 }
267
268 return;
269}
270 279
280 return;
281}
271 282
272/* using virtual node check, how many items can be shifted to right 283/* using virtual node check, how many items can be shifted to right
273 neighbor */ 284 neighbor */
274static void check_right (struct tree_balance * tb, int h, int cur_free) 285static void check_right(struct tree_balance *tb, int h, int cur_free)
275{ 286{
276 int i; 287 int i;
277 struct virtual_node * vn = tb->tb_vn; 288 struct virtual_node *vn = tb->tb_vn;
278 struct virtual_item * vi; 289 struct virtual_item *vi;
279 int d_size, ih_size; 290 int d_size, ih_size;
280 291
281 RFALSE( cur_free < 0, "vs-8070: cur_free < 0"); 292 RFALSE(cur_free < 0, "vs-8070: cur_free < 0");
282 293
283 /* internal level */ 294 /* internal level */
284 if (h > 0) { 295 if (h > 0) {
285 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 296 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
286 return; 297 return;
287 }
288
289 /* leaf level */
290
291 if (!cur_free || !vn->vn_nr_item) {
292 /* no free space */
293 tb->rnum[h] = 0;
294 tb->rbytes = -1;
295 return;
296 }
297
298 RFALSE( !PATH_H_PPARENT (tb->tb_path, 0),
299 "vs-8075: parent does not exist or invalid");
300
301 vi = vn->vn_vi + vn->vn_nr_item - 1;
302 if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
303 /* all contents of S[0] fits into R[0] */
304
305 RFALSE( vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
306 "vs-8080: invalid mode or balance condition failed");
307
308 tb->rnum[h] = vn->vn_nr_item;
309 tb->rbytes = -1;
310 return;
311 }
312
313 d_size = 0, ih_size = IH_SIZE;
314
315 /* last item may be merge with first item in right neighbor */
316 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
317 d_size = -(int)IH_SIZE, ih_size = 0;
318
319 tb->rnum[0] = 0;
320 for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) {
321 d_size += vi->vi_item_len;
322 if (cur_free >= d_size) {
323 /* the item can be shifted entirely */
324 cur_free -= d_size;
325 tb->rnum[0] ++;
326 continue;
327 } 298 }
328 299
329 /* check whether R[0] can hold ih and at least one byte of the item body */ 300 /* leaf level */
330 if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */ 301
331 tb->rbytes = -1; 302 if (!cur_free || !vn->vn_nr_item) {
332 return; 303 /* no free space */
304 tb->rnum[h] = 0;
305 tb->rbytes = -1;
306 return;
333 } 307 }
334
335 /* R[0] can hold the header of the item and at least one byte of its body */
336 cur_free -= ih_size; /* cur_free is still > 0 */
337
338 tb->rbytes = op_check_right (vi, cur_free);
339 if (tb->rbytes != -1)
340 /* count partially shifted item */
341 tb->rnum[0] ++;
342
343 break;
344 }
345
346 return;
347}
348 308
309 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
310 "vs-8075: parent does not exist or invalid");
311
312 vi = vn->vn_vi + vn->vn_nr_item - 1;
313 if ((unsigned int)cur_free >=
314 (vn->vn_size -
315 ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
316 /* all contents of S[0] fits into R[0] */
317
318 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
319 "vs-8080: invalid mode or balance condition failed");
320
321 tb->rnum[h] = vn->vn_nr_item;
322 tb->rbytes = -1;
323 return;
324 }
325
326 d_size = 0, ih_size = IH_SIZE;
327
328 /* last item may be merge with first item in right neighbor */
329 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
330 d_size = -(int)IH_SIZE, ih_size = 0;
331
332 tb->rnum[0] = 0;
333 for (i = vn->vn_nr_item - 1; i >= 0;
334 i--, d_size = 0, ih_size = IH_SIZE, vi--) {
335 d_size += vi->vi_item_len;
336 if (cur_free >= d_size) {
337 /* the item can be shifted entirely */
338 cur_free -= d_size;
339 tb->rnum[0]++;
340 continue;
341 }
342
343 /* check whether R[0] can hold ih and at least one byte of the item body */
344 if (cur_free <= ih_size) { /* cannot shift even a part of the current item */
345 tb->rbytes = -1;
346 return;
347 }
348
349 /* R[0] can hold the header of the item and at least one byte of its body */
350 cur_free -= ih_size; /* cur_free is still > 0 */
351
352 tb->rbytes = op_check_right(vi, cur_free);
353 if (tb->rbytes != -1)
354 /* count partially shifted item */
355 tb->rnum[0]++;
356
357 break;
358 }
359
360 return;
361}
349 362
350/* 363/*
351 * from - number of items, which are shifted to left neighbor entirely 364 * from - number of items, which are shifted to left neighbor entirely
352 * to - number of item, which are shifted to right neighbor entirely 365 * to - number of item, which are shifted to right neighbor entirely
353 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor 366 * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
354 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ 367 * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
355static int get_num_ver (int mode, struct tree_balance * tb, int h, 368static int get_num_ver(int mode, struct tree_balance *tb, int h,
356 int from, int from_bytes, 369 int from, int from_bytes,
357 int to, int to_bytes, 370 int to, int to_bytes, short *snum012, int flow)
358 short * snum012, int flow
359 )
360{ 371{
361 int i; 372 int i;
362 int cur_free; 373 int cur_free;
363 // int bytes; 374 // int bytes;
364 int units; 375 int units;
365 struct virtual_node * vn = tb->tb_vn; 376 struct virtual_node *vn = tb->tb_vn;
366 // struct virtual_item * vi; 377 // struct virtual_item * vi;
367 378
368 int total_node_size, max_node_size, current_item_size; 379 int total_node_size, max_node_size, current_item_size;
369 int needed_nodes; 380 int needed_nodes;
370 int start_item, /* position of item we start filling node from */ 381 int start_item, /* position of item we start filling node from */
371 end_item, /* position of item we finish filling node by */ 382 end_item, /* position of item we finish filling node by */
372 start_bytes,/* number of first bytes (entries for directory) of start_item-th item 383 start_bytes, /* number of first bytes (entries for directory) of start_item-th item
373 we do not include into node that is being filled */ 384 we do not include into node that is being filled */
374 end_bytes; /* number of last bytes (entries for directory) of end_item-th item 385 end_bytes; /* number of last bytes (entries for directory) of end_item-th item
375 we do node include into node that is being filled */ 386 we do node include into node that is being filled */
376 int split_item_positions[2]; /* these are positions in virtual item of 387 int split_item_positions[2]; /* these are positions in virtual item of
377 items, that are split between S[0] and 388 items, that are split between S[0] and
378 S1new and S1new and S2new */ 389 S1new and S1new and S2new */
379 390
380 split_item_positions[0] = -1; 391 split_item_positions[0] = -1;
381 split_item_positions[1] = -1; 392 split_item_positions[1] = -1;
382 393
383 /* We only create additional nodes if we are in insert or paste mode 394 /* We only create additional nodes if we are in insert or paste mode
384 or we are in replace mode at the internal level. If h is 0 and 395 or we are in replace mode at the internal level. If h is 0 and
385 the mode is M_REPLACE then in fix_nodes we change the mode to 396 the mode is M_REPLACE then in fix_nodes we change the mode to
386 paste or insert before we get here in the code. */ 397 paste or insert before we get here in the code. */
387 RFALSE( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 398 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
388 "vs-8100: insert_size < 0 in overflow"); 399 "vs-8100: insert_size < 0 in overflow");
389 400
390 max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h)); 401 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));
391 402
392 /* snum012 [0-2] - number of items, that lay 403 /* snum012 [0-2] - number of items, that lay
393 to S[0], first new node and second new node */ 404 to S[0], first new node and second new node */
394 snum012[3] = -1; /* s1bytes */ 405 snum012[3] = -1; /* s1bytes */
395 snum012[4] = -1; /* s2bytes */ 406 snum012[4] = -1; /* s2bytes */
396 407
397 /* internal level */ 408 /* internal level */
398 if (h > 0) { 409 if (h > 0) {
399 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); 410 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
400 if (i == max_node_size) 411 if (i == max_node_size)
401 return 1; 412 return 1;
402 return (i / max_node_size + 1); 413 return (i / max_node_size + 1);
403 }
404
405 /* leaf level */
406 needed_nodes = 1;
407 total_node_size = 0;
408 cur_free = max_node_size;
409
410 // start from 'from'-th item
411 start_item = from;
412 // skip its first 'start_bytes' units
413 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
414
415 // last included item is the 'end_item'-th one
416 end_item = vn->vn_nr_item - to - 1;
417 // do not count last 'end_bytes' units of 'end_item'-th item
418 end_bytes = (to_bytes != -1) ? to_bytes : 0;
419
420 /* go through all item beginning from the start_item-th item and ending by
421 the end_item-th item. Do not count first 'start_bytes' units of
422 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
423
424 for (i = start_item; i <= end_item; i ++) {
425 struct virtual_item * vi = vn->vn_vi + i;
426 int skip_from_end = ((i == end_item) ? end_bytes : 0);
427
428 RFALSE( needed_nodes > 3, "vs-8105: too many nodes are needed");
429
430 /* get size of current item */
431 current_item_size = vi->vi_item_len;
432
433 /* do not take in calculation head part (from_bytes) of from-th item */
434 current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes);
435
436 /* do not take in calculation tail part of last item */
437 current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end);
438
439 /* if item fits into current node entierly */
440 if (total_node_size + current_item_size <= max_node_size) {
441 snum012[needed_nodes - 1] ++;
442 total_node_size += current_item_size;
443 start_bytes = 0;
444 continue;
445 } 414 }
446 415
447 if (current_item_size > max_node_size) { 416 /* leaf level */
448 /* virtual item length is longer, than max size of item in 417 needed_nodes = 1;
449 a node. It is impossible for direct item */ 418 total_node_size = 0;
450 RFALSE( is_direct_le_ih (vi->vi_ih), 419 cur_free = max_node_size;
451 "vs-8110: " 420
452 "direct item length is %d. It can not be longer than %d", 421 // start from 'from'-th item
453 current_item_size, max_node_size); 422 start_item = from;
454 /* we will try to split it */ 423 // skip its first 'start_bytes' units
455 flow = 1; 424 start_bytes = ((from_bytes != -1) ? from_bytes : 0);
425
426 // last included item is the 'end_item'-th one
427 end_item = vn->vn_nr_item - to - 1;
428 // do not count last 'end_bytes' units of 'end_item'-th item
429 end_bytes = (to_bytes != -1) ? to_bytes : 0;
430
431 /* go through all item beginning from the start_item-th item and ending by
432 the end_item-th item. Do not count first 'start_bytes' units of
433 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */
434
435 for (i = start_item; i <= end_item; i++) {
436 struct virtual_item *vi = vn->vn_vi + i;
437 int skip_from_end = ((i == end_item) ? end_bytes : 0);
438
439 RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");
440
441 /* get size of current item */
442 current_item_size = vi->vi_item_len;
443
444 /* do not take in calculation head part (from_bytes) of from-th item */
445 current_item_size -=
446 op_part_size(vi, 0 /*from start */ , start_bytes);
447
448 /* do not take in calculation tail part of last item */
449 current_item_size -=
450 op_part_size(vi, 1 /*from end */ , skip_from_end);
451
452 /* if item fits into current node entierly */
453 if (total_node_size + current_item_size <= max_node_size) {
454 snum012[needed_nodes - 1]++;
455 total_node_size += current_item_size;
456 start_bytes = 0;
457 continue;
458 }
459
460 if (current_item_size > max_node_size) {
461 /* virtual item length is longer, than max size of item in
462 a node. It is impossible for direct item */
463 RFALSE(is_direct_le_ih(vi->vi_ih),
464 "vs-8110: "
465 "direct item length is %d. It can not be longer than %d",
466 current_item_size, max_node_size);
467 /* we will try to split it */
468 flow = 1;
469 }
470
471 if (!flow) {
472 /* as we do not split items, take new node and continue */
473 needed_nodes++;
474 i--;
475 total_node_size = 0;
476 continue;
477 }
478 // calculate number of item units which fit into node being
479 // filled
480 {
481 int free_space;
482
483 free_space = max_node_size - total_node_size - IH_SIZE;
484 units =
485 op_check_left(vi, free_space, start_bytes,
486 skip_from_end);
487 if (units == -1) {
488 /* nothing fits into current node, take new node and continue */
489 needed_nodes++, i--, total_node_size = 0;
490 continue;
491 }
492 }
493
494 /* something fits into the current node */
495 //if (snum012[3] != -1 || needed_nodes != 1)
496 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
497 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
498 start_bytes += units;
499 snum012[needed_nodes - 1 + 3] = units;
500
501 if (needed_nodes > 2)
502 reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
503 "split_item_position is out of boundary");
504 snum012[needed_nodes - 1]++;
505 split_item_positions[needed_nodes - 1] = i;
506 needed_nodes++;
507 /* continue from the same item with start_bytes != -1 */
508 start_item = i;
509 i--;
510 total_node_size = 0;
456 } 511 }
457 512
458 if (!flow) { 513 // sum012[4] (if it is not -1) contains number of units of which
459 /* as we do not split items, take new node and continue */ 514 // are to be in S1new, snum012[3] - to be in S0. They are supposed
460 needed_nodes ++; i --; total_node_size = 0; 515 // to be S1bytes and S2bytes correspondingly, so recalculate
461 continue; 516 if (snum012[4] > 0) {
517 int split_item_num;
518 int bytes_to_r, bytes_to_l;
519 int bytes_to_S1new;
520
521 split_item_num = split_item_positions[1];
522 bytes_to_l =
523 ((from == split_item_num
524 && from_bytes != -1) ? from_bytes : 0);
525 bytes_to_r =
526 ((end_item == split_item_num
527 && end_bytes != -1) ? end_bytes : 0);
528 bytes_to_S1new =
529 ((split_item_positions[0] ==
530 split_item_positions[1]) ? snum012[3] : 0);
531
532 // s2bytes
533 snum012[4] =
534 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
535 bytes_to_r - bytes_to_l - bytes_to_S1new;
536
537 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
538 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
539 reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
540 "directory or indirect item");
462 } 541 }
463 542
464 // calculate number of item units which fit into node being 543 /* now we know S2bytes, calculate S1bytes */
465 // filled 544 if (snum012[3] > 0) {
466 { 545 int split_item_num;
467 int free_space; 546 int bytes_to_r, bytes_to_l;
468 547 int bytes_to_S2new;
469 free_space = max_node_size - total_node_size - IH_SIZE; 548
470 units = op_check_left (vi, free_space, start_bytes, skip_from_end); 549 split_item_num = split_item_positions[0];
471 if (units == -1) { 550 bytes_to_l =
472 /* nothing fits into current node, take new node and continue */ 551 ((from == split_item_num
473 needed_nodes ++, i--, total_node_size = 0; 552 && from_bytes != -1) ? from_bytes : 0);
474 continue; 553 bytes_to_r =
475 } 554 ((end_item == split_item_num
555 && end_bytes != -1) ? end_bytes : 0);
556 bytes_to_S2new =
557 ((split_item_positions[0] == split_item_positions[1]
558 && snum012[4] != -1) ? snum012[4] : 0);
559
560 // s1bytes
561 snum012[3] =
562 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
563 bytes_to_r - bytes_to_l - bytes_to_S2new;
476 } 564 }
477 565
478 /* something fits into the current node */ 566 return needed_nodes;
479 //if (snum012[3] != -1 || needed_nodes != 1)
480 // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
481 //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
482 start_bytes += units;
483 snum012[needed_nodes - 1 + 3] = units;
484
485 if (needed_nodes > 2)
486 reiserfs_warning (tb->tb_sb, "vs-8111: get_num_ver: "
487 "split_item_position is out of boundary");
488 snum012[needed_nodes - 1] ++;
489 split_item_positions[needed_nodes - 1] = i;
490 needed_nodes ++;
491 /* continue from the same item with start_bytes != -1 */
492 start_item = i;
493 i --;
494 total_node_size = 0;
495 }
496
497 // sum012[4] (if it is not -1) contains number of units of which
498 // are to be in S1new, snum012[3] - to be in S0. They are supposed
499 // to be S1bytes and S2bytes correspondingly, so recalculate
500 if (snum012[4] > 0) {
501 int split_item_num;
502 int bytes_to_r, bytes_to_l;
503 int bytes_to_S1new;
504
505 split_item_num = split_item_positions[1];
506 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
507 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
508 bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? snum012[3] : 0);
509
510 // s2bytes
511 snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new;
512
513 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
514 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
515 reiserfs_warning (tb->tb_sb, "vs-8115: get_num_ver: not "
516 "directory or indirect item");
517 }
518
519 /* now we know S2bytes, calculate S1bytes */
520 if (snum012[3] > 0) {
521 int split_item_num;
522 int bytes_to_r, bytes_to_l;
523 int bytes_to_S2new;
524
525 split_item_num = split_item_positions[0];
526 bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0);
527 bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0);
528 bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0);
529
530 // s1bytes
531 snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new;
532 }
533
534 return needed_nodes;
535} 567}
536 568
537
538#ifdef CONFIG_REISERFS_CHECK 569#ifdef CONFIG_REISERFS_CHECK
539extern struct tree_balance * cur_tb; 570extern struct tree_balance *cur_tb;
540#endif 571#endif
541 572
542
543/* Set parameters for balancing. 573/* Set parameters for balancing.
544 * Performs write of results of analysis of balancing into structure tb, 574 * Performs write of results of analysis of balancing into structure tb,
545 * where it will later be used by the functions that actually do the balancing. 575 * where it will later be used by the functions that actually do the balancing.
@@ -557,131 +587,130 @@ extern struct tree_balance * cur_tb;
557 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) 587 * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array)
558 */ 588 */
559 589
560static void set_parameters (struct tree_balance * tb, int h, int lnum, 590static void set_parameters(struct tree_balance *tb, int h, int lnum,
561 int rnum, int blk_num, short * s012, int lb, int rb) 591 int rnum, int blk_num, short *s012, int lb, int rb)
562{ 592{
563 593
564 tb->lnum[h] = lnum; 594 tb->lnum[h] = lnum;
565 tb->rnum[h] = rnum; 595 tb->rnum[h] = rnum;
566 tb->blknum[h] = blk_num; 596 tb->blknum[h] = blk_num;
567 597
568 if (h == 0) 598 if (h == 0) { /* only for leaf level */
569 { /* only for leaf level */ 599 if (s012 != NULL) {
570 if (s012 != NULL) 600 tb->s0num = *s012++,
571 { 601 tb->s1num = *s012++, tb->s2num = *s012++;
572 tb->s0num = * s012 ++, 602 tb->s1bytes = *s012++;
573 tb->s1num = * s012 ++, 603 tb->s2bytes = *s012;
574 tb->s2num = * s012 ++; 604 }
575 tb->s1bytes = * s012 ++; 605 tb->lbytes = lb;
576 tb->s2bytes = * s012; 606 tb->rbytes = rb;
577 } 607 }
578 tb->lbytes = lb; 608 PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
579 tb->rbytes = rb; 609 PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
580 }
581 PROC_INFO_ADD( tb -> tb_sb, lnum[ h ], lnum );
582 PROC_INFO_ADD( tb -> tb_sb, rnum[ h ], rnum );
583
584 PROC_INFO_ADD( tb -> tb_sb, lbytes[ h ], lb );
585 PROC_INFO_ADD( tb -> tb_sb, rbytes[ h ], rb );
586}
587
588 610
611 PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
612 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
613}
589 614
590/* check, does node disappear if we shift tb->lnum[0] items to left 615/* check, does node disappear if we shift tb->lnum[0] items to left
591 neighbor and tb->rnum[0] to the right one. */ 616 neighbor and tb->rnum[0] to the right one. */
592static int is_leaf_removable (struct tree_balance * tb) 617static int is_leaf_removable(struct tree_balance *tb)
593{ 618{
594 struct virtual_node * vn = tb->tb_vn; 619 struct virtual_node *vn = tb->tb_vn;
595 int to_left, to_right; 620 int to_left, to_right;
596 int size; 621 int size;
597 int remain_items; 622 int remain_items;
598 623
599 /* number of items, that will be shifted to left (right) neighbor 624 /* number of items, that will be shifted to left (right) neighbor
600 entirely */ 625 entirely */
601 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 626 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
602 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 627 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
603 remain_items = vn->vn_nr_item; 628 remain_items = vn->vn_nr_item;
604 629
605 /* how many items remain in S[0] after shiftings to neighbors */ 630 /* how many items remain in S[0] after shiftings to neighbors */
606 remain_items -= (to_left + to_right); 631 remain_items -= (to_left + to_right);
607 632
608 if (remain_items < 1) { 633 if (remain_items < 1) {
609 /* all content of node can be shifted to neighbors */ 634 /* all content of node can be shifted to neighbors */
610 set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1); 635 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
611 return 1; 636 NULL, -1, -1);
612 } 637 return 1;
613 638 }
614 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
615 /* S[0] is not removable */
616 return 0;
617
618 /* check, whether we can divide 1 remaining item between neighbors */
619
620 /* get size of remaining item (in item units) */
621 size = op_unit_num (&(vn->vn_vi[to_left]));
622
623 if (tb->lbytes + tb->rbytes >= size) {
624 set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1);
625 return 1;
626 }
627
628 return 0;
629}
630 639
640 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
641 /* S[0] is not removable */
642 return 0;
643
644 /* check, whether we can divide 1 remaining item between neighbors */
645
646 /* get size of remaining item (in item units) */
647 size = op_unit_num(&(vn->vn_vi[to_left]));
648
649 if (tb->lbytes + tb->rbytes >= size) {
650 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
651 tb->lbytes, -1);
652 return 1;
653 }
654
655 return 0;
656}
631 657
632/* check whether L, S, R can be joined in one node */ 658/* check whether L, S, R can be joined in one node */
633static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree) 659static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
634{ 660{
635 struct virtual_node * vn = tb->tb_vn; 661 struct virtual_node *vn = tb->tb_vn;
636 int ih_size; 662 int ih_size;
637 struct buffer_head *S0; 663 struct buffer_head *S0;
638 664
639 S0 = PATH_H_PBUFFER (tb->tb_path, 0); 665 S0 = PATH_H_PBUFFER(tb->tb_path, 0);
640 666
641 ih_size = 0; 667 ih_size = 0;
642 if (vn->vn_nr_item) { 668 if (vn->vn_nr_item) {
643 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) 669 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
644 ih_size += IH_SIZE; 670 ih_size += IH_SIZE;
645 671
646 if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE) 672 if (vn->vn_vi[vn->vn_nr_item - 1].
647 ih_size += IH_SIZE; 673 vi_type & VI_TYPE_RIGHT_MERGEABLE)
648 } else { 674 ih_size += IH_SIZE;
649 /* there was only one item and it will be deleted */ 675 } else {
650 struct item_head * ih; 676 /* there was only one item and it will be deleted */
651 677 struct item_head *ih;
652 RFALSE( B_NR_ITEMS (S0) != 1, 678
653 "vs-8125: item number must be 1: it is %d", B_NR_ITEMS(S0)); 679 RFALSE(B_NR_ITEMS(S0) != 1,
654 680 "vs-8125: item number must be 1: it is %d",
655 ih = B_N_PITEM_HEAD (S0, 0); 681 B_NR_ITEMS(S0));
656 if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]))) 682
657 if (is_direntry_le_ih (ih)) { 683 ih = B_N_PITEM_HEAD(S0, 0);
658 /* Directory must be in correct state here: that is 684 if (tb->CFR[0]
659 somewhere at the left side should exist first directory 685 && !comp_short_le_keys(&(ih->ih_key),
660 item. But the item being deleted can not be that first 686 B_N_PDELIM_KEY(tb->CFR[0],
661 one because its right neighbor is item of the same 687 tb->rkey[0])))
662 directory. (But first item always gets deleted in last 688 if (is_direntry_le_ih(ih)) {
663 turn). So, neighbors of deleted item can be merged, so 689 /* Directory must be in correct state here: that is
664 we can save ih_size */ 690 somewhere at the left side should exist first directory
665 ih_size = IH_SIZE; 691 item. But the item being deleted can not be that first
666 692 one because its right neighbor is item of the same
667 /* we might check that left neighbor exists and is of the 693 directory. (But first item always gets deleted in last
668 same directory */ 694 turn). So, neighbors of deleted item can be merged, so
669 RFALSE(le_ih_k_offset (ih) == DOT_OFFSET, 695 we can save ih_size */
670 "vs-8130: first directory item can not be removed until directory is not empty"); 696 ih_size = IH_SIZE;
671 } 697
672 698 /* we might check that left neighbor exists and is of the
673 } 699 same directory */
674 700 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
675 if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) { 701 "vs-8130: first directory item can not be removed until directory is not empty");
676 set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1); 702 }
677 PROC_INFO_INC( tb -> tb_sb, leaves_removable );
678 return 1;
679 }
680 return 0;
681
682}
683 703
704 }
705
706 if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
707 set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
708 PROC_INFO_INC(tb->tb_sb, leaves_removable);
709 return 1;
710 }
711 return 0;
684 712
713}
685 714
686/* when we do not split item, lnum and rnum are numbers of entire items */ 715/* when we do not split item, lnum and rnum are numbers of entire items */
687#define SET_PAR_SHIFT_LEFT \ 716#define SET_PAR_SHIFT_LEFT \
@@ -704,7 +733,6 @@ else \
704 -1, -1);\ 733 -1, -1);\
705} 734}
706 735
707
708#define SET_PAR_SHIFT_RIGHT \ 736#define SET_PAR_SHIFT_RIGHT \
709if (h)\ 737if (h)\
710{\ 738{\
@@ -724,214 +752,199 @@ else \
724 -1, -1);\ 752 -1, -1);\
725} 753}
726 754
727 755static void free_buffers_in_tb(struct tree_balance *p_s_tb)
728static void free_buffers_in_tb ( 756{
729 struct tree_balance * p_s_tb 757 int n_counter;
730 ) { 758
731 int n_counter; 759 decrement_counters_in_path(p_s_tb->tb_path);
732 760
733 decrement_counters_in_path(p_s_tb->tb_path); 761 for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
734 762 decrement_bcount(p_s_tb->L[n_counter]);
735 for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) { 763 p_s_tb->L[n_counter] = NULL;
736 decrement_bcount(p_s_tb->L[n_counter]); 764 decrement_bcount(p_s_tb->R[n_counter]);
737 p_s_tb->L[n_counter] = NULL; 765 p_s_tb->R[n_counter] = NULL;
738 decrement_bcount(p_s_tb->R[n_counter]); 766 decrement_bcount(p_s_tb->FL[n_counter]);
739 p_s_tb->R[n_counter] = NULL; 767 p_s_tb->FL[n_counter] = NULL;
740 decrement_bcount(p_s_tb->FL[n_counter]); 768 decrement_bcount(p_s_tb->FR[n_counter]);
741 p_s_tb->FL[n_counter] = NULL; 769 p_s_tb->FR[n_counter] = NULL;
742 decrement_bcount(p_s_tb->FR[n_counter]); 770 decrement_bcount(p_s_tb->CFL[n_counter]);
743 p_s_tb->FR[n_counter] = NULL; 771 p_s_tb->CFL[n_counter] = NULL;
744 decrement_bcount(p_s_tb->CFL[n_counter]); 772 decrement_bcount(p_s_tb->CFR[n_counter]);
745 p_s_tb->CFL[n_counter] = NULL; 773 p_s_tb->CFR[n_counter] = NULL;
746 decrement_bcount(p_s_tb->CFR[n_counter]); 774 }
747 p_s_tb->CFR[n_counter] = NULL;
748 }
749} 775}
750 776
751
752/* Get new buffers for storing new nodes that are created while balancing. 777/* Get new buffers for storing new nodes that are created while balancing.
753 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 778 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
754 * CARRY_ON - schedule didn't occur while the function worked; 779 * CARRY_ON - schedule didn't occur while the function worked;
755 * NO_DISK_SPACE - no disk space. 780 * NO_DISK_SPACE - no disk space.
756 */ 781 */
757/* The function is NOT SCHEDULE-SAFE! */ 782/* The function is NOT SCHEDULE-SAFE! */
758static int get_empty_nodes( 783static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
759 struct tree_balance * p_s_tb, 784{
760 int n_h 785 struct buffer_head *p_s_new_bh,
761 ) { 786 *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
762 struct buffer_head * p_s_new_bh, 787 b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
763 * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h); 788 int n_counter, n_number_of_freeblk, n_amount_needed, /* number of needed empty blocks */
764 b_blocknr_t * p_n_blocknr, 789 n_retval = CARRY_ON;
765 a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, }; 790 struct super_block *p_s_sb = p_s_tb->tb_sb;
766 int n_counter, 791
767 n_number_of_freeblk, 792 /* number_of_freeblk is the number of empty blocks which have been
768 n_amount_needed,/* number of needed empty blocks */ 793 acquired for use by the balancing algorithm minus the number of
769 n_retval = CARRY_ON; 794 empty blocks used in the previous levels of the analysis,
770 struct super_block * p_s_sb = p_s_tb->tb_sb; 795 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
771 796 after empty blocks are acquired, and the balancing analysis is
772 797 then restarted, amount_needed is the number needed by this level
773 /* number_of_freeblk is the number of empty blocks which have been 798 (n_h) of the balancing analysis.
774 acquired for use by the balancing algorithm minus the number of 799
775 empty blocks used in the previous levels of the analysis, 800 Note that for systems with many processes writing, it would be
776 number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs 801 more layout optimal to calculate the total number needed by all
777 after empty blocks are acquired, and the balancing analysis is 802 levels and then to run reiserfs_new_blocks to get all of them at once. */
778 then restarted, amount_needed is the number needed by this level 803
779 (n_h) of the balancing analysis. 804 /* Initiate number_of_freeblk to the amount acquired prior to the restart of
780 805 the analysis or 0 if not restarted, then subtract the amount needed
781 Note that for systems with many processes writing, it would be 806 by all of the levels of the tree below n_h. */
782 more layout optimal to calculate the total number needed by all 807 /* blknum includes S[n_h], so we subtract 1 in this calculation */
783 levels and then to run reiserfs_new_blocks to get all of them at once. */ 808 for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
784 809 n_counter < n_h; n_counter++)
785 /* Initiate number_of_freeblk to the amount acquired prior to the restart of 810 n_number_of_freeblk -=
786 the analysis or 0 if not restarted, then subtract the amount needed 811 (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
787 by all of the levels of the tree below n_h. */ 812 1) : 0;
788 /* blknum includes S[n_h], so we subtract 1 in this calculation */ 813
789 for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ ) 814 /* Allocate missing empty blocks. */
790 n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0; 815 /* if p_s_Sh == 0 then we are getting a new root */
791 816 n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
792 /* Allocate missing empty blocks. */ 817 /* Amount_needed = the amount that we need more than the amount that we have. */
793 /* if p_s_Sh == 0 then we are getting a new root */ 818 if (n_amount_needed > n_number_of_freeblk)
794 n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1; 819 n_amount_needed -= n_number_of_freeblk;
795 /* Amount_needed = the amount that we need more than the amount that we have. */ 820 else /* If we have enough already then there is nothing to do. */
796 if ( n_amount_needed > n_number_of_freeblk ) 821 return CARRY_ON;
797 n_amount_needed -= n_number_of_freeblk; 822
798 else /* If we have enough already then there is nothing to do. */ 823 /* No need to check quota - is not allocated for blocks used for formatted nodes */
799 return CARRY_ON; 824 if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
800 825 n_amount_needed) == NO_DISK_SPACE)
801 /* No need to check quota - is not allocated for blocks used for formatted nodes */ 826 return NO_DISK_SPACE;
802 if (reiserfs_new_form_blocknrs (p_s_tb, a_n_blocknrs, 827
803 n_amount_needed) == NO_DISK_SPACE) 828 /* for each blocknumber we just got, get a buffer and stick it on FEB */
804 return NO_DISK_SPACE; 829 for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
805 830 n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {
806 /* for each blocknumber we just got, get a buffer and stick it on FEB */ 831
807 for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed; 832 RFALSE(!*p_n_blocknr,
808 p_n_blocknr++, n_counter++ ) { 833 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");
809 834
810 RFALSE( ! *p_n_blocknr, 835 p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
811 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); 836 RFALSE(buffer_dirty(p_s_new_bh) ||
812 837 buffer_journaled(p_s_new_bh) ||
813 p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr); 838 buffer_journal_dirty(p_s_new_bh),
814 RFALSE (buffer_dirty (p_s_new_bh) || 839 "PAP-8140: journlaled or dirty buffer %b for the new block",
815 buffer_journaled (p_s_new_bh) || 840 p_s_new_bh);
816 buffer_journal_dirty (p_s_new_bh), 841
817 "PAP-8140: journlaled or dirty buffer %b for the new block", 842 /* Put empty buffers into the array. */
818 p_s_new_bh); 843 RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
819 844 "PAP-8141: busy slot for new buffer");
820 /* Put empty buffers into the array. */ 845
821 RFALSE (p_s_tb->FEB[p_s_tb->cur_blknum], 846 set_buffer_journal_new(p_s_new_bh);
822 "PAP-8141: busy slot for new buffer"); 847 p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
823 848 }
824 set_buffer_journal_new (p_s_new_bh); 849
825 p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; 850 if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
826 } 851 n_retval = REPEAT_SEARCH;
827
828 if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) )
829 n_retval = REPEAT_SEARCH ;
830
831 return n_retval;
832}
833 852
853 return n_retval;
854}
834 855
835/* Get free space of the left neighbor, which is stored in the parent 856/* Get free space of the left neighbor, which is stored in the parent
836 * node of the left neighbor. */ 857 * node of the left neighbor. */
837static int get_lfree (struct tree_balance * tb, int h) 858static int get_lfree(struct tree_balance *tb, int h)
838{ 859{
839 struct buffer_head * l, * f; 860 struct buffer_head *l, *f;
840 int order; 861 int order;
841 862
842 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) 863 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
843 return 0; 864 return 0;
844 865
845 if (f == l) 866 if (f == l)
846 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1; 867 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
847 else { 868 else {
848 order = B_NR_ITEMS (l); 869 order = B_NR_ITEMS(l);
849 f = l; 870 f = l;
850 } 871 }
851 872
852 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f,order))); 873 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
853} 874}
854 875
855
856/* Get free space of the right neighbor, 876/* Get free space of the right neighbor,
857 * which is stored in the parent node of the right neighbor. 877 * which is stored in the parent node of the right neighbor.
858 */ 878 */
859static int get_rfree (struct tree_balance * tb, int h) 879static int get_rfree(struct tree_balance *tb, int h)
860{ 880{
861 struct buffer_head * r, * f; 881 struct buffer_head *r, *f;
862 int order; 882 int order;
863 883
864 if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) 884 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
865 return 0; 885 return 0;
866 886
867 if (f == r) 887 if (f == r)
868 order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1; 888 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
869 else { 889 else {
870 order = 0; 890 order = 0;
871 f = r; 891 f = r;
872 } 892 }
873 893
874 return (MAX_CHILD_SIZE(f) - dc_size( B_N_CHILD(f,order))); 894 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
875 895
876} 896}
877 897
878
879/* Check whether left neighbor is in memory. */ 898/* Check whether left neighbor is in memory. */
880static int is_left_neighbor_in_cache( 899static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
881 struct tree_balance * p_s_tb, 900{
882 int n_h 901 struct buffer_head *p_s_father, *left;
883 ) { 902 struct super_block *p_s_sb = p_s_tb->tb_sb;
884 struct buffer_head * p_s_father, * left; 903 b_blocknr_t n_left_neighbor_blocknr;
885 struct super_block * p_s_sb = p_s_tb->tb_sb; 904 int n_left_neighbor_position;
886 b_blocknr_t n_left_neighbor_blocknr; 905
887 int n_left_neighbor_position; 906 if (!p_s_tb->FL[n_h]) /* Father of the left neighbor does not exist. */
888 907 return 0;
889 if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */ 908
890 return 0; 909 /* Calculate father of the node to be balanced. */
891 910 p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);
892 /* Calculate father of the node to be balanced. */ 911
893 p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); 912 RFALSE(!p_s_father ||
894 913 !B_IS_IN_TREE(p_s_father) ||
895 RFALSE( ! p_s_father || 914 !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
896 ! B_IS_IN_TREE (p_s_father) || 915 !buffer_uptodate(p_s_father) ||
897 ! B_IS_IN_TREE (p_s_tb->FL[n_h]) || 916 !buffer_uptodate(p_s_tb->FL[n_h]),
898 ! buffer_uptodate (p_s_father) || 917 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
899 ! buffer_uptodate (p_s_tb->FL[n_h]), 918 p_s_father, p_s_tb->FL[n_h]);
900 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 919
901 p_s_father, p_s_tb->FL[n_h]); 920 /* Get position of the pointer to the left neighbor into the left father. */
902 921 n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
903 922 p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
904 /* Get position of the pointer to the left neighbor into the left father. */ 923 /* Get left neighbor block number. */
905 n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ? 924 n_left_neighbor_blocknr =
906 p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); 925 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
907 /* Get left neighbor block number. */ 926 /* Look for the left neighbor in the cache. */
908 n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); 927 if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {
909 /* Look for the left neighbor in the cache. */ 928
910 if ( (left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr)) ) { 929 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
911 930 "vs-8170: left neighbor (%b %z) is not in the tree",
912 RFALSE( buffer_uptodate (left) && ! B_IS_IN_TREE(left), 931 left, left);
913 "vs-8170: left neighbor (%b %z) is not in the tree", left, left); 932 put_bh(left);
914 put_bh(left) ; 933 return 1;
915 return 1; 934 }
916 }
917
918 return 0;
919}
920 935
936 return 0;
937}
921 938
922#define LEFT_PARENTS 'l' 939#define LEFT_PARENTS 'l'
923#define RIGHT_PARENTS 'r' 940#define RIGHT_PARENTS 'r'
924 941
925 942static void decrement_key(struct cpu_key *p_s_key)
926static void decrement_key (struct cpu_key * p_s_key)
927{ 943{
928 // call item specific function for this key 944 // call item specific function for this key
929 item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key); 945 item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
930} 946}
931 947
932
933
934
935/* Calculate far left/right parent of the left/right neighbor of the current node, that 948/* Calculate far left/right parent of the left/right neighbor of the current node, that
936 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. 949 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
937 * Calculate left/right common parent of the current node and L[h]/R[h]. 950 * Calculate left/right common parent of the current node and L[h]/R[h].
@@ -940,111 +953,121 @@ static void decrement_key (struct cpu_key * p_s_key)
940 SCHEDULE_OCCURRED - schedule occurred while the function worked; 953 SCHEDULE_OCCURRED - schedule occurred while the function worked;
941 * CARRY_ON - schedule didn't occur while the function worked; 954 * CARRY_ON - schedule didn't occur while the function worked;
942 */ 955 */
943static int get_far_parent (struct tree_balance * p_s_tb, 956static int get_far_parent(struct tree_balance *p_s_tb,
944 int n_h, 957 int n_h,
945 struct buffer_head ** pp_s_father, 958 struct buffer_head **pp_s_father,
946 struct buffer_head ** pp_s_com_father, 959 struct buffer_head **pp_s_com_father, char c_lr_par)
947 char c_lr_par)
948{ 960{
949 struct buffer_head * p_s_parent; 961 struct buffer_head *p_s_parent;
950 INITIALIZE_PATH (s_path_to_neighbor_father); 962 INITIALIZE_PATH(s_path_to_neighbor_father);
951 struct path * p_s_path = p_s_tb->tb_path; 963 struct path *p_s_path = p_s_tb->tb_path;
952 struct cpu_key s_lr_father_key; 964 struct cpu_key s_lr_father_key;
953 int n_counter, 965 int n_counter,
954 n_position = INT_MAX, 966 n_position = INT_MAX,
955 n_first_last_position = 0, 967 n_first_last_position = 0,
956 n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); 968 n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);
957 969
958 /* Starting from F[n_h] go upwards in the tree, and look for the common 970 /* Starting from F[n_h] go upwards in the tree, and look for the common
959 ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ 971 ancestor of F[n_h], and its neighbor l/r, that should be obtained. */
960 972
961 n_counter = n_path_offset; 973 n_counter = n_path_offset;
962 974
963 RFALSE( n_counter < FIRST_PATH_ELEMENT_OFFSET, 975 RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
964 "PAP-8180: invalid path length"); 976 "PAP-8180: invalid path length");
965 977
966 978 for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
967 for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) { 979 /* Check whether parent of the current buffer in the path is really parent in the tree. */
968 /* Check whether parent of the current buffer in the path is really parent in the tree. */ 980 if (!B_IS_IN_TREE
969 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) ) 981 (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
970 return REPEAT_SEARCH; 982 return REPEAT_SEARCH;
971 /* Check whether position in the parent is correct. */ 983 /* Check whether position in the parent is correct. */
972 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) ) 984 if ((n_position =
973 return REPEAT_SEARCH; 985 PATH_OFFSET_POSITION(p_s_path,
974 /* Check whether parent at the path really points to the child. */ 986 n_counter - 1)) >
975 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 987 B_NR_ITEMS(p_s_parent))
976 PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr ) 988 return REPEAT_SEARCH;
977 return REPEAT_SEARCH; 989 /* Check whether parent at the path really points to the child. */
978 /* Return delimiting key if position in the parent is not equal to first/last one. */ 990 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
979 if ( c_lr_par == RIGHT_PARENTS ) 991 PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
980 n_first_last_position = B_NR_ITEMS (p_s_parent); 992 return REPEAT_SEARCH;
981 if ( n_position != n_first_last_position ) { 993 /* Return delimiting key if position in the parent is not equal to first/last one. */
982 *pp_s_com_father = p_s_parent; 994 if (c_lr_par == RIGHT_PARENTS)
983 get_bh(*pp_s_com_father) ; 995 n_first_last_position = B_NR_ITEMS(p_s_parent);
984 /*(*pp_s_com_father = p_s_parent)->b_count++;*/ 996 if (n_position != n_first_last_position) {
985 break; 997 *pp_s_com_father = p_s_parent;
998 get_bh(*pp_s_com_father);
999 /*(*pp_s_com_father = p_s_parent)->b_count++; */
1000 break;
1001 }
986 } 1002 }
987 } 1003
988 1004 /* if we are in the root of the tree, then there is no common father */
989 /* if we are in the root of the tree, then there is no common father */ 1005 if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
990 if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) { 1006 /* Check whether first buffer in the path is the root of the tree. */
991 /* Check whether first buffer in the path is the root of the tree. */ 1007 if (PATH_OFFSET_PBUFFER
992 if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1008 (p_s_tb->tb_path,
993 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { 1009 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
994 *pp_s_father = *pp_s_com_father = NULL; 1010 SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
995 return CARRY_ON; 1011 *pp_s_father = *pp_s_com_father = NULL;
1012 return CARRY_ON;
1013 }
1014 return REPEAT_SEARCH;
996 } 1015 }
997 return REPEAT_SEARCH;
998 }
999 1016
1000 RFALSE( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL, 1017 RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
1001 "PAP-8185: (%b %z) level too small", 1018 "PAP-8185: (%b %z) level too small",
1002 *pp_s_com_father, *pp_s_com_father); 1019 *pp_s_com_father, *pp_s_com_father);
1003 1020
1004 /* Check whether the common parent is locked. */ 1021 /* Check whether the common parent is locked. */
1005 1022
1006 if ( buffer_locked (*pp_s_com_father) ) { 1023 if (buffer_locked(*pp_s_com_father)) {
1007 __wait_on_buffer(*pp_s_com_father); 1024 __wait_on_buffer(*pp_s_com_father);
1008 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1025 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1009 decrement_bcount(*pp_s_com_father); 1026 decrement_bcount(*pp_s_com_father);
1010 return REPEAT_SEARCH; 1027 return REPEAT_SEARCH;
1028 }
1011 } 1029 }
1012 }
1013
1014 /* So, we got common parent of the current node and its left/right neighbor.
1015 Now we are geting the parent of the left/right neighbor. */
1016 1030
1017 /* Form key to get parent of the left/right neighbor. */ 1031 /* So, we got common parent of the current node and its left/right neighbor.
1018 le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ? 1032 Now we are geting the parent of the left/right neighbor. */
1019 (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position)));
1020 1033
1034 /* Form key to get parent of the left/right neighbor. */
1035 le_key2cpu_key(&s_lr_father_key,
1036 B_N_PDELIM_KEY(*pp_s_com_father,
1037 (c_lr_par ==
1038 LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
1039 n_position -
1040 1) : (p_s_tb->rkey[n_h -
1041 1] =
1042 n_position)));
1021 1043
1022 if ( c_lr_par == LEFT_PARENTS ) 1044 if (c_lr_par == LEFT_PARENTS)
1023 decrement_key(&s_lr_father_key); 1045 decrement_key(&s_lr_father_key);
1024 1046
1025 if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR) 1047 if (search_by_key
1026 // path is released 1048 (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
1027 return IO_ERROR; 1049 n_h + 1) == IO_ERROR)
1050 // path is released
1051 return IO_ERROR;
1028 1052
1029 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1053 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1030 decrement_counters_in_path(&s_path_to_neighbor_father); 1054 decrement_counters_in_path(&s_path_to_neighbor_father);
1031 decrement_bcount(*pp_s_com_father); 1055 decrement_bcount(*pp_s_com_father);
1032 return REPEAT_SEARCH; 1056 return REPEAT_SEARCH;
1033 } 1057 }
1034 1058
1035 *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); 1059 *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);
1036 1060
1037 RFALSE( B_LEVEL (*pp_s_father) != n_h + 1, 1061 RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
1038 "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father); 1062 "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
1039 RFALSE( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET, 1063 RFALSE(s_path_to_neighbor_father.path_length <
1040 "PAP-8192: path length is too small"); 1064 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");
1041 1065
1042 s_path_to_neighbor_father.path_length--; 1066 s_path_to_neighbor_father.path_length--;
1043 decrement_counters_in_path(&s_path_to_neighbor_father); 1067 decrement_counters_in_path(&s_path_to_neighbor_father);
1044 return CARRY_ON; 1068 return CARRY_ON;
1045} 1069}
1046 1070
1047
1048/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of 1071/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
1049 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], 1072 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
1050 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. 1073 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
@@ -1052,122 +1075,127 @@ static int get_far_parent (struct tree_balance * p_s_tb,
1052 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1075 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1053 * CARRY_ON - schedule didn't occur while the function worked; 1076 * CARRY_ON - schedule didn't occur while the function worked;
1054 */ 1077 */
1055static int get_parents (struct tree_balance * p_s_tb, int n_h) 1078static int get_parents(struct tree_balance *p_s_tb, int n_h)
1056{ 1079{
1057 struct path * p_s_path = p_s_tb->tb_path; 1080 struct path *p_s_path = p_s_tb->tb_path;
1058 int n_position, 1081 int n_position,
1059 n_ret_value, 1082 n_ret_value,
1060 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1083 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1061 struct buffer_head * p_s_curf, 1084 struct buffer_head *p_s_curf, *p_s_curcf;
1062 * p_s_curcf; 1085
1063 1086 /* Current node is the root of the tree or will be root of the tree */
1064 /* Current node is the root of the tree or will be root of the tree */ 1087 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1065 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { 1088 /* The root can not have parents.
1066 /* The root can not have parents. 1089 Release nodes which previously were obtained as parents of the current node neighbors. */
1067 Release nodes which previously were obtained as parents of the current node neighbors. */ 1090 decrement_bcount(p_s_tb->FL[n_h]);
1091 decrement_bcount(p_s_tb->CFL[n_h]);
1092 decrement_bcount(p_s_tb->FR[n_h]);
1093 decrement_bcount(p_s_tb->CFR[n_h]);
1094 p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
1095 p_s_tb->CFR[n_h] = NULL;
1096 return CARRY_ON;
1097 }
1098
1099 /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1100 if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
1101 /* Current node is not the first child of its parent. */
1102 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1103 p_s_curf = p_s_curcf =
1104 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1105 get_bh(p_s_curf);
1106 get_bh(p_s_curf);
1107 p_s_tb->lkey[n_h] = n_position - 1;
1108 } else {
1109 /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1110 Calculate current common parent of L[n_path_offset] and the current node. Note that
1111 CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1112 Calculate lkey[n_path_offset]. */
1113 if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1114 &p_s_curcf,
1115 LEFT_PARENTS)) != CARRY_ON)
1116 return n_ret_value;
1117 }
1118
1068 decrement_bcount(p_s_tb->FL[n_h]); 1119 decrement_bcount(p_s_tb->FL[n_h]);
1120 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
1069 decrement_bcount(p_s_tb->CFL[n_h]); 1121 decrement_bcount(p_s_tb->CFL[n_h]);
1070 decrement_bcount(p_s_tb->FR[n_h]); 1122 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */
1071 decrement_bcount(p_s_tb->CFR[n_h]); 1123
1072 p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL; 1124 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1073 return CARRY_ON; 1125 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1074 } 1126 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1075
1076 /* Get parent FL[n_path_offset] of L[n_path_offset]. */
1077 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) {
1078 /* Current node is not the first child of its parent. */
1079 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/
1080 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1081 get_bh(p_s_curf) ;
1082 get_bh(p_s_curf) ;
1083 p_s_tb->lkey[n_h] = n_position - 1;
1084 }
1085 else {
1086 /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
1087 Calculate current common parent of L[n_path_offset] and the current node. Note that
1088 CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
1089 Calculate lkey[n_path_offset]. */
1090 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
1091 &p_s_curcf, LEFT_PARENTS)) != CARRY_ON )
1092 return n_ret_value;
1093 }
1094
1095 decrement_bcount(p_s_tb->FL[n_h]);
1096 p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */
1097 decrement_bcount(p_s_tb->CFL[n_h]);
1098 p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */
1099
1100 RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
1101 (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
1102 "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);
1103 1127
1104/* Get parent FR[n_h] of R[n_h]. */ 1128/* Get parent FR[n_h] of R[n_h]. */
1105 1129
1106/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ 1130/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
1107 if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) { 1131 if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
1108/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. 1132/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
1109 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] 1133 Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
1110 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ 1134 not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
1111 if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON ) 1135 if ((n_ret_value =
1112 return n_ret_value; 1136 get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
1113 } 1137 RIGHT_PARENTS)) != CARRY_ON)
1114 else { 1138 return n_ret_value;
1139 } else {
1115/* Current node is not the last child of its parent F[n_h]. */ 1140/* Current node is not the last child of its parent F[n_h]. */
1116 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ 1141 /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
1117 p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); 1142 p_s_curf = p_s_curcf =
1118 get_bh(p_s_curf) ; 1143 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
1119 get_bh(p_s_curf) ; 1144 get_bh(p_s_curf);
1120 p_s_tb->rkey[n_h] = n_position; 1145 get_bh(p_s_curf);
1121 } 1146 p_s_tb->rkey[n_h] = n_position;
1122 1147 }
1123 decrement_bcount(p_s_tb->FR[n_h]);
1124 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */
1125
1126 decrement_bcount(p_s_tb->CFR[n_h]);
1127 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */
1128
1129 RFALSE( (p_s_curf && !B_IS_IN_TREE (p_s_curf)) ||
1130 (p_s_curcf && !B_IS_IN_TREE (p_s_curcf)),
1131 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1132
1133 return CARRY_ON;
1134}
1135 1148
1149 decrement_bcount(p_s_tb->FR[n_h]);
1150 p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */
1151
1152 decrement_bcount(p_s_tb->CFR[n_h]);
1153 p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. */
1154
1155 RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
1156 (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
1157 "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
1158
1159 return CARRY_ON;
1160}
1136 1161
1137/* it is possible to remove node as result of shiftings to 1162/* it is possible to remove node as result of shiftings to
1138 neighbors even when we insert or paste item. */ 1163 neighbors even when we insert or paste item. */
1139static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h) 1164static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
1165 struct tree_balance *tb, int h)
1140{ 1166{
1141 struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h); 1167 struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
1142 int levbytes = tb->insert_size[h]; 1168 int levbytes = tb->insert_size[h];
1143 struct item_head * ih; 1169 struct item_head *ih;
1144 struct reiserfs_key * r_key = NULL; 1170 struct reiserfs_key *r_key = NULL;
1145 1171
1146 ih = B_N_PITEM_HEAD (Sh, 0); 1172 ih = B_N_PITEM_HEAD(Sh, 0);
1147 if ( tb->CFR[h] ) 1173 if (tb->CFR[h])
1148 r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]); 1174 r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);
1149 1175
1150 if ( 1176 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
1151 lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes 1177 /* shifting may merge items which might save space */
1152 /* shifting may merge items which might save space */ 1178 -
1153 - (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0) 1179 ((!h
1154 - (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0) 1180 && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
1155 + (( h ) ? KEY_SIZE : 0)) 1181 -
1156 { 1182 ((!h && r_key
1157 /* node can not be removed */ 1183 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
1158 if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */ 1184 + ((h) ? KEY_SIZE : 0)) {
1159 if ( ! h ) 1185 /* node can not be removed */
1160 tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0); 1186 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1161 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1187 if (!h)
1162 return NO_BALANCING_NEEDED; 1188 tb->s0num =
1189 B_NR_ITEMS(Sh) +
1190 ((mode == M_INSERT) ? 1 : 0);
1191 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1192 return NO_BALANCING_NEEDED;
1193 }
1163 } 1194 }
1164 } 1195 PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
1165 PROC_INFO_INC( tb -> tb_sb, can_node_be_removed[ h ] ); 1196 return !NO_BALANCING_NEEDED;
1166 return !NO_BALANCING_NEEDED;
1167} 1197}
1168 1198
1169
1170
1171/* Check whether current node S[h] is balanced when increasing its size by 1199/* Check whether current node S[h] is balanced when increasing its size by
1172 * Inserting or Pasting. 1200 * Inserting or Pasting.
1173 * Calculate parameters for balancing for current level h. 1201 * Calculate parameters for balancing for current level h.
@@ -1182,154 +1210,157 @@ static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree
1182 * -2 - no disk space. 1210 * -2 - no disk space.
1183 */ 1211 */
1184/* ip means Inserting or Pasting */ 1212/* ip means Inserting or Pasting */
1185static int ip_check_balance (struct tree_balance * tb, int h) 1213static int ip_check_balance(struct tree_balance *tb, int h)
1186{ 1214{
1187 struct virtual_node * vn = tb->tb_vn; 1215 struct virtual_node *vn = tb->tb_vn;
1188 int levbytes, /* Number of bytes that must be inserted into (value 1216 int levbytes, /* Number of bytes that must be inserted into (value
1189 is negative if bytes are deleted) buffer which 1217 is negative if bytes are deleted) buffer which
1190 contains node being balanced. The mnemonic is 1218 contains node being balanced. The mnemonic is
1191 that the attempted change in node space used level 1219 that the attempted change in node space used level
1192 is levbytes bytes. */ 1220 is levbytes bytes. */
1193 n_ret_value; 1221 n_ret_value;
1194 1222
1195 int lfree, sfree, rfree /* free space in L, S and R */; 1223 int lfree, sfree, rfree /* free space in L, S and R */ ;
1196 1224
1197 /* nver is short for number of vertixes, and lnver is the number if 1225 /* nver is short for number of vertixes, and lnver is the number if
1198 we shift to the left, rnver is the number if we shift to the 1226 we shift to the left, rnver is the number if we shift to the
1199 right, and lrnver is the number if we shift in both directions. 1227 right, and lrnver is the number if we shift in both directions.
1200 The goal is to minimize first the number of vertixes, and second, 1228 The goal is to minimize first the number of vertixes, and second,
1201 the number of vertixes whose contents are changed by shifting, 1229 the number of vertixes whose contents are changed by shifting,
1202 and third the number of uncached vertixes whose contents are 1230 and third the number of uncached vertixes whose contents are
1203 changed by shifting and must be read from disk. */ 1231 changed by shifting and must be read from disk. */
1204 int nver, lnver, rnver, lrnver; 1232 int nver, lnver, rnver, lrnver;
1205 1233
1206 /* used at leaf level only, S0 = S[0] is the node being balanced, 1234 /* used at leaf level only, S0 = S[0] is the node being balanced,
1207 sInum [ I = 0,1,2 ] is the number of items that will 1235 sInum [ I = 0,1,2 ] is the number of items that will
1208 remain in node SI after balancing. S1 and S2 are new 1236 remain in node SI after balancing. S1 and S2 are new
1209 nodes that might be created. */ 1237 nodes that might be created. */
1210 1238
1211 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. 1239 /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters.
1212 where 4th parameter is s1bytes and 5th - s2bytes 1240 where 4th parameter is s1bytes and 5th - s2bytes
1213 */ 1241 */
1214 short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases 1242 short snum012[40] = { 0, }; /* s0num, s1num, s2num for 8 cases
1215 0,1 - do not shift and do not shift but bottle 1243 0,1 - do not shift and do not shift but bottle
1216 2 - shift only whole item to left 1244 2 - shift only whole item to left
1217 3 - shift to left and bottle as much as possible 1245 3 - shift to left and bottle as much as possible
1218 4,5 - shift to right (whole items and as much as possible 1246 4,5 - shift to right (whole items and as much as possible
1219 6,7 - shift to both directions (whole items and as much as possible) 1247 6,7 - shift to both directions (whole items and as much as possible)
1220 */ 1248 */
1221 1249
1222 /* Sh is the node whose balance is currently being checked */ 1250 /* Sh is the node whose balance is currently being checked */
1223 struct buffer_head * Sh; 1251 struct buffer_head *Sh;
1224 1252
1225 Sh = PATH_H_PBUFFER (tb->tb_path, h); 1253 Sh = PATH_H_PBUFFER(tb->tb_path, h);
1226 levbytes = tb->insert_size[h]; 1254 levbytes = tb->insert_size[h];
1227 1255
1228 /* Calculate balance parameters for creating new root. */ 1256 /* Calculate balance parameters for creating new root. */
1229 if ( ! Sh ) { 1257 if (!Sh) {
1230 if ( ! h ) 1258 if (!h)
1231 reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0"); 1259 reiserfs_panic(tb->tb_sb,
1232 switch ( n_ret_value = get_empty_nodes (tb, h) ) { 1260 "vs-8210: ip_check_balance: S[0] can not be 0");
1233 case CARRY_ON: 1261 switch (n_ret_value = get_empty_nodes(tb, h)) {
1234 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1262 case CARRY_ON:
1235 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1263 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1236 1264 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1237 case NO_DISK_SPACE: 1265
1238 case REPEAT_SEARCH: 1266 case NO_DISK_SPACE:
1239 return n_ret_value; 1267 case REPEAT_SEARCH:
1240 default: 1268 return n_ret_value;
1241 reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); 1269 default:
1270 reiserfs_panic(tb->tb_sb,
1271 "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
1272 }
1242 } 1273 }
1243 }
1244
1245 if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */
1246 return n_ret_value;
1247
1248 sfree = B_FREE_SPACE (Sh);
1249
1250 /* get free space of neighbors */
1251 rfree = get_rfree (tb, h);
1252 lfree = get_lfree (tb, h);
1253
1254 if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED)
1255 /* and new item fits into node S[h] without any shifting */
1256 return NO_BALANCING_NEEDED;
1257
1258 create_virtual_node (tb, h);
1259
1260 /*
1261 determine maximal number of items we can shift to the left neighbor (in tb structure)
1262 and the maximal number of bytes that can flow to the left neighbor
1263 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1264 */
1265 check_left (tb, h, lfree);
1266
1267 /*
1268 determine maximal number of items we can shift to the right neighbor (in tb structure)
1269 and the maximal number of bytes that can flow to the right neighbor
1270 from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1271 */
1272 check_right (tb, h, rfree);
1273
1274
1275 /* all contents of internal node S[h] can be moved into its
1276 neighbors, S[h] will be removed after balancing */
1277 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1278 int to_r;
1279
1280 /* Since we are working on internal nodes, and our internal
1281 nodes have fixed size entries, then we can balance by the
1282 number of items rather than the space they consume. In this
1283 routine we set the left node equal to the right node,
1284 allowing a difference of less than or equal to 1 child
1285 pointer. */
1286 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1287 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1288 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1289 return CARRY_ON;
1290 }
1291
1292 /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1293 RFALSE( h &&
1294 ( tb->lnum[h] >= vn->vn_nr_item + 1 ||
1295 tb->rnum[h] >= vn->vn_nr_item + 1),
1296 "vs-8220: tree is not balanced on internal level");
1297 RFALSE( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1298 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) ),
1299 "vs-8225: tree is not balanced on leaf level");
1300
1301 /* all contents of S[0] can be moved into its neighbors
1302 S[0] will be removed after balancing. */
1303 if (!h && is_leaf_removable (tb))
1304 return CARRY_ON;
1305 1274
1275 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON) /* get parents of S[h] neighbors. */
1276 return n_ret_value;
1306 1277
1307 /* why do we perform this check here rather than earlier?? 1278 sfree = B_FREE_SPACE(Sh);
1308 Answer: we can win 1 node in some cases above. Moreover we 1279
1309 checked it above, when we checked, that S[0] is not removable 1280 /* get free space of neighbors */
1310 in principle */ 1281 rfree = get_rfree(tb, h);
1311 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ 1282 lfree = get_lfree(tb, h);
1312 if ( ! h ) 1283
1313 tb->s0num = vn->vn_nr_item; 1284 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
1314 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1285 NO_BALANCING_NEEDED)
1315 return NO_BALANCING_NEEDED; 1286 /* and new item fits into node S[h] without any shifting */
1316 } 1287 return NO_BALANCING_NEEDED;
1317 1288
1289 create_virtual_node(tb, h);
1318 1290
1319 { 1291 /*
1320 int lpar, rpar, nset, lset, rset, lrset; 1292 determine maximal number of items we can shift to the left neighbor (in tb structure)
1321 /* 1293 and the maximal number of bytes that can flow to the left neighbor
1322 * regular overflowing of the node 1294 from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
1323 */ 1295 */
1296 check_left(tb, h, lfree);
1324 1297
1325 /* get_num_ver works in 2 modes (FLOW & NO_FLOW) 1298 /*
1326 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) 1299 determine maximal number of items we can shift to the right neighbor (in tb structure)
1327 nset, lset, rset, lrset - shows, whether flowing items give better packing 1300 and the maximal number of bytes that can flow to the right neighbor
1328 */ 1301 from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
1302 */
1303 check_right(tb, h, rfree);
1304
1305 /* all contents of internal node S[h] can be moved into its
1306 neighbors, S[h] will be removed after balancing */
1307 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
1308 int to_r;
1309
1310 /* Since we are working on internal nodes, and our internal
1311 nodes have fixed size entries, then we can balance by the
1312 number of items rather than the space they consume. In this
1313 routine we set the left node equal to the right node,
1314 allowing a difference of less than or equal to 1 child
1315 pointer. */
1316 to_r =
1317 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1318 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1319 tb->rnum[h]);
1320 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1321 -1, -1);
1322 return CARRY_ON;
1323 }
1324
1325 /* this checks balance condition, that any two neighboring nodes can not fit in one node */
1326 RFALSE(h &&
1327 (tb->lnum[h] >= vn->vn_nr_item + 1 ||
1328 tb->rnum[h] >= vn->vn_nr_item + 1),
1329 "vs-8220: tree is not balanced on internal level");
1330 RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
1331 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
1332 "vs-8225: tree is not balanced on leaf level");
1333
1334 /* all contents of S[0] can be moved into its neighbors
1335 S[0] will be removed after balancing. */
1336 if (!h && is_leaf_removable(tb))
1337 return CARRY_ON;
1338
1339 /* why do we perform this check here rather than earlier??
1340 Answer: we can win 1 node in some cases above. Moreover we
1341 checked it above, when we checked, that S[0] is not removable
1342 in principle */
1343 if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */
1344 if (!h)
1345 tb->s0num = vn->vn_nr_item;
1346 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1347 return NO_BALANCING_NEEDED;
1348 }
1349
1350 {
1351 int lpar, rpar, nset, lset, rset, lrset;
1352 /*
1353 * regular overflowing of the node
1354 */
1355
1356 /* get_num_ver works in 2 modes (FLOW & NO_FLOW)
1357 lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
1358 nset, lset, rset, lrset - shows, whether flowing items give better packing
1359 */
1329#define FLOW 1 1360#define FLOW 1
1330#define NO_FLOW 0 /* do not any splitting */ 1361#define NO_FLOW 0 /* do not any splitting */
1331 1362
1332 /* we choose one the following */ 1363 /* we choose one the following */
1333#define NOTHING_SHIFT_NO_FLOW 0 1364#define NOTHING_SHIFT_NO_FLOW 0
1334#define NOTHING_SHIFT_FLOW 5 1365#define NOTHING_SHIFT_FLOW 5
1335#define LEFT_SHIFT_NO_FLOW 10 1366#define LEFT_SHIFT_NO_FLOW 10
@@ -1339,164 +1370,173 @@ static int ip_check_balance (struct tree_balance * tb, int h)
1339#define LR_SHIFT_NO_FLOW 30 1370#define LR_SHIFT_NO_FLOW 30
1340#define LR_SHIFT_FLOW 35 1371#define LR_SHIFT_FLOW 35
1341 1372
1373 lpar = tb->lnum[h];
1374 rpar = tb->rnum[h];
1375
1376 /* calculate number of blocks S[h] must be split into when
1377 nothing is shifted to the neighbors,
1378 as well as number of items in each part of the split node (s012 numbers),
1379 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
1380 nset = NOTHING_SHIFT_NO_FLOW;
1381 nver = get_num_ver(vn->vn_mode, tb, h,
1382 0, -1, h ? vn->vn_nr_item : 0, -1,
1383 snum012, NO_FLOW);
1384
1385 if (!h) {
1386 int nver1;
1387
1388 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
1389 nver1 = get_num_ver(vn->vn_mode, tb, h,
1390 0, -1, 0, -1,
1391 snum012 + NOTHING_SHIFT_FLOW, FLOW);
1392 if (nver > nver1)
1393 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1394 }
1342 1395
1343 lpar = tb->lnum[h]; 1396 /* calculate number of blocks S[h] must be split into when
1344 rpar = tb->rnum[h]; 1397 l_shift_num first items and l_shift_bytes of the right most
1345 1398 liquid item to be shifted are shifted to the left neighbor,
1346 1399 as well as number of items in each part of the splitted node (s012 numbers),
1347 /* calculate number of blocks S[h] must be split into when 1400 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1348 nothing is shifted to the neighbors, 1401 */
1349 as well as number of items in each part of the split node (s012 numbers), 1402 lset = LEFT_SHIFT_NO_FLOW;
1350 and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ 1403 lnver = get_num_ver(vn->vn_mode, tb, h,
1351 nset = NOTHING_SHIFT_NO_FLOW; 1404 lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1352 nver = get_num_ver (vn->vn_mode, tb, h, 1405 -1, h ? vn->vn_nr_item : 0, -1,
1353 0, -1, h?vn->vn_nr_item:0, -1, 1406 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1354 snum012, NO_FLOW); 1407 if (!h) {
1355 1408 int lnver1;
1356 if (!h) 1409
1357 { 1410 lnver1 = get_num_ver(vn->vn_mode, tb, h,
1358 int nver1; 1411 lpar -
1359 1412 ((tb->lbytes != -1) ? 1 : 0),
1360 /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ 1413 tb->lbytes, 0, -1,
1361 nver1 = get_num_ver (vn->vn_mode, tb, h, 1414 snum012 + LEFT_SHIFT_FLOW, FLOW);
1362 0, -1, 0, -1, 1415 if (lnver > lnver1)
1363 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1416 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1364 if (nver > nver1) 1417 }
1365 nset = NOTHING_SHIFT_FLOW, nver = nver1;
1366 }
1367
1368
1369 /* calculate number of blocks S[h] must be split into when
1370 l_shift_num first items and l_shift_bytes of the right most
1371 liquid item to be shifted are shifted to the left neighbor,
1372 as well as number of items in each part of the splitted node (s012 numbers),
1373 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1374 */
1375 lset = LEFT_SHIFT_NO_FLOW;
1376 lnver = get_num_ver (vn->vn_mode, tb, h,
1377 lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1,
1378 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
1379 if (!h)
1380 {
1381 int lnver1;
1382
1383 lnver1 = get_num_ver (vn->vn_mode, tb, h,
1384 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, 0, -1,
1385 snum012 + LEFT_SHIFT_FLOW, FLOW);
1386 if (lnver > lnver1)
1387 lset = LEFT_SHIFT_FLOW, lnver = lnver1;
1388 }
1389
1390
1391 /* calculate number of blocks S[h] must be split into when
1392 r_shift_num first items and r_shift_bytes of the left most
1393 liquid item to be shifted are shifted to the right neighbor,
1394 as well as number of items in each part of the splitted node (s012 numbers),
1395 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1396 */
1397 rset = RIGHT_SHIFT_NO_FLOW;
1398 rnver = get_num_ver (vn->vn_mode, tb, h,
1399 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1,
1400 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1401 if (!h)
1402 {
1403 int rnver1;
1404
1405 rnver1 = get_num_ver (vn->vn_mode, tb, h,
1406 0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1407 snum012 + RIGHT_SHIFT_FLOW, FLOW);
1408
1409 if (rnver > rnver1)
1410 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1411 }
1412
1413
1414 /* calculate number of blocks S[h] must be split into when
1415 items are shifted in both directions,
1416 as well as number of items in each part of the splitted node (s012 numbers),
1417 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1418 */
1419 lrset = LR_SHIFT_NO_FLOW;
1420 lrnver = get_num_ver (vn->vn_mode, tb, h,
1421 lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1,
1422 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1423 if (!h)
1424 {
1425 int lrnver1;
1426
1427 lrnver1 = get_num_ver (vn->vn_mode, tb, h,
1428 lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
1429 snum012 + LR_SHIFT_FLOW, FLOW);
1430 if (lrnver > lrnver1)
1431 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1432 }
1433
1434
1435 1418
1436 /* Our general shifting strategy is: 1419 /* calculate number of blocks S[h] must be split into when
1437 1) to minimized number of new nodes; 1420 r_shift_num first items and r_shift_bytes of the left most
1438 2) to minimized number of neighbors involved in shifting; 1421 liquid item to be shifted are shifted to the right neighbor,
1439 3) to minimized number of disk reads; */ 1422 as well as number of items in each part of the splitted node (s012 numbers),
1423 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1424 */
1425 rset = RIGHT_SHIFT_NO_FLOW;
1426 rnver = get_num_ver(vn->vn_mode, tb, h,
1427 0, -1,
1428 h ? (vn->vn_nr_item - rpar) : (rpar -
1429 ((tb->
1430 rbytes !=
1431 -1) ? 1 :
1432 0)), -1,
1433 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
1434 if (!h) {
1435 int rnver1;
1436
1437 rnver1 = get_num_ver(vn->vn_mode, tb, h,
1438 0, -1,
1439 (rpar -
1440 ((tb->rbytes != -1) ? 1 : 0)),
1441 tb->rbytes,
1442 snum012 + RIGHT_SHIFT_FLOW, FLOW);
1443
1444 if (rnver > rnver1)
1445 rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
1446 }
1440 1447
1441 /* we can win TWO or ONE nodes by shifting in both directions */ 1448 /* calculate number of blocks S[h] must be split into when
1442 if (lrnver < lnver && lrnver < rnver) 1449 items are shifted in both directions,
1443 { 1450 as well as number of items in each part of the splitted node (s012 numbers),
1444 RFALSE( h && 1451 and number of bytes (s1bytes) of the shared drop which flow to S1 if any
1445 (tb->lnum[h] != 1 || 1452 */
1446 tb->rnum[h] != 1 || 1453 lrset = LR_SHIFT_NO_FLOW;
1447 lrnver != 1 || rnver != 2 || lnver != 2 || h != 1), 1454 lrnver = get_num_ver(vn->vn_mode, tb, h,
1448 "vs-8230: bad h"); 1455 lpar - ((h || tb->lbytes == -1) ? 0 : 1),
1449 if (lrset == LR_SHIFT_FLOW) 1456 -1,
1450 set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset, 1457 h ? (vn->vn_nr_item - rpar) : (rpar -
1451 tb->lbytes, tb->rbytes); 1458 ((tb->
1452 else 1459 rbytes !=
1453 set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1), 1460 -1) ? 1 :
1454 tb->rnum[h] - ((tb->rbytes == -1) ? 0 : 1), lrnver, snum012 + lrset, -1, -1); 1461 0)), -1,
1455 1462 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
1456 return CARRY_ON; 1463 if (!h) {
1457 } 1464 int lrnver1;
1465
1466 lrnver1 = get_num_ver(vn->vn_mode, tb, h,
1467 lpar -
1468 ((tb->lbytes != -1) ? 1 : 0),
1469 tb->lbytes,
1470 (rpar -
1471 ((tb->rbytes != -1) ? 1 : 0)),
1472 tb->rbytes,
1473 snum012 + LR_SHIFT_FLOW, FLOW);
1474 if (lrnver > lrnver1)
1475 lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
1476 }
1458 1477
1459 /* if shifting doesn't lead to better packing then don't shift */ 1478 /* Our general shifting strategy is:
1460 if (nver == lrnver) 1479 1) to minimized number of new nodes;
1461 { 1480 2) to minimized number of neighbors involved in shifting;
1462 set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1); 1481 3) to minimized number of disk reads; */
1463 return CARRY_ON; 1482
1464 } 1483 /* we can win TWO or ONE nodes by shifting in both directions */
1484 if (lrnver < lnver && lrnver < rnver) {
1485 RFALSE(h &&
1486 (tb->lnum[h] != 1 ||
1487 tb->rnum[h] != 1 ||
1488 lrnver != 1 || rnver != 2 || lnver != 2
1489 || h != 1), "vs-8230: bad h");
1490 if (lrset == LR_SHIFT_FLOW)
1491 set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
1492 lrnver, snum012 + lrset,
1493 tb->lbytes, tb->rbytes);
1494 else
1495 set_parameters(tb, h,
1496 tb->lnum[h] -
1497 ((tb->lbytes == -1) ? 0 : 1),
1498 tb->rnum[h] -
1499 ((tb->rbytes == -1) ? 0 : 1),
1500 lrnver, snum012 + lrset, -1, -1);
1501
1502 return CARRY_ON;
1503 }
1465 1504
1505 /* if shifting doesn't lead to better packing then don't shift */
1506 if (nver == lrnver) {
1507 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
1508 -1);
1509 return CARRY_ON;
1510 }
1466 1511
1467 /* now we know that for better packing shifting in only one 1512 /* now we know that for better packing shifting in only one
1468 direction either to the left or to the right is required */ 1513 direction either to the left or to the right is required */
1469 1514
1470 /* if shifting to the left is better than shifting to the right */ 1515 /* if shifting to the left is better than shifting to the right */
1471 if (lnver < rnver) 1516 if (lnver < rnver) {
1472 { 1517 SET_PAR_SHIFT_LEFT;
1473 SET_PAR_SHIFT_LEFT; 1518 return CARRY_ON;
1474 return CARRY_ON; 1519 }
1475 }
1476 1520
1477 /* if shifting to the right is better than shifting to the left */ 1521 /* if shifting to the right is better than shifting to the left */
1478 if (lnver > rnver) 1522 if (lnver > rnver) {
1479 { 1523 SET_PAR_SHIFT_RIGHT;
1480 SET_PAR_SHIFT_RIGHT; 1524 return CARRY_ON;
1481 return CARRY_ON; 1525 }
1482 }
1483 1526
1527 /* now shifting in either direction gives the same number
1528 of nodes and we can make use of the cached neighbors */
1529 if (is_left_neighbor_in_cache(tb, h)) {
1530 SET_PAR_SHIFT_LEFT;
1531 return CARRY_ON;
1532 }
1484 1533
1485 /* now shifting in either direction gives the same number 1534 /* shift to the right independently on whether the right neighbor in cache or not */
1486 of nodes and we can make use of the cached neighbors */ 1535 SET_PAR_SHIFT_RIGHT;
1487 if (is_left_neighbor_in_cache (tb,h)) 1536 return CARRY_ON;
1488 {
1489 SET_PAR_SHIFT_LEFT;
1490 return CARRY_ON;
1491 } 1537 }
1492
1493 /* shift to the right independently on whether the right neighbor in cache or not */
1494 SET_PAR_SHIFT_RIGHT;
1495 return CARRY_ON;
1496 }
1497} 1538}
1498 1539
1499
1500/* Check whether current node S[h] is balanced when Decreasing its size by 1540/* Check whether current node S[h] is balanced when Decreasing its size by
1501 * Deleting or Cutting for INTERNAL node of S+tree. 1541 * Deleting or Cutting for INTERNAL node of S+tree.
1502 * Calculate parameters for balancing for current level h. 1542 * Calculate parameters for balancing for current level h.
@@ -1513,157 +1553,173 @@ static int ip_check_balance (struct tree_balance * tb, int h)
1513 * Note: Items of internal nodes have fixed size, so the balance condition for 1553 * Note: Items of internal nodes have fixed size, so the balance condition for
1514 * the internal part of S+tree is as for the B-trees. 1554 * the internal part of S+tree is as for the B-trees.
1515 */ 1555 */
1516static int dc_check_balance_internal (struct tree_balance * tb, int h) 1556static int dc_check_balance_internal(struct tree_balance *tb, int h)
1517{ 1557{
1518 struct virtual_node * vn = tb->tb_vn; 1558 struct virtual_node *vn = tb->tb_vn;
1519 1559
1520 /* Sh is the node whose balance is currently being checked, 1560 /* Sh is the node whose balance is currently being checked,
1521 and Fh is its father. */ 1561 and Fh is its father. */
1522 struct buffer_head * Sh, * Fh; 1562 struct buffer_head *Sh, *Fh;
1523 int maxsize, 1563 int maxsize, n_ret_value;
1524 n_ret_value; 1564 int lfree, rfree /* free space in L and R */ ;
1525 int lfree, rfree /* free space in L and R */;
1526 1565
1527 Sh = PATH_H_PBUFFER (tb->tb_path, h); 1566 Sh = PATH_H_PBUFFER(tb->tb_path, h);
1528 Fh = PATH_H_PPARENT (tb->tb_path, h); 1567 Fh = PATH_H_PPARENT(tb->tb_path, h);
1529 1568
1530 maxsize = MAX_CHILD_SIZE(Sh); 1569 maxsize = MAX_CHILD_SIZE(Sh);
1531 1570
1532/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ 1571/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1533/* new_nr_item = number of items node would have if operation is */ 1572/* new_nr_item = number of items node would have if operation is */
1534/* performed without balancing (new_nr_item); */ 1573/* performed without balancing (new_nr_item); */
1535 create_virtual_node (tb, h); 1574 create_virtual_node(tb, h);
1536 1575
1537 if ( ! Fh ) 1576 if (!Fh) { /* S[h] is the root. */
1538 { /* S[h] is the root. */ 1577 if (vn->vn_nr_item > 0) {
1539 if ( vn->vn_nr_item > 0 ) 1578 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1540 { 1579 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */
1541 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1580 }
1542 return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ 1581 /* new_nr_item == 0.
1582 * Current root will be deleted resulting in
1583 * decrementing the tree height. */
1584 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1585 return CARRY_ON;
1586 }
1587
1588 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1589 return n_ret_value;
1590
1591 /* get free space of neighbors */
1592 rfree = get_rfree(tb, h);
1593 lfree = get_lfree(tb, h);
1594
1595 /* determine maximal number of items we can fit into neighbors */
1596 check_left(tb, h, lfree);
1597 check_right(tb, h, rfree);
1598
1599 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { /* Balance condition for the internal node is valid.
1600 * In this case we balance only if it leads to better packing. */
1601 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { /* Here we join S[h] with one of its neighbors,
1602 * which is impossible with greater values of new_nr_item. */
1603 if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1604 /* All contents of S[h] can be moved to L[h]. */
1605 int n;
1606 int order_L;
1607
1608 order_L =
1609 ((n =
1610 PATH_H_B_ITEM_ORDER(tb->tb_path,
1611 h)) ==
1612 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1613 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1614 (DC_SIZE + KEY_SIZE);
1615 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1616 -1);
1617 return CARRY_ON;
1618 }
1619
1620 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1621 /* All contents of S[h] can be moved to R[h]. */
1622 int n;
1623 int order_R;
1624
1625 order_R =
1626 ((n =
1627 PATH_H_B_ITEM_ORDER(tb->tb_path,
1628 h)) ==
1629 B_NR_ITEMS(Fh)) ? 0 : n + 1;
1630 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1631 (DC_SIZE + KEY_SIZE);
1632 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1633 -1);
1634 return CARRY_ON;
1635 }
1636 }
1637
1638 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1639 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1640 int to_r;
1641
1642 to_r =
1643 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1644 tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1645 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1646 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1647 0, NULL, -1, -1);
1648 return CARRY_ON;
1649 }
1650
1651 /* Balancing does not lead to better packing. */
1652 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1653 return NO_BALANCING_NEEDED;
1543 } 1654 }
1544 /* new_nr_item == 0. 1655
1545 * Current root will be deleted resulting in 1656 /* Current node contain insufficient number of items. Balancing is required. */
1546 * decrementing the tree height. */ 1657 /* Check whether we can merge S[h] with left neighbor. */
1547 set_parameters (tb, h, 0, 0, 0, NULL, -1, -1); 1658 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1548 return CARRY_ON; 1659 if (is_left_neighbor_in_cache(tb, h)
1549 } 1660 || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1550 1661 int n;
1551 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) 1662 int order_L;
1552 return n_ret_value; 1663
1553 1664 order_L =
1554 1665 ((n =
1555 /* get free space of neighbors */ 1666 PATH_H_B_ITEM_ORDER(tb->tb_path,
1556 rfree = get_rfree (tb, h); 1667 h)) ==
1557 lfree = get_lfree (tb, h); 1668 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1558 1669 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1559 /* determine maximal number of items we can fit into neighbors */ 1670 KEY_SIZE);
1560 check_left (tb, h, lfree); 1671 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1561 check_right (tb, h, rfree); 1672 return CARRY_ON;
1562 1673 }
1563 1674
1564 if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) ) 1675 /* Check whether we can merge S[h] with right neighbor. */
1565 { /* Balance condition for the internal node is valid. 1676 if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1566 * In this case we balance only if it leads to better packing. */ 1677 int n;
1567 if ( vn->vn_nr_item == MIN_NR_KEY(Sh) ) 1678 int order_R;
1568 { /* Here we join S[h] with one of its neighbors, 1679
1569 * which is impossible with greater values of new_nr_item. */ 1680 order_R =
1570 if ( tb->lnum[h] >= vn->vn_nr_item + 1 ) 1681 ((n =
1571 { 1682 PATH_H_B_ITEM_ORDER(tb->tb_path,
1572 /* All contents of S[h] can be moved to L[h]. */ 1683 h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1573 int n; 1684 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1574 int order_L; 1685 KEY_SIZE);
1575 1686 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1576 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1687 return CARRY_ON;
1577 n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
1578 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1579 return CARRY_ON;
1580 }
1581
1582 if ( tb->rnum[h] >= vn->vn_nr_item + 1 )
1583 {
1584 /* All contents of S[h] can be moved to R[h]. */
1585 int n;
1586 int order_R;
1587
1588 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1;
1589 n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
1590 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1591 return CARRY_ON;
1592 }
1593 } 1688 }
1594 1689
1595 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) 1690 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1596 { 1691 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1597 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1692 int to_r;
1598 int to_r; 1693
1694 to_r =
1695 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1696 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1697 tb->rnum[h]);
1698 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1699 -1, -1);
1700 return CARRY_ON;
1701 }
1599 1702
1600 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - 1703 /* For internal nodes try to borrow item from a neighbor */
1601 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); 1704 RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1602 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); 1705
1603 return CARRY_ON; 1706 /* Borrow one or two items from caching neighbor */
1707 if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1708 int from_l;
1709
1710 from_l =
1711 (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1712 1) / 2 - (vn->vn_nr_item + 1);
1713 set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1714 return CARRY_ON;
1604 } 1715 }
1605 1716
1606 /* Balancing does not lead to better packing. */ 1717 set_parameters(tb, h, 0,
1607 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); 1718 -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1608 return NO_BALANCING_NEEDED; 1719 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1609 }
1610
1611 /* Current node contain insufficient number of items. Balancing is required. */
1612 /* Check whether we can merge S[h] with left neighbor. */
1613 if (tb->lnum[h] >= vn->vn_nr_item + 1)
1614 if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h])
1615 {
1616 int n;
1617 int order_L;
1618
1619 order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1620 n = dc_size(B_N_CHILD(tb->FL[h],order_L)) / (DC_SIZE + KEY_SIZE);
1621 set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1);
1622 return CARRY_ON; 1720 return CARRY_ON;
1623 }
1624
1625 /* Check whether we can merge S[h] with right neighbor. */
1626 if (tb->rnum[h] >= vn->vn_nr_item + 1)
1627 {
1628 int n;
1629 int order_R;
1630
1631 order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1632 n = dc_size(B_N_CHILD(tb->FR[h],order_R)) / (DC_SIZE + KEY_SIZE);
1633 set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1);
1634 return CARRY_ON;
1635 }
1636
1637 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1638 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)
1639 {
1640 int to_r;
1641
1642 to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 -
1643 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1644 set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1);
1645 return CARRY_ON;
1646 }
1647
1648 /* For internal nodes try to borrow item from a neighbor */
1649 RFALSE( !tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1650
1651 /* Borrow one or two items from caching neighbor */
1652 if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h])
1653 {
1654 int from_l;
1655
1656 from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1);
1657 set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1);
1658 return CARRY_ON;
1659 }
1660
1661 set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1,
1662 NULL, -1, -1);
1663 return CARRY_ON;
1664} 1721}
1665 1722
1666
1667/* Check whether current node S[h] is balanced when Decreasing its size by 1723/* Check whether current node S[h] is balanced when Decreasing its size by
1668 * Deleting or Truncating for LEAF node of S+tree. 1724 * Deleting or Truncating for LEAF node of S+tree.
1669 * Calculate parameters for balancing for current level h. 1725 * Calculate parameters for balancing for current level h.
@@ -1677,90 +1733,86 @@ static int dc_check_balance_internal (struct tree_balance * tb, int h)
1677 * -1 - no balancing for higher levels needed; 1733 * -1 - no balancing for higher levels needed;
1678 * -2 - no disk space. 1734 * -2 - no disk space.
1679 */ 1735 */
1680static int dc_check_balance_leaf (struct tree_balance * tb, int h) 1736static int dc_check_balance_leaf(struct tree_balance *tb, int h)
1681{ 1737{
1682 struct virtual_node * vn = tb->tb_vn; 1738 struct virtual_node *vn = tb->tb_vn;
1683 1739
1684 /* Number of bytes that must be deleted from 1740 /* Number of bytes that must be deleted from
1685 (value is negative if bytes are deleted) buffer which 1741 (value is negative if bytes are deleted) buffer which
1686 contains node being balanced. The mnemonic is that the 1742 contains node being balanced. The mnemonic is that the
1687 attempted change in node space used level is levbytes bytes. */ 1743 attempted change in node space used level is levbytes bytes. */
1688 int levbytes; 1744 int levbytes;
1689 /* the maximal item size */ 1745 /* the maximal item size */
1690 int maxsize, 1746 int maxsize, n_ret_value;
1691 n_ret_value; 1747 /* S0 is the node whose balance is currently being checked,
1692 /* S0 is the node whose balance is currently being checked, 1748 and F0 is its father. */
1693 and F0 is its father. */ 1749 struct buffer_head *S0, *F0;
1694 struct buffer_head * S0, * F0; 1750 int lfree, rfree /* free space in L and R */ ;
1695 int lfree, rfree /* free space in L and R */; 1751
1696 1752 S0 = PATH_H_PBUFFER(tb->tb_path, 0);
1697 S0 = PATH_H_PBUFFER (tb->tb_path, 0); 1753 F0 = PATH_H_PPARENT(tb->tb_path, 0);
1698 F0 = PATH_H_PPARENT (tb->tb_path, 0);
1699
1700 levbytes = tb->insert_size[h];
1701
1702 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1703
1704 if ( ! F0 )
1705 { /* S[0] is the root now. */
1706
1707 RFALSE( -levbytes >= maxsize - B_FREE_SPACE (S0),
1708 "vs-8240: attempt to create empty buffer tree");
1709
1710 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1711 return NO_BALANCING_NEEDED;
1712 }
1713
1714 if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON )
1715 return n_ret_value;
1716
1717 /* get free space of neighbors */
1718 rfree = get_rfree (tb, h);
1719 lfree = get_lfree (tb, h);
1720
1721 create_virtual_node (tb, h);
1722
1723 /* if 3 leaves can be merge to one, set parameters and return */
1724 if (are_leaves_removable (tb, lfree, rfree))
1725 return CARRY_ON;
1726
1727 /* determine maximal number of items we can shift to the left/right neighbor
1728 and the maximal number of bytes that can flow to the left/right neighbor
1729 from the left/right most liquid item that cannot be shifted from S[0] entirely
1730 */
1731 check_left (tb, h, lfree);
1732 check_right (tb, h, rfree);
1733
1734 /* check whether we can merge S with left neighbor. */
1735 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1736 if (is_left_neighbor_in_cache (tb,h) ||
1737 ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1738 !tb->FR[h]) {
1739
1740 RFALSE( !tb->FL[h], "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1741
1742 /* set parameter to merge S[0] with its left neighbor */
1743 set_parameters (tb, h, -1, 0, 0, NULL, -1, -1);
1744 return CARRY_ON;
1745 }
1746
1747 /* check whether we can merge S[0] with right neighbor. */
1748 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1749 set_parameters (tb, h, 0, -1, 0, NULL, -1, -1);
1750 return CARRY_ON;
1751 }
1752
1753 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1754 if (is_leaf_removable (tb))
1755 return CARRY_ON;
1756
1757 /* Balancing is not required. */
1758 tb->s0num = vn->vn_nr_item;
1759 set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
1760 return NO_BALANCING_NEEDED;
1761}
1762 1754
1755 levbytes = tb->insert_size[h];
1763 1756
1757 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */
1758
1759 if (!F0) { /* S[0] is the root now. */
1760
1761 RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
1762 "vs-8240: attempt to create empty buffer tree");
1763
1764 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1765 return NO_BALANCING_NEEDED;
1766 }
1767
1768 if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1769 return n_ret_value;
1770
1771 /* get free space of neighbors */
1772 rfree = get_rfree(tb, h);
1773 lfree = get_lfree(tb, h);
1774
1775 create_virtual_node(tb, h);
1776
1777 /* if 3 leaves can be merge to one, set parameters and return */
1778 if (are_leaves_removable(tb, lfree, rfree))
1779 return CARRY_ON;
1780
1781 /* determine maximal number of items we can shift to the left/right neighbor
1782 and the maximal number of bytes that can flow to the left/right neighbor
1783 from the left/right most liquid item that cannot be shifted from S[0] entirely
1784 */
1785 check_left(tb, h, lfree);
1786 check_right(tb, h, rfree);
1787
1788 /* check whether we can merge S with left neighbor. */
1789 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
1790 if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */
1791 !tb->FR[h]) {
1792
1793 RFALSE(!tb->FL[h],
1794 "vs-8245: dc_check_balance_leaf: FL[h] must exist");
1795
1796 /* set parameter to merge S[0] with its left neighbor */
1797 set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
1798 return CARRY_ON;
1799 }
1800
1801 /* check whether we can merge S[0] with right neighbor. */
1802 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
1803 set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
1804 return CARRY_ON;
1805 }
1806
1807 /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
1808 if (is_leaf_removable(tb))
1809 return CARRY_ON;
1810
1811 /* Balancing is not required. */
1812 tb->s0num = vn->vn_nr_item;
1813 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1814 return NO_BALANCING_NEEDED;
1815}
1764 1816
1765/* Check whether current node S[h] is balanced when Decreasing its size by 1817/* Check whether current node S[h] is balanced when Decreasing its size by
1766 * Deleting or Cutting. 1818 * Deleting or Cutting.
@@ -1775,18 +1827,17 @@ static int dc_check_balance_leaf (struct tree_balance * tb, int h)
1775 * -1 - no balancing for higher levels needed; 1827 * -1 - no balancing for higher levels needed;
1776 * -2 - no disk space. 1828 * -2 - no disk space.
1777 */ 1829 */
1778static int dc_check_balance (struct tree_balance * tb, int h) 1830static int dc_check_balance(struct tree_balance *tb, int h)
1779{ 1831{
1780 RFALSE( ! (PATH_H_PBUFFER (tb->tb_path, h)), "vs-8250: S is not initialized"); 1832 RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1833 "vs-8250: S is not initialized");
1781 1834
1782 if ( h ) 1835 if (h)
1783 return dc_check_balance_internal (tb, h); 1836 return dc_check_balance_internal(tb, h);
1784 else 1837 else
1785 return dc_check_balance_leaf (tb, h); 1838 return dc_check_balance_leaf(tb, h);
1786} 1839}
1787 1840
1788
1789
1790/* Check whether current node S[h] is balanced. 1841/* Check whether current node S[h] is balanced.
1791 * Calculate parameters for balancing for current level h. 1842 * Calculate parameters for balancing for current level h.
1792 * Parameters: 1843 * Parameters:
@@ -1805,83 +1856,80 @@ static int dc_check_balance (struct tree_balance * tb, int h)
1805 * -1 - no balancing for higher levels needed; 1856 * -1 - no balancing for higher levels needed;
1806 * -2 - no disk space. 1857 * -2 - no disk space.
1807 */ 1858 */
1808static int check_balance (int mode, 1859static int check_balance(int mode,
1809 struct tree_balance * tb, 1860 struct tree_balance *tb,
1810 int h, 1861 int h,
1811 int inum, 1862 int inum,
1812 int pos_in_item, 1863 int pos_in_item,
1813 struct item_head * ins_ih, 1864 struct item_head *ins_ih, const void *data)
1814 const void * data
1815 )
1816{ 1865{
1817 struct virtual_node * vn; 1866 struct virtual_node *vn;
1818 1867
1819 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); 1868 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1820 vn->vn_free_ptr = (char *)(tb->tb_vn + 1); 1869 vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1821 vn->vn_mode = mode; 1870 vn->vn_mode = mode;
1822 vn->vn_affected_item_num = inum; 1871 vn->vn_affected_item_num = inum;
1823 vn->vn_pos_in_item = pos_in_item; 1872 vn->vn_pos_in_item = pos_in_item;
1824 vn->vn_ins_ih = ins_ih; 1873 vn->vn_ins_ih = ins_ih;
1825 vn->vn_data = data; 1874 vn->vn_data = data;
1826 1875
1827 RFALSE( mode == M_INSERT && !vn->vn_ins_ih, 1876 RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1828 "vs-8255: ins_ih can not be 0 in insert mode"); 1877 "vs-8255: ins_ih can not be 0 in insert mode");
1829 1878
1830 if ( tb->insert_size[h] > 0 ) 1879 if (tb->insert_size[h] > 0)
1831 /* Calculate balance parameters when size of node is increasing. */ 1880 /* Calculate balance parameters when size of node is increasing. */
1832 return ip_check_balance (tb, h); 1881 return ip_check_balance(tb, h);
1833 1882
1834 /* Calculate balance parameters when size of node is decreasing. */ 1883 /* Calculate balance parameters when size of node is decreasing. */
1835 return dc_check_balance (tb, h); 1884 return dc_check_balance(tb, h);
1836} 1885}
1837 1886
1887/* Check whether parent at the path is the really parent of the current node.*/
1888static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
1889{
1890 struct buffer_head *p_s_bh;
1891 struct path *p_s_path = p_s_tb->tb_path;
1892 int n_position,
1893 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
1894
1895 /* We are in the root or in the new root. */
1896 if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
1897
1898 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1899 "PAP-8260: invalid offset in the path");
1900
1901 if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
1902 b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
1903 /* Root is not changed. */
1904 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1905 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1906 return CARRY_ON;
1907 }
1908 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1909 }
1910
1911 if (!B_IS_IN_TREE
1912 (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
1913 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1838 1914
1915 if ((n_position =
1916 PATH_OFFSET_POSITION(p_s_path,
1917 n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
1918 return REPEAT_SEARCH;
1839 1919
1840/* Check whether parent at the path is the really parent of the current node.*/ 1920 if (B_N_CHILD_NUM(p_s_bh, n_position) !=
1841static int get_direct_parent( 1921 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
1842 struct tree_balance * p_s_tb, 1922 /* Parent in the path is not parent of the current node in the tree. */
1843 int n_h 1923 return REPEAT_SEARCH;
1844 ) { 1924
1845 struct buffer_head * p_s_bh; 1925 if (buffer_locked(p_s_bh)) {
1846 struct path * p_s_path = p_s_tb->tb_path; 1926 __wait_on_buffer(p_s_bh);
1847 int n_position, 1927 if (FILESYSTEM_CHANGED_TB(p_s_tb))
1848 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); 1928 return REPEAT_SEARCH;
1849
1850 /* We are in the root or in the new root. */
1851 if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) {
1852
1853 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
1854 "PAP-8260: invalid offset in the path");
1855
1856 if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
1857 SB_ROOT_BLOCK (p_s_tb->tb_sb) ) {
1858 /* Root is not changed. */
1859 PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
1860 PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
1861 return CARRY_ON;
1862 } 1929 }
1863 return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */
1864 }
1865
1866 if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) )
1867 return REPEAT_SEARCH; /* Parent in the path is not in the tree. */
1868
1869 if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) )
1870 return REPEAT_SEARCH;
1871
1872 if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr )
1873 /* Parent in the path is not parent of the current node in the tree. */
1874 return REPEAT_SEARCH;
1875
1876 if ( buffer_locked(p_s_bh) ) {
1877 __wait_on_buffer(p_s_bh);
1878 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
1879 return REPEAT_SEARCH;
1880 }
1881
1882 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1883}
1884 1930
1931 return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */
1932}
1885 1933
1886/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors 1934/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1887 * of S[n_h] we 1935 * of S[n_h] we
@@ -1889,356 +1937,401 @@ static int get_direct_parent(
1889 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 1937 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked;
1890 * CARRY_ON - schedule didn't occur while the function worked; 1938 * CARRY_ON - schedule didn't occur while the function worked;
1891 */ 1939 */
1892static int get_neighbors( 1940static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
1893 struct tree_balance * p_s_tb, 1941{
1894 int n_h 1942 int n_child_position,
1895 ) { 1943 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
1896 int n_child_position, 1944 unsigned long n_son_number;
1897 n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); 1945 struct super_block *p_s_sb = p_s_tb->tb_sb;
1898 unsigned long n_son_number; 1946 struct buffer_head *p_s_bh;
1899 struct super_block * p_s_sb = p_s_tb->tb_sb; 1947
1900 struct buffer_head * p_s_bh; 1948 PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);
1901 1949
1902 1950 if (p_s_tb->lnum[n_h]) {
1903 PROC_INFO_INC( p_s_sb, get_neighbors[ n_h ] ); 1951 /* We need left neighbor to balance S[n_h]. */
1904 1952 PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
1905 if ( p_s_tb->lnum[n_h] ) { 1953 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1906 /* We need left neighbor to balance S[n_h]. */ 1954
1907 PROC_INFO_INC( p_s_sb, need_l_neighbor[ n_h ] ); 1955 RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
1908 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 1956 !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
1909 1957 "PAP-8270: invalid position in the parent");
1910 RFALSE( p_s_bh == p_s_tb->FL[n_h] && 1958
1911 ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset), 1959 n_child_position =
1912 "PAP-8270: invalid position in the parent"); 1960 (p_s_bh ==
1913 1961 p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->
1914 n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); 1962 FL[n_h]);
1915 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); 1963 n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
1916 p_s_bh = sb_bread(p_s_sb, n_son_number); 1964 p_s_bh = sb_bread(p_s_sb, n_son_number);
1917 if (!p_s_bh) 1965 if (!p_s_bh)
1918 return IO_ERROR; 1966 return IO_ERROR;
1919 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 1967 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1920 decrement_bcount(p_s_bh); 1968 decrement_bcount(p_s_bh);
1921 PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); 1969 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1922 return REPEAT_SEARCH; 1970 return REPEAT_SEARCH;
1971 }
1972
1973 RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
1974 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
1975 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
1976 p_s_bh->b_blocknr, "PAP-8275: invalid parent");
1977 RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
1978 RFALSE(!n_h &&
1979 B_FREE_SPACE(p_s_bh) !=
1980 MAX_CHILD_SIZE(p_s_bh) -
1981 dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
1982 "PAP-8290: invalid child size of left neighbor");
1983
1984 decrement_bcount(p_s_tb->L[n_h]);
1985 p_s_tb->L[n_h] = p_s_bh;
1923 } 1986 }
1924 1987
1925 RFALSE( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) || 1988 if (p_s_tb->rnum[n_h]) { /* We need right neighbor to balance S[n_path_offset]. */
1926 n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || 1989 PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
1927 B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != 1990 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);
1928 p_s_bh->b_blocknr, "PAP-8275: invalid parent"); 1991
1929 RFALSE( ! B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child"); 1992 RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
1930 RFALSE( ! n_h && 1993 PATH_OFFSET_POSITION(p_s_tb->tb_path,
1931 B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FL[0],n_child_position)), 1994 n_path_offset) >=
1932 "PAP-8290: invalid child size of left neighbor"); 1995 B_NR_ITEMS(p_s_bh),
1933 1996 "PAP-8295: invalid position in the parent");
1934 decrement_bcount(p_s_tb->L[n_h]); 1997
1935 p_s_tb->L[n_h] = p_s_bh; 1998 n_child_position =
1936 } 1999 (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
1937 2000 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
1938 2001 p_s_bh = sb_bread(p_s_sb, n_son_number);
1939 if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */ 2002 if (!p_s_bh)
1940 PROC_INFO_INC( p_s_sb, need_r_neighbor[ n_h ] ); 2003 return IO_ERROR;
1941 p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); 2004 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
1942 2005 decrement_bcount(p_s_bh);
1943 RFALSE( p_s_bh == p_s_tb->FR[n_h] && 2006 PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
1944 PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh), 2007 return REPEAT_SEARCH;
1945 "PAP-8295: invalid position in the parent"); 2008 }
1946 2009 decrement_bcount(p_s_tb->R[n_h]);
1947 n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0; 2010 p_s_tb->R[n_h] = p_s_bh;
1948 n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); 2011
1949 p_s_bh = sb_bread(p_s_sb, n_son_number); 2012 RFALSE(!n_h
1950 if (!p_s_bh) 2013 && B_FREE_SPACE(p_s_bh) !=
1951 return IO_ERROR; 2014 MAX_CHILD_SIZE(p_s_bh) -
1952 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 2015 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
1953 decrement_bcount(p_s_bh); 2016 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
1954 PROC_INFO_INC( p_s_sb, get_neighbors_restart[ n_h ] ); 2017 B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
1955 return REPEAT_SEARCH; 2018 dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));
2019
1956 } 2020 }
1957 decrement_bcount(p_s_tb->R[n_h]); 2021 return CARRY_ON;
1958 p_s_tb->R[n_h] = p_s_bh;
1959
1960 RFALSE( ! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)),
1961 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
1962 B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh),
1963 dc_size(B_N_CHILD (p_s_tb->FR[0],n_child_position)));
1964
1965 }
1966 return CARRY_ON;
1967} 2022}
1968 2023
1969#ifdef CONFIG_REISERFS_CHECK 2024#ifdef CONFIG_REISERFS_CHECK
1970void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s) 2025void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s)
1971{ 2026{
1972 void * vp; 2027 void *vp;
1973 static size_t malloced; 2028 static size_t malloced;
1974 2029
1975 2030 vp = kmalloc(size, flags);
1976 vp = kmalloc (size, flags); 2031 if (vp) {
1977 if (vp) { 2032 REISERFS_SB(s)->s_kmallocs += size;
1978 REISERFS_SB(s)->s_kmallocs += size; 2033 if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) {
1979 if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) { 2034 reiserfs_warning(s,
1980 reiserfs_warning (s, 2035 "vs-8301: reiserfs_kmalloc: allocated memory %d",
1981 "vs-8301: reiserfs_kmalloc: allocated memory %d", 2036 REISERFS_SB(s)->s_kmallocs);
1982 REISERFS_SB(s)->s_kmallocs); 2037 malloced = REISERFS_SB(s)->s_kmallocs;
1983 malloced = REISERFS_SB(s)->s_kmallocs; 2038 }
1984 } 2039 }
1985 } 2040 return vp;
1986 return vp;
1987} 2041}
1988 2042
1989void reiserfs_kfree (const void * vp, size_t size, struct super_block * s) 2043void reiserfs_kfree(const void *vp, size_t size, struct super_block *s)
1990{ 2044{
1991 kfree (vp); 2045 kfree(vp);
1992 2046
1993 REISERFS_SB(s)->s_kmallocs -= size; 2047 REISERFS_SB(s)->s_kmallocs -= size;
1994 if (REISERFS_SB(s)->s_kmallocs < 0) 2048 if (REISERFS_SB(s)->s_kmallocs < 0)
1995 reiserfs_warning (s, "vs-8302: reiserfs_kfree: allocated memory %d", 2049 reiserfs_warning(s,
1996 REISERFS_SB(s)->s_kmallocs); 2050 "vs-8302: reiserfs_kfree: allocated memory %d",
2051 REISERFS_SB(s)->s_kmallocs);
1997 2052
1998} 2053}
1999#endif 2054#endif
2000 2055
2001 2056static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2002static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh)
2003{ 2057{
2004 int max_num_of_items; 2058 int max_num_of_items;
2005 int max_num_of_entries; 2059 int max_num_of_entries;
2006 unsigned long blocksize = sb->s_blocksize; 2060 unsigned long blocksize = sb->s_blocksize;
2007 2061
2008#define MIN_NAME_LEN 1 2062#define MIN_NAME_LEN 1
2009 2063
2010 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); 2064 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2011 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / 2065 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2012 (DEH_SIZE + MIN_NAME_LEN); 2066 (DEH_SIZE + MIN_NAME_LEN);
2013 2067
2014 return sizeof(struct virtual_node) + 2068 return sizeof(struct virtual_node) +
2015 max(max_num_of_items * sizeof (struct virtual_item), 2069 max(max_num_of_items * sizeof(struct virtual_item),
2016 sizeof (struct virtual_item) + sizeof(struct direntry_uarea) + 2070 sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2017 (max_num_of_entries - 1) * sizeof (__u16)); 2071 (max_num_of_entries - 1) * sizeof(__u16));
2018} 2072}
2019 2073
2020
2021
2022/* maybe we should fail balancing we are going to perform when kmalloc 2074/* maybe we should fail balancing we are going to perform when kmalloc
2023 fails several times. But now it will loop until kmalloc gets 2075 fails several times. But now it will loop until kmalloc gets
2024 required memory */ 2076 required memory */
2025static int get_mem_for_virtual_node (struct tree_balance * tb) 2077static int get_mem_for_virtual_node(struct tree_balance *tb)
2026{ 2078{
2027 int check_fs = 0; 2079 int check_fs = 0;
2028 int size; 2080 int size;
2029 char * buf; 2081 char *buf;
2030 2082
2031 size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path)); 2083 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));
2032 2084
2033 if (size > tb->vn_buf_size) { 2085 if (size > tb->vn_buf_size) {
2034 /* we have to allocate more memory for virtual node */ 2086 /* we have to allocate more memory for virtual node */
2035 if (tb->vn_buf) { 2087 if (tb->vn_buf) {
2036 /* free memory allocated before */ 2088 /* free memory allocated before */
2037 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); 2089 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2038 /* this is not needed if kfree is atomic */ 2090 /* this is not needed if kfree is atomic */
2039 check_fs = 1; 2091 check_fs = 1;
2040 } 2092 }
2041 2093
2042 /* virtual node requires now more memory */ 2094 /* virtual node requires now more memory */
2043 tb->vn_buf_size = size; 2095 tb->vn_buf_size = size;
2044 2096
2045 /* get memory for virtual item */ 2097 /* get memory for virtual item */
2046 buf = reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN, tb->tb_sb); 2098 buf =
2047 if ( ! buf ) { 2099 reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN,
2048 /* getting memory with GFP_KERNEL priority may involve 2100 tb->tb_sb);
2049 balancing now (due to indirect_to_direct conversion on 2101 if (!buf) {
2050 dcache shrinking). So, release path and collected 2102 /* getting memory with GFP_KERNEL priority may involve
2051 resources here */ 2103 balancing now (due to indirect_to_direct conversion on
2052 free_buffers_in_tb (tb); 2104 dcache shrinking). So, release path and collected
2053 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb); 2105 resources here */
2054 if ( !buf ) { 2106 free_buffers_in_tb(tb);
2107 buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
2108 if (!buf) {
2055#ifdef CONFIG_REISERFS_CHECK 2109#ifdef CONFIG_REISERFS_CHECK
2056 reiserfs_warning (tb->tb_sb, 2110 reiserfs_warning(tb->tb_sb,
2057 "vs-8345: get_mem_for_virtual_node: " 2111 "vs-8345: get_mem_for_virtual_node: "
2058 "kmalloc failed. reiserfs kmalloced %d bytes", 2112 "kmalloc failed. reiserfs kmalloced %d bytes",
2059 REISERFS_SB(tb->tb_sb)->s_kmallocs); 2113 REISERFS_SB(tb->tb_sb)->
2114 s_kmallocs);
2060#endif 2115#endif
2061 tb->vn_buf_size = 0; 2116 tb->vn_buf_size = 0;
2062 } 2117 }
2063 tb->vn_buf = buf; 2118 tb->vn_buf = buf;
2064 schedule() ; 2119 schedule();
2065 return REPEAT_SEARCH; 2120 return REPEAT_SEARCH;
2066 } 2121 }
2067 2122
2068 tb->vn_buf = buf; 2123 tb->vn_buf = buf;
2069 } 2124 }
2070 2125
2071 if ( check_fs && FILESYSTEM_CHANGED_TB (tb) ) 2126 if (check_fs && FILESYSTEM_CHANGED_TB(tb))
2072 return REPEAT_SEARCH; 2127 return REPEAT_SEARCH;
2073 2128
2074 return CARRY_ON; 2129 return CARRY_ON;
2075} 2130}
2076 2131
2077
2078#ifdef CONFIG_REISERFS_CHECK 2132#ifdef CONFIG_REISERFS_CHECK
2079static void tb_buffer_sanity_check (struct super_block * p_s_sb, 2133static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2080 struct buffer_head * p_s_bh, 2134 struct buffer_head *p_s_bh,
2081 const char *descr, int level) { 2135 const char *descr, int level)
2082 if (p_s_bh) {
2083 if (atomic_read (&(p_s_bh->b_count)) <= 0) {
2084
2085 reiserfs_panic (p_s_sb, "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh);
2086 }
2087
2088 if ( ! buffer_uptodate (p_s_bh) ) {
2089 reiserfs_panic (p_s_sb, "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh);
2090 }
2091
2092 if ( ! B_IS_IN_TREE (p_s_bh) ) {
2093 reiserfs_panic (p_s_sb, "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh);
2094 }
2095
2096 if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2097 reiserfs_panic (p_s_sb, "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n", descr, level, p_s_bh);
2098 }
2099
2100 if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2101 reiserfs_panic (p_s_sb, "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n", descr, level, p_s_bh);
2102 }
2103
2104 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2105 reiserfs_panic (p_s_sb, "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n", descr, level, p_s_bh);
2106 }
2107 }
2108}
2109#else
2110static void tb_buffer_sanity_check (struct super_block * p_s_sb,
2111 struct buffer_head * p_s_bh,
2112 const char *descr, int level)
2113{;}
2114#endif
2115
2116static int clear_all_dirty_bits(struct super_block *s,
2117 struct buffer_head *bh) {
2118 return reiserfs_prepare_for_journal(s, bh, 0) ;
2119}
2120
2121static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
2122{ 2136{
2123 struct buffer_head * locked; 2137 if (p_s_bh) {
2124#ifdef CONFIG_REISERFS_CHECK 2138 if (atomic_read(&(p_s_bh->b_count)) <= 0) {
2125 int repeat_counter = 0;
2126#endif
2127 int i;
2128 2139
2129 do { 2140 reiserfs_panic(p_s_sb,
2130 2141 "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
2131 locked = NULL; 2142 descr, level, p_s_bh);
2132
2133 for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) {
2134 if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) {
2135 /* if I understand correctly, we can only be sure the last buffer
2136 ** in the path is in the tree --clm
2137 */
2138#ifdef CONFIG_REISERFS_CHECK
2139 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2140 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2141 tb_buffer_sanity_check (p_s_tb->tb_sb,
2142 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i),
2143 "S",
2144 p_s_tb->tb_path->path_length - i);
2145 } 2143 }
2146#endif
2147 if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2148 PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)))
2149 {
2150 locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i);
2151 }
2152 }
2153 }
2154 2144
2155 for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) { 2145 if (!buffer_uptodate(p_s_bh)) {
2146 reiserfs_panic(p_s_sb,
2147 "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
2148 descr, level, p_s_bh);
2149 }
2156 2150
2157 if (p_s_tb->lnum[i] ) { 2151 if (!B_IS_IN_TREE(p_s_bh)) {
2152 reiserfs_panic(p_s_sb,
2153 "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
2154 descr, level, p_s_bh);
2155 }
2158 2156
2159 if ( p_s_tb->L[i] ) { 2157 if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2160 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i); 2158 reiserfs_panic(p_s_sb,
2161 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i])) 2159 "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
2162 locked = p_s_tb->L[i]; 2160 descr, level, p_s_bh);
2163 } 2161 }
2164 2162
2165 if ( !locked && p_s_tb->FL[i] ) { 2163 if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2166 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i); 2164 reiserfs_panic(p_s_sb,
2167 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i])) 2165 "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
2168 locked = p_s_tb->FL[i]; 2166 descr, level, p_s_bh);
2169 } 2167 }
2170 2168
2171 if ( !locked && p_s_tb->CFL[i] ) { 2169 if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2172 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i); 2170 reiserfs_panic(p_s_sb,
2173 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i])) 2171 "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2174 locked = p_s_tb->CFL[i]; 2172 descr, level, p_s_bh);
2175 } 2173 }
2174 }
2175}
2176#else
2177static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2178 struct buffer_head *p_s_bh,
2179 const char *descr, int level)
2180{;
2181}
2182#endif
2176 2183
2177 } 2184static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
2185{
2186 return reiserfs_prepare_for_journal(s, bh, 0);
2187}
2178 2188
2179 if ( !locked && (p_s_tb->rnum[i]) ) { 2189static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
2190{
2191 struct buffer_head *locked;
2192#ifdef CONFIG_REISERFS_CHECK
2193 int repeat_counter = 0;
2194#endif
2195 int i;
2180 2196
2181 if ( p_s_tb->R[i] ) { 2197 do {
2182 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i);
2183 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]))
2184 locked = p_s_tb->R[i];
2185 }
2186 2198
2187 2199 locked = NULL;
2188 if ( !locked && p_s_tb->FR[i] ) { 2200
2189 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i); 2201 for (i = p_s_tb->tb_path->path_length;
2190 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i])) 2202 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
2191 locked = p_s_tb->FR[i]; 2203 if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2204 /* if I understand correctly, we can only be sure the last buffer
2205 ** in the path is in the tree --clm
2206 */
2207#ifdef CONFIG_REISERFS_CHECK
2208 if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
2209 PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
2210 tb_buffer_sanity_check(p_s_tb->tb_sb,
2211 PATH_OFFSET_PBUFFER
2212 (p_s_tb->tb_path,
2213 i), "S",
2214 p_s_tb->tb_path->
2215 path_length - i);
2216 }
2217#endif
2218 if (!clear_all_dirty_bits(p_s_tb->tb_sb,
2219 PATH_OFFSET_PBUFFER
2220 (p_s_tb->tb_path,
2221 i))) {
2222 locked =
2223 PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
2224 i);
2225 }
2226 }
2192 } 2227 }
2193 2228
2194 if ( !locked && p_s_tb->CFR[i] ) { 2229 for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
2195 tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i); 2230 i++) {
2196 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i])) 2231
2197 locked = p_s_tb->CFR[i]; 2232 if (p_s_tb->lnum[i]) {
2233
2234 if (p_s_tb->L[i]) {
2235 tb_buffer_sanity_check(p_s_tb->tb_sb,
2236 p_s_tb->L[i],
2237 "L", i);
2238 if (!clear_all_dirty_bits
2239 (p_s_tb->tb_sb, p_s_tb->L[i]))
2240 locked = p_s_tb->L[i];
2241 }
2242
2243 if (!locked && p_s_tb->FL[i]) {
2244 tb_buffer_sanity_check(p_s_tb->tb_sb,
2245 p_s_tb->FL[i],
2246 "FL", i);
2247 if (!clear_all_dirty_bits
2248 (p_s_tb->tb_sb, p_s_tb->FL[i]))
2249 locked = p_s_tb->FL[i];
2250 }
2251
2252 if (!locked && p_s_tb->CFL[i]) {
2253 tb_buffer_sanity_check(p_s_tb->tb_sb,
2254 p_s_tb->CFL[i],
2255 "CFL", i);
2256 if (!clear_all_dirty_bits
2257 (p_s_tb->tb_sb, p_s_tb->CFL[i]))
2258 locked = p_s_tb->CFL[i];
2259 }
2260
2261 }
2262
2263 if (!locked && (p_s_tb->rnum[i])) {
2264
2265 if (p_s_tb->R[i]) {
2266 tb_buffer_sanity_check(p_s_tb->tb_sb,
2267 p_s_tb->R[i],
2268 "R", i);
2269 if (!clear_all_dirty_bits
2270 (p_s_tb->tb_sb, p_s_tb->R[i]))
2271 locked = p_s_tb->R[i];
2272 }
2273
2274 if (!locked && p_s_tb->FR[i]) {
2275 tb_buffer_sanity_check(p_s_tb->tb_sb,
2276 p_s_tb->FR[i],
2277 "FR", i);
2278 if (!clear_all_dirty_bits
2279 (p_s_tb->tb_sb, p_s_tb->FR[i]))
2280 locked = p_s_tb->FR[i];
2281 }
2282
2283 if (!locked && p_s_tb->CFR[i]) {
2284 tb_buffer_sanity_check(p_s_tb->tb_sb,
2285 p_s_tb->CFR[i],
2286 "CFR", i);
2287 if (!clear_all_dirty_bits
2288 (p_s_tb->tb_sb, p_s_tb->CFR[i]))
2289 locked = p_s_tb->CFR[i];
2290 }
2291 }
2292 }
2293 /* as far as I can tell, this is not required. The FEB list seems
2294 ** to be full of newly allocated nodes, which will never be locked,
2295 ** dirty, or anything else.
2296 ** To be safe, I'm putting in the checks and waits in. For the moment,
2297 ** they are needed to keep the code in journal.c from complaining
2298 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2299 ** --clm
2300 */
2301 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
2302 if (p_s_tb->FEB[i]) {
2303 if (!clear_all_dirty_bits
2304 (p_s_tb->tb_sb, p_s_tb->FEB[i]))
2305 locked = p_s_tb->FEB[i];
2306 }
2198 } 2307 }
2199 }
2200 }
2201 /* as far as I can tell, this is not required. The FEB list seems
2202 ** to be full of newly allocated nodes, which will never be locked,
2203 ** dirty, or anything else.
2204 ** To be safe, I'm putting in the checks and waits in. For the moment,
2205 ** they are needed to keep the code in journal.c from complaining
2206 ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well.
2207 ** --clm
2208 */
2209 for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) {
2210 if ( p_s_tb->FEB[i] ) {
2211 if (!clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]))
2212 locked = p_s_tb->FEB[i] ;
2213 }
2214 }
2215 2308
2216 if (locked) { 2309 if (locked) {
2217#ifdef CONFIG_REISERFS_CHECK 2310#ifdef CONFIG_REISERFS_CHECK
2218 repeat_counter++; 2311 repeat_counter++;
2219 if ( (repeat_counter % 10000) == 0) { 2312 if ((repeat_counter % 10000) == 0) {
2220 reiserfs_warning (p_s_tb->tb_sb, 2313 reiserfs_warning(p_s_tb->tb_sb,
2221 "wait_tb_buffers_until_released(): too many " 2314 "wait_tb_buffers_until_released(): too many "
2222 "iterations waiting for buffer to unlock " 2315 "iterations waiting for buffer to unlock "
2223 "(%b)", locked); 2316 "(%b)", locked);
2224 2317
2225 /* Don't loop forever. Try to recover from possible error. */ 2318 /* Don't loop forever. Try to recover from possible error. */
2226 2319
2227 return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON; 2320 return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
2228 } 2321 REPEAT_SEARCH : CARRY_ON;
2322 }
2229#endif 2323#endif
2230 __wait_on_buffer (locked); 2324 __wait_on_buffer(locked);
2231 if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { 2325 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2232 return REPEAT_SEARCH; 2326 return REPEAT_SEARCH;
2233 } 2327 }
2234 } 2328 }
2235 2329
2236 } while (locked); 2330 } while (locked);
2237 2331
2238 return CARRY_ON; 2332 return CARRY_ON;
2239} 2333}
2240 2334
2241
2242/* Prepare for balancing, that is 2335/* Prepare for balancing, that is
2243 * get all necessary parents, and neighbors; 2336 * get all necessary parents, and neighbors;
2244 * analyze what and where should be moved; 2337 * analyze what and where should be moved;
@@ -2267,252 +2360,266 @@ static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb)
2267 * -1 - if no_disk_space 2360 * -1 - if no_disk_space
2268 */ 2361 */
2269 2362
2363int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih, // item head of item being inserted
2364 const void *data // inserted item or data to be pasted
2365 )
2366{
2367 int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2368 int n_pos_in_item;
2270 2369
2271int fix_nodes (int n_op_mode, 2370 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2272 struct tree_balance * p_s_tb, 2371 ** during wait_tb_buffers_run
2273 struct item_head * p_s_ins_ih, // item head of item being inserted 2372 */
2274 const void * data // inserted item or data to be pasted 2373 int wait_tb_buffers_run = 0;
2275 ) { 2374 struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2276 int n_ret_value,
2277 n_h,
2278 n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
2279 int n_pos_in_item;
2280
2281 /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
2282 ** during wait_tb_buffers_run
2283 */
2284 int wait_tb_buffers_run = 0 ;
2285 struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);
2286
2287 ++ REISERFS_SB(p_s_tb -> tb_sb) -> s_fix_nodes;
2288
2289 n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2290
2291
2292 p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb);
2293
2294 /* we prepare and log the super here so it will already be in the
2295 ** transaction when do_balance needs to change it.
2296 ** This way do_balance won't have to schedule when trying to prepare
2297 ** the super for logging
2298 */
2299 reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2300 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ;
2301 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2302 SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ;
2303 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2304 return REPEAT_SEARCH;
2305
2306 /* if it possible in indirect_to_direct conversion */
2307 if (buffer_locked (p_s_tbS0)) {
2308 __wait_on_buffer (p_s_tbS0);
2309 if ( FILESYSTEM_CHANGED_TB (p_s_tb) )
2310 return REPEAT_SEARCH;
2311 }
2312 2375
2313#ifdef CONFIG_REISERFS_CHECK 2376 ++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;
2314 if ( cur_tb ) { 2377
2315 print_cur_tb ("fix_nodes"); 2378 n_pos_in_item = p_s_tb->tb_path->pos_in_item;
2316 reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance"); 2379
2317 } 2380 p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);
2318
2319 if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) {
2320 reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2321 "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode);
2322 }
2323
2324 /* Check parameters. */
2325 switch (n_op_mode) {
2326 case M_INSERT:
2327 if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) )
2328 reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2329 n_item_num, B_NR_ITEMS(p_s_tbS0));
2330 break;
2331 case M_PASTE:
2332 case M_DELETE:
2333 case M_CUT:
2334 if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) {
2335 print_block (p_s_tbS0, 0, -1, -1);
2336 reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n", n_item_num, n_op_mode, p_s_tb->insert_size[0]);
2337 }
2338 break;
2339 default:
2340 reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation");
2341 }
2342#endif
2343 2381
2344 if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH) 2382 /* we prepare and log the super here so it will already be in the
2345 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat 2383 ** transaction when do_balance needs to change it.
2346 return REPEAT_SEARCH; 2384 ** This way do_balance won't have to schedule when trying to prepare
2385 ** the super for logging
2386 */
2387 reiserfs_prepare_for_journal(p_s_tb->tb_sb,
2388 SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
2389 journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
2390 SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
2391 if (FILESYSTEM_CHANGED_TB(p_s_tb))
2392 return REPEAT_SEARCH;
2347 2393
2394 /* if it possible in indirect_to_direct conversion */
2395 if (buffer_locked(p_s_tbS0)) {
2396 __wait_on_buffer(p_s_tbS0);
2397 if (FILESYSTEM_CHANGED_TB(p_s_tb))
2398 return REPEAT_SEARCH;
2399 }
2400#ifdef CONFIG_REISERFS_CHECK
2401 if (cur_tb) {
2402 print_cur_tb("fix_nodes");
2403 reiserfs_panic(p_s_tb->tb_sb,
2404 "PAP-8305: fix_nodes: there is pending do_balance");
2405 }
2348 2406
2349 /* Starting from the leaf level; for all levels n_h of the tree. */ 2407 if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
2350 for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) { 2408 reiserfs_panic(p_s_tb->tb_sb,
2351 if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) { 2409 "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
2352 goto repeat; 2410 "at the beginning of fix_nodes or not in tree (mode %c)",
2411 p_s_tbS0, p_s_tbS0, n_op_mode);
2353 } 2412 }
2354 2413
2355 if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num, 2414 /* Check parameters. */
2356 n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) { 2415 switch (n_op_mode) {
2357 if ( n_ret_value == NO_BALANCING_NEEDED ) { 2416 case M_INSERT:
2358 /* No balancing for higher levels needed. */ 2417 if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
2359 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { 2418 reiserfs_panic(p_s_tb->tb_sb,
2360 goto repeat; 2419 "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
2420 n_item_num, B_NR_ITEMS(p_s_tbS0));
2421 break;
2422 case M_PASTE:
2423 case M_DELETE:
2424 case M_CUT:
2425 if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
2426 print_block(p_s_tbS0, 0, -1, -1);
2427 reiserfs_panic(p_s_tb->tb_sb,
2428 "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
2429 n_item_num, n_op_mode,
2430 p_s_tb->insert_size[0]);
2361 } 2431 }
2362 if ( n_h != MAX_HEIGHT - 1 )
2363 p_s_tb->insert_size[n_h + 1] = 0;
2364 /* ok, analysis and resource gathering are complete */
2365 break; 2432 break;
2366 } 2433 default:
2367 goto repeat; 2434 reiserfs_panic(p_s_tb->tb_sb,
2435 "PAP-8340: fix_nodes: Incorrect mode of operation");
2368 } 2436 }
2437#endif
2369 2438
2370 if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { 2439 if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
2371 goto repeat; 2440 // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
2372 } 2441 return REPEAT_SEARCH;
2373 2442
2374 if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) { 2443 /* Starting from the leaf level; for all levels n_h of the tree. */
2375 goto repeat; /* No disk space, or schedule occurred and 2444 for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
2376 analysis may be invalid and needs to be redone. */ 2445 if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
2377 } 2446 goto repeat;
2378 2447 }
2379 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) {
2380 /* We have a positive insert size but no nodes exist on this
2381 level, this means that we are creating a new root. */
2382 2448
2383 RFALSE( p_s_tb->blknum[n_h] != 1, 2449 if ((n_ret_value =
2384 "PAP-8350: creating new empty root"); 2450 check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
2451 n_pos_in_item, p_s_ins_ih,
2452 data)) != CARRY_ON) {
2453 if (n_ret_value == NO_BALANCING_NEEDED) {
2454 /* No balancing for higher levels needed. */
2455 if ((n_ret_value =
2456 get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2457 goto repeat;
2458 }
2459 if (n_h != MAX_HEIGHT - 1)
2460 p_s_tb->insert_size[n_h + 1] = 0;
2461 /* ok, analysis and resource gathering are complete */
2462 break;
2463 }
2464 goto repeat;
2465 }
2385 2466
2386 if ( n_h < MAX_HEIGHT - 1 ) 2467 if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
2387 p_s_tb->insert_size[n_h + 1] = 0; 2468 goto repeat;
2388 }
2389 else
2390 if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) {
2391 if ( p_s_tb->blknum[n_h] > 1 ) {
2392 /* The tree needs to be grown, so this node S[n_h]
2393 which is the root node is split into two nodes,
2394 and a new node (S[n_h+1]) will be created to
2395 become the root node. */
2396
2397 RFALSE( n_h == MAX_HEIGHT - 1,
2398 "PAP-8355: attempt to create too high of a tree");
2399
2400 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE;
2401 } 2469 }
2402 else 2470
2403 if ( n_h < MAX_HEIGHT - 1 ) 2471 if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
2404 p_s_tb->insert_size[n_h + 1] = 0; 2472 goto repeat; /* No disk space, or schedule occurred and
2405 } 2473 analysis may be invalid and needs to be redone. */
2406 else 2474 }
2407 p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); 2475
2408 } 2476 if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
2409 2477 /* We have a positive insert size but no nodes exist on this
2410 if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) { 2478 level, this means that we are creating a new root. */
2411 if (FILESYSTEM_CHANGED_TB(p_s_tb)) { 2479
2412 wait_tb_buffers_run = 1 ; 2480 RFALSE(p_s_tb->blknum[n_h] != 1,
2413 n_ret_value = REPEAT_SEARCH ; 2481 "PAP-8350: creating new empty root");
2414 goto repeat; 2482
2415 } else { 2483 if (n_h < MAX_HEIGHT - 1)
2416 return CARRY_ON; 2484 p_s_tb->insert_size[n_h + 1] = 0;
2485 } else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
2486 if (p_s_tb->blknum[n_h] > 1) {
2487 /* The tree needs to be grown, so this node S[n_h]
2488 which is the root node is split into two nodes,
2489 and a new node (S[n_h+1]) will be created to
2490 become the root node. */
2491
2492 RFALSE(n_h == MAX_HEIGHT - 1,
2493 "PAP-8355: attempt to create too high of a tree");
2494
2495 p_s_tb->insert_size[n_h + 1] =
2496 (DC_SIZE +
2497 KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
2498 DC_SIZE;
2499 } else if (n_h < MAX_HEIGHT - 1)
2500 p_s_tb->insert_size[n_h + 1] = 0;
2501 } else
2502 p_s_tb->insert_size[n_h + 1] =
2503 (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
2417 } 2504 }
2418 } else {
2419 wait_tb_buffers_run = 1 ;
2420 goto repeat;
2421 }
2422
2423 repeat:
2424 // fix_nodes was unable to perform its calculation due to
2425 // filesystem got changed under us, lack of free disk space or i/o
2426 // failure. If the first is the case - the search will be
2427 // repeated. For now - free all resources acquired so far except
2428 // for the new allocated nodes
2429 {
2430 int i;
2431 2505
2432 /* Release path buffers. */ 2506 if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
2433 if (wait_tb_buffers_run) { 2507 if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
2434 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ; 2508 wait_tb_buffers_run = 1;
2509 n_ret_value = REPEAT_SEARCH;
2510 goto repeat;
2511 } else {
2512 return CARRY_ON;
2513 }
2435 } else { 2514 } else {
2436 pathrelse (p_s_tb->tb_path); 2515 wait_tb_buffers_run = 1;
2437 } 2516 goto repeat;
2438 /* brelse all resources collected for balancing */
2439 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2440 if (wait_tb_buffers_run) {
2441 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]);
2442 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]);
2443 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]);
2444 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]);
2445 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]);
2446 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]);
2447 }
2448
2449 brelse (p_s_tb->L[i]);p_s_tb->L[i] = NULL;
2450 brelse (p_s_tb->R[i]);p_s_tb->R[i] = NULL;
2451 brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = NULL;
2452 brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = NULL;
2453 brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = NULL;
2454 brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = NULL;
2455 } 2517 }
2456 2518
2457 if (wait_tb_buffers_run) { 2519 repeat:
2458 for ( i = 0; i < MAX_FEB_SIZE; i++ ) { 2520 // fix_nodes was unable to perform its calculation due to
2459 if ( p_s_tb->FEB[i] ) { 2521 // filesystem got changed under us, lack of free disk space or i/o
2460 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, 2522 // failure. If the first is the case - the search will be
2461 p_s_tb->FEB[i]) ; 2523 // repeated. For now - free all resources acquired so far except
2524 // for the new allocated nodes
2525 {
2526 int i;
2527
2528 /* Release path buffers. */
2529 if (wait_tb_buffers_run) {
2530 pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
2531 } else {
2532 pathrelse(p_s_tb->tb_path);
2533 }
2534 /* brelse all resources collected for balancing */
2535 for (i = 0; i < MAX_HEIGHT; i++) {
2536 if (wait_tb_buffers_run) {
2537 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2538 p_s_tb->L[i]);
2539 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2540 p_s_tb->R[i]);
2541 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2542 p_s_tb->FL[i]);
2543 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2544 p_s_tb->FR[i]);
2545 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2546 p_s_tb->
2547 CFL[i]);
2548 reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
2549 p_s_tb->
2550 CFR[i]);
2551 }
2552
2553 brelse(p_s_tb->L[i]);
2554 p_s_tb->L[i] = NULL;
2555 brelse(p_s_tb->R[i]);
2556 p_s_tb->R[i] = NULL;
2557 brelse(p_s_tb->FL[i]);
2558 p_s_tb->FL[i] = NULL;
2559 brelse(p_s_tb->FR[i]);
2560 p_s_tb->FR[i] = NULL;
2561 brelse(p_s_tb->CFL[i]);
2562 p_s_tb->CFL[i] = NULL;
2563 brelse(p_s_tb->CFR[i]);
2564 p_s_tb->CFR[i] = NULL;
2565 }
2566
2567 if (wait_tb_buffers_run) {
2568 for (i = 0; i < MAX_FEB_SIZE; i++) {
2569 if (p_s_tb->FEB[i]) {
2570 reiserfs_restore_prepared_buffer
2571 (p_s_tb->tb_sb, p_s_tb->FEB[i]);
2572 }
2573 }
2462 } 2574 }
2463 } 2575 return n_ret_value;
2464 } 2576 }
2465 return n_ret_value;
2466 }
2467 2577
2468} 2578}
2469 2579
2470
2471/* Anatoly will probably forgive me renaming p_s_tb to tb. I just 2580/* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2472 wanted to make lines shorter */ 2581 wanted to make lines shorter */
2473void unfix_nodes (struct tree_balance * tb) 2582void unfix_nodes(struct tree_balance *tb)
2474{ 2583{
2475 int i; 2584 int i;
2476
2477 /* Release path buffers. */
2478 pathrelse_and_restore (tb->tb_sb, tb->tb_path);
2479
2480 /* brelse all resources collected for balancing */
2481 for ( i = 0; i < MAX_HEIGHT; i++ ) {
2482 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]);
2483 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]);
2484 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]);
2485 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]);
2486 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]);
2487 reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]);
2488
2489 brelse (tb->L[i]);
2490 brelse (tb->R[i]);
2491 brelse (tb->FL[i]);
2492 brelse (tb->FR[i]);
2493 brelse (tb->CFL[i]);
2494 brelse (tb->CFR[i]);
2495 }
2496
2497 /* deal with list of allocated (used and unused) nodes */
2498 for ( i = 0; i < MAX_FEB_SIZE; i++ ) {
2499 if ( tb->FEB[i] ) {
2500 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr ;
2501 /* de-allocated block which was not used by balancing and
2502 bforget about buffer for it */
2503 brelse (tb->FEB[i]);
2504 reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
2505 }
2506 if (tb->used[i]) {
2507 /* release used as new nodes including a new root */
2508 brelse (tb->used[i]);
2509 }
2510 }
2511 2585
2512 if (tb->vn_buf) 2586 /* Release path buffers. */
2513 reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); 2587 pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2514 2588
2515} 2589 /* brelse all resources collected for balancing */
2590 for (i = 0; i < MAX_HEIGHT; i++) {
2591 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2592 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2593 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2594 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2595 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2596 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2597
2598 brelse(tb->L[i]);
2599 brelse(tb->R[i]);
2600 brelse(tb->FL[i]);
2601 brelse(tb->FR[i]);
2602 brelse(tb->CFL[i]);
2603 brelse(tb->CFR[i]);
2604 }
2516 2605
2606 /* deal with list of allocated (used and unused) nodes */
2607 for (i = 0; i < MAX_FEB_SIZE; i++) {
2608 if (tb->FEB[i]) {
2609 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2610 /* de-allocated block which was not used by balancing and
2611 bforget about buffer for it */
2612 brelse(tb->FEB[i]);
2613 reiserfs_free_block(tb->transaction_handle, NULL,
2614 blocknr, 0);
2615 }
2616 if (tb->used[i]) {
2617 /* release used as new nodes including a new root */
2618 brelse(tb->used[i]);
2619 }
2620 }
2517 2621
2622 if (tb->vn_buf)
2623 reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2518 2624
2625}
diff --git a/fs/reiserfs/hashes.c b/fs/reiserfs/hashes.c
index 08d0508c2d3..37c1306eb9b 100644
--- a/fs/reiserfs/hashes.c
+++ b/fs/reiserfs/hashes.c
@@ -22,7 +22,6 @@
22#include <asm/types.h> 22#include <asm/types.h>
23#include <asm/bug.h> 23#include <asm/bug.h>
24 24
25
26#define DELTA 0x9E3779B9 25#define DELTA 0x9E3779B9
27#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ 26#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
28#define PARTROUNDS 6 /* 6 gets complete mixing */ 27#define PARTROUNDS 6 /* 6 gets complete mixing */
@@ -48,105 +47,75 @@
48 h1 += b1; \ 47 h1 += b1; \
49 } while(0) 48 } while(0)
50 49
51
52u32 keyed_hash(const signed char *msg, int len) 50u32 keyed_hash(const signed char *msg, int len)
53{ 51{
54 u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3}; 52 u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3 };
55 53
56 u32 h0 = k[0], h1 = k[1]; 54 u32 h0 = k[0], h1 = k[1];
57 u32 a, b, c, d; 55 u32 a, b, c, d;
58 u32 pad; 56 u32 pad;
59 int i; 57 int i;
60
61 // assert(len >= 0 && len < 256);
62 58
63 pad = (u32)len | ((u32)len << 8); 59 // assert(len >= 0 && len < 256);
60
61 pad = (u32) len | ((u32) len << 8);
64 pad |= pad << 16; 62 pad |= pad << 16;
65 63
66 while(len >= 16) 64 while (len >= 16) {
67 { 65 a = (u32) msg[0] |
68 a = (u32)msg[ 0] | 66 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
69 (u32)msg[ 1] << 8 | 67 b = (u32) msg[4] |
70 (u32)msg[ 2] << 16| 68 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
71 (u32)msg[ 3] << 24; 69 c = (u32) msg[8] |
72 b = (u32)msg[ 4] | 70 (u32) msg[9] << 8 |
73 (u32)msg[ 5] << 8 | 71 (u32) msg[10] << 16 | (u32) msg[11] << 24;
74 (u32)msg[ 6] << 16| 72 d = (u32) msg[12] |
75 (u32)msg[ 7] << 24; 73 (u32) msg[13] << 8 |
76 c = (u32)msg[ 8] | 74 (u32) msg[14] << 16 | (u32) msg[15] << 24;
77 (u32)msg[ 9] << 8 | 75
78 (u32)msg[10] << 16|
79 (u32)msg[11] << 24;
80 d = (u32)msg[12] |
81 (u32)msg[13] << 8 |
82 (u32)msg[14] << 16|
83 (u32)msg[15] << 24;
84
85 TEACORE(PARTROUNDS); 76 TEACORE(PARTROUNDS);
86 77
87 len -= 16; 78 len -= 16;
88 msg += 16; 79 msg += 16;
89 } 80 }
90 81
91 if (len >= 12) 82 if (len >= 12) {
92 { 83 a = (u32) msg[0] |
93 a = (u32)msg[ 0] | 84 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
94 (u32)msg[ 1] << 8 | 85 b = (u32) msg[4] |
95 (u32)msg[ 2] << 16| 86 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
96 (u32)msg[ 3] << 24; 87 c = (u32) msg[8] |
97 b = (u32)msg[ 4] | 88 (u32) msg[9] << 8 |
98 (u32)msg[ 5] << 8 | 89 (u32) msg[10] << 16 | (u32) msg[11] << 24;
99 (u32)msg[ 6] << 16|
100 (u32)msg[ 7] << 24;
101 c = (u32)msg[ 8] |
102 (u32)msg[ 9] << 8 |
103 (u32)msg[10] << 16|
104 (u32)msg[11] << 24;
105 90
106 d = pad; 91 d = pad;
107 for(i = 12; i < len; i++) 92 for (i = 12; i < len; i++) {
108 {
109 d <<= 8; 93 d <<= 8;
110 d |= msg[i]; 94 d |= msg[i];
111 } 95 }
112 } 96 } else if (len >= 8) {
113 else if (len >= 8) 97 a = (u32) msg[0] |
114 { 98 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
115 a = (u32)msg[ 0] | 99 b = (u32) msg[4] |
116 (u32)msg[ 1] << 8 | 100 (u32) msg[5] << 8 | (u32) msg[6] << 16 | (u32) msg[7] << 24;
117 (u32)msg[ 2] << 16|
118 (u32)msg[ 3] << 24;
119 b = (u32)msg[ 4] |
120 (u32)msg[ 5] << 8 |
121 (u32)msg[ 6] << 16|
122 (u32)msg[ 7] << 24;
123 101
124 c = d = pad; 102 c = d = pad;
125 for(i = 8; i < len; i++) 103 for (i = 8; i < len; i++) {
126 {
127 c <<= 8; 104 c <<= 8;
128 c |= msg[i]; 105 c |= msg[i];
129 } 106 }
130 } 107 } else if (len >= 4) {
131 else if (len >= 4) 108 a = (u32) msg[0] |
132 { 109 (u32) msg[1] << 8 | (u32) msg[2] << 16 | (u32) msg[3] << 24;
133 a = (u32)msg[ 0] |
134 (u32)msg[ 1] << 8 |
135 (u32)msg[ 2] << 16|
136 (u32)msg[ 3] << 24;
137 110
138 b = c = d = pad; 111 b = c = d = pad;
139 for(i = 4; i < len; i++) 112 for (i = 4; i < len; i++) {
140 {
141 b <<= 8; 113 b <<= 8;
142 b |= msg[i]; 114 b |= msg[i];
143 } 115 }
144 } 116 } else {
145 else
146 {
147 a = b = c = d = pad; 117 a = b = c = d = pad;
148 for(i = 0; i < len; i++) 118 for (i = 0; i < len; i++) {
149 {
150 a <<= 8; 119 a <<= 8;
151 a |= msg[i]; 120 a |= msg[i];
152 } 121 }
@@ -155,55 +124,59 @@ u32 keyed_hash(const signed char *msg, int len)
155 TEACORE(FULLROUNDS); 124 TEACORE(FULLROUNDS);
156 125
157/* return 0;*/ 126/* return 0;*/
158 return h0^h1; 127 return h0 ^ h1;
159} 128}
160 129
161/* What follows in this file is copyright 2000 by Hans Reiser, and the 130/* What follows in this file is copyright 2000 by Hans Reiser, and the
162 * licensing of what follows is governed by reiserfs/README */ 131 * licensing of what follows is governed by reiserfs/README */
163 132
164u32 yura_hash (const signed char *msg, int len) 133u32 yura_hash(const signed char *msg, int len)
165{ 134{
166 int j, pow; 135 int j, pow;
167 u32 a, c; 136 u32 a, c;
168 int i; 137 int i;
169 138
170 for (pow=1,i=1; i < len; i++) pow = pow * 10; 139 for (pow = 1, i = 1; i < len; i++)
171 140 pow = pow * 10;
172 if (len == 1) 141
173 a = msg[0]-48; 142 if (len == 1)
174 else 143 a = msg[0] - 48;
175 a = (msg[0] - 48) * pow; 144 else
176 145 a = (msg[0] - 48) * pow;
177 for (i=1; i < len; i++) { 146
178 c = msg[i] - 48; 147 for (i = 1; i < len; i++) {
179 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 148 c = msg[i] - 48;
180 a = a + c * pow; 149 for (pow = 1, j = i; j < len - 1; j++)
181 } 150 pow = pow * 10;
182 151 a = a + c * pow;
183 for (; i < 40; i++) { 152 }
184 c = '0' - 48; 153
185 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 154 for (; i < 40; i++) {
186 a = a + c * pow; 155 c = '0' - 48;
187 } 156 for (pow = 1, j = i; j < len - 1; j++)
188 157 pow = pow * 10;
189 for (; i < 256; i++) { 158 a = a + c * pow;
190 c = i; 159 }
191 for (pow=1,j=i; j < len-1; j++) pow = pow * 10; 160
192 a = a + c * pow; 161 for (; i < 256; i++) {
193 } 162 c = i;
194 163 for (pow = 1, j = i; j < len - 1; j++)
195 a = a << 7; 164 pow = pow * 10;
196 return a; 165 a = a + c * pow;
166 }
167
168 a = a << 7;
169 return a;
197} 170}
198 171
199u32 r5_hash (const signed char *msg, int len) 172u32 r5_hash(const signed char *msg, int len)
200{ 173{
201 u32 a=0; 174 u32 a = 0;
202 while(*msg) { 175 while (*msg) {
203 a += *msg << 4; 176 a += *msg << 4;
204 a += *msg >> 4; 177 a += *msg >> 4;
205 a *= 11; 178 a *= 11;
206 msg++; 179 msg++;
207 } 180 }
208 return a; 181 return a;
209} 182}
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index a362125da0d..6c5a726fd34 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -10,13 +10,8 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11 11
12/* this is one and only function that is used outside (do_balance.c) */ 12/* this is one and only function that is used outside (do_balance.c) */
13int balance_internal ( 13int balance_internal(struct tree_balance *,
14 struct tree_balance * , 14 int, int, struct item_head *, struct buffer_head **);
15 int,
16 int,
17 struct item_head * ,
18 struct buffer_head **
19 );
20 15
21/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ 16/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
22#define INTERNAL_SHIFT_FROM_S_TO_L 0 17#define INTERNAL_SHIFT_FROM_S_TO_L 0
@@ -27,464 +22,474 @@ int balance_internal (
27#define INTERNAL_INSERT_TO_L 5 22#define INTERNAL_INSERT_TO_L 5
28#define INTERNAL_INSERT_TO_R 6 23#define INTERNAL_INSERT_TO_R 6
29 24
30static void internal_define_dest_src_infos ( 25static void internal_define_dest_src_infos(int shift_mode,
31 int shift_mode, 26 struct tree_balance *tb,
32 struct tree_balance * tb, 27 int h,
33 int h, 28 struct buffer_info *dest_bi,
34 struct buffer_info * dest_bi, 29 struct buffer_info *src_bi,
35 struct buffer_info * src_bi, 30 int *d_key, struct buffer_head **cf)
36 int * d_key,
37 struct buffer_head ** cf
38 )
39{ 31{
40 memset (dest_bi, 0, sizeof (struct buffer_info)); 32 memset(dest_bi, 0, sizeof(struct buffer_info));
41 memset (src_bi, 0, sizeof (struct buffer_info)); 33 memset(src_bi, 0, sizeof(struct buffer_info));
42 /* define dest, src, dest parent, dest position */ 34 /* define dest, src, dest parent, dest position */
43 switch (shift_mode) { 35 switch (shift_mode) {
44 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ 36 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */
45 src_bi->tb = tb; 37 src_bi->tb = tb;
46 src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 38 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
47 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 39 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
48 src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 40 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
49 dest_bi->tb = tb; 41 dest_bi->tb = tb;
50 dest_bi->bi_bh = tb->L[h]; 42 dest_bi->bi_bh = tb->L[h];
51 dest_bi->bi_parent = tb->FL[h]; 43 dest_bi->bi_parent = tb->FL[h];
52 dest_bi->bi_position = get_left_neighbor_position (tb, h); 44 dest_bi->bi_position = get_left_neighbor_position(tb, h);
53 *d_key = tb->lkey[h]; 45 *d_key = tb->lkey[h];
54 *cf = tb->CFL[h]; 46 *cf = tb->CFL[h];
55 break; 47 break;
56 case INTERNAL_SHIFT_FROM_L_TO_S: 48 case INTERNAL_SHIFT_FROM_L_TO_S:
57 src_bi->tb = tb; 49 src_bi->tb = tb;
58 src_bi->bi_bh = tb->L[h]; 50 src_bi->bi_bh = tb->L[h];
59 src_bi->bi_parent = tb->FL[h]; 51 src_bi->bi_parent = tb->FL[h];
60 src_bi->bi_position = get_left_neighbor_position (tb, h); 52 src_bi->bi_position = get_left_neighbor_position(tb, h);
61 dest_bi->tb = tb; 53 dest_bi->tb = tb;
62 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 54 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
63 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 55 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
64 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ 56 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */
65 *d_key = tb->lkey[h]; 57 *d_key = tb->lkey[h];
66 *cf = tb->CFL[h]; 58 *cf = tb->CFL[h];
67 break; 59 break;
68 60
69 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ 61 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */
70 src_bi->tb = tb; 62 src_bi->tb = tb;
71 src_bi->bi_bh = tb->R[h]; 63 src_bi->bi_bh = tb->R[h];
72 src_bi->bi_parent = tb->FR[h]; 64 src_bi->bi_parent = tb->FR[h];
73 src_bi->bi_position = get_right_neighbor_position (tb, h); 65 src_bi->bi_position = get_right_neighbor_position(tb, h);
74 dest_bi->tb = tb; 66 dest_bi->tb = tb;
75 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 67 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
76 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 68 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
77 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 69 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
78 *d_key = tb->rkey[h]; 70 *d_key = tb->rkey[h];
79 *cf = tb->CFR[h]; 71 *cf = tb->CFR[h];
80 break; 72 break;
81 73
82 case INTERNAL_SHIFT_FROM_S_TO_R: 74 case INTERNAL_SHIFT_FROM_S_TO_R:
83 src_bi->tb = tb; 75 src_bi->tb = tb;
84 src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 76 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
85 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 77 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
86 src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 78 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
87 dest_bi->tb = tb; 79 dest_bi->tb = tb;
88 dest_bi->bi_bh = tb->R[h]; 80 dest_bi->bi_bh = tb->R[h];
89 dest_bi->bi_parent = tb->FR[h]; 81 dest_bi->bi_parent = tb->FR[h];
90 dest_bi->bi_position = get_right_neighbor_position (tb, h); 82 dest_bi->bi_position = get_right_neighbor_position(tb, h);
91 *d_key = tb->rkey[h]; 83 *d_key = tb->rkey[h];
92 *cf = tb->CFR[h]; 84 *cf = tb->CFR[h];
93 break; 85 break;
94 86
95 case INTERNAL_INSERT_TO_L: 87 case INTERNAL_INSERT_TO_L:
96 dest_bi->tb = tb; 88 dest_bi->tb = tb;
97 dest_bi->bi_bh = tb->L[h]; 89 dest_bi->bi_bh = tb->L[h];
98 dest_bi->bi_parent = tb->FL[h]; 90 dest_bi->bi_parent = tb->FL[h];
99 dest_bi->bi_position = get_left_neighbor_position (tb, h); 91 dest_bi->bi_position = get_left_neighbor_position(tb, h);
100 break; 92 break;
101 93
102 case INTERNAL_INSERT_TO_S: 94 case INTERNAL_INSERT_TO_S:
103 dest_bi->tb = tb; 95 dest_bi->tb = tb;
104 dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); 96 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
105 dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); 97 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
106 dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 98 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
107 break; 99 break;
108 100
109 case INTERNAL_INSERT_TO_R: 101 case INTERNAL_INSERT_TO_R:
110 dest_bi->tb = tb; 102 dest_bi->tb = tb;
111 dest_bi->bi_bh = tb->R[h]; 103 dest_bi->bi_bh = tb->R[h];
112 dest_bi->bi_parent = tb->FR[h]; 104 dest_bi->bi_parent = tb->FR[h];
113 dest_bi->bi_position = get_right_neighbor_position (tb, h); 105 dest_bi->bi_position = get_right_neighbor_position(tb, h);
114 break; 106 break;
115 107
116 default: 108 default:
117 reiserfs_panic (tb->tb_sb, "internal_define_dest_src_infos: shift type is unknown (%d)", shift_mode); 109 reiserfs_panic(tb->tb_sb,
118 } 110 "internal_define_dest_src_infos: shift type is unknown (%d)",
111 shift_mode);
112 }
119} 113}
120 114
121
122
123/* Insert count node pointers into buffer cur before position to + 1. 115/* Insert count node pointers into buffer cur before position to + 1.
124 * Insert count items into buffer cur before position to. 116 * Insert count items into buffer cur before position to.
125 * Items and node pointers are specified by inserted and bh respectively. 117 * Items and node pointers are specified by inserted and bh respectively.
126 */ 118 */
127static void internal_insert_childs (struct buffer_info * cur_bi, 119static void internal_insert_childs(struct buffer_info *cur_bi,
128 int to, int count, 120 int to, int count,
129 struct item_head * inserted, 121 struct item_head *inserted,
130 struct buffer_head ** bh 122 struct buffer_head **bh)
131 )
132{ 123{
133 struct buffer_head * cur = cur_bi->bi_bh; 124 struct buffer_head *cur = cur_bi->bi_bh;
134 struct block_head * blkh; 125 struct block_head *blkh;
135 int nr; 126 int nr;
136 struct reiserfs_key * ih; 127 struct reiserfs_key *ih;
137 struct disk_child new_dc[2]; 128 struct disk_child new_dc[2];
138 struct disk_child * dc; 129 struct disk_child *dc;
139 int i; 130 int i;
140 131
141 if (count <= 0) 132 if (count <= 0)
142 return; 133 return;
143 134
144 blkh = B_BLK_HEAD(cur); 135 blkh = B_BLK_HEAD(cur);
145 nr = blkh_nr_item(blkh); 136 nr = blkh_nr_item(blkh);
146 137
147 RFALSE( count > 2, 138 RFALSE(count > 2, "too many children (%d) are to be inserted", count);
148 "too many children (%d) are to be inserted", count); 139 RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
149 RFALSE( B_FREE_SPACE (cur) < count * (KEY_SIZE + DC_SIZE), 140 "no enough free space (%d), needed %d bytes",
150 "no enough free space (%d), needed %d bytes", 141 B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
151 B_FREE_SPACE (cur), count * (KEY_SIZE + DC_SIZE)); 142
152 143 /* prepare space for count disk_child */
153 /* prepare space for count disk_child */ 144 dc = B_N_CHILD(cur, to + 1);
154 dc = B_N_CHILD(cur,to+1); 145
155 146 memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
156 memmove (dc + count, dc, (nr+1-(to+1)) * DC_SIZE); 147
157 148 /* copy to_be_insert disk children */
158 /* copy to_be_insert disk children */ 149 for (i = 0; i < count; i++) {
159 for (i = 0; i < count; i ++) { 150 put_dc_size(&(new_dc[i]),
160 put_dc_size( &(new_dc[i]), MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i])); 151 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
161 put_dc_block_number( &(new_dc[i]), bh[i]->b_blocknr ); 152 put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
162 } 153 }
163 memcpy (dc, new_dc, DC_SIZE * count); 154 memcpy(dc, new_dc, DC_SIZE * count);
164 155
165 156 /* prepare space for count items */
166 /* prepare space for count items */ 157 ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
167 ih = B_N_PDELIM_KEY (cur, ((to == -1) ? 0 : to)); 158
168 159 memmove(ih + count, ih,
169 memmove (ih + count, ih, (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); 160 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
170 161
171 /* copy item headers (keys) */ 162 /* copy item headers (keys) */
172 memcpy (ih, inserted, KEY_SIZE); 163 memcpy(ih, inserted, KEY_SIZE);
173 if ( count > 1 ) 164 if (count > 1)
174 memcpy (ih + 1, inserted + 1, KEY_SIZE); 165 memcpy(ih + 1, inserted + 1, KEY_SIZE);
175 166
176 /* sizes, item number */ 167 /* sizes, item number */
177 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + count ); 168 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
178 set_blkh_free_space( blkh, 169 set_blkh_free_space(blkh,
179 blkh_free_space(blkh) - count * (DC_SIZE + KEY_SIZE ) ); 170 blkh_free_space(blkh) - count * (DC_SIZE +
180 171 KEY_SIZE));
181 do_balance_mark_internal_dirty (cur_bi->tb, cur,0); 172
182 173 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
183 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 174
184 check_internal (cur); 175 /*&&&&&&&&&&&&&&&&&&&&&&&& */
185 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 176 check_internal(cur);
186 177 /*&&&&&&&&&&&&&&&&&&&&&&&& */
187 if (cur_bi->bi_parent) { 178
188 struct disk_child *t_dc = B_N_CHILD (cur_bi->bi_parent,cur_bi->bi_position); 179 if (cur_bi->bi_parent) {
189 put_dc_size( t_dc, dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE))); 180 struct disk_child *t_dc =
190 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, 0); 181 B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
191 182 put_dc_size(t_dc,
192 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 183 dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
193 check_internal (cur_bi->bi_parent); 184 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
194 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 185 0);
195 } 186
187 /*&&&&&&&&&&&&&&&&&&&&&&&& */
188 check_internal(cur_bi->bi_parent);
189 /*&&&&&&&&&&&&&&&&&&&&&&&& */
190 }
196 191
197} 192}
198 193
199
200/* Delete del_num items and node pointers from buffer cur starting from * 194/* Delete del_num items and node pointers from buffer cur starting from *
201 * the first_i'th item and first_p'th pointers respectively. */ 195 * the first_i'th item and first_p'th pointers respectively. */
202static void internal_delete_pointers_items ( 196static void internal_delete_pointers_items(struct buffer_info *cur_bi,
203 struct buffer_info * cur_bi, 197 int first_p,
204 int first_p, 198 int first_i, int del_num)
205 int first_i,
206 int del_num
207 )
208{ 199{
209 struct buffer_head * cur = cur_bi->bi_bh; 200 struct buffer_head *cur = cur_bi->bi_bh;
210 int nr; 201 int nr;
211 struct block_head * blkh; 202 struct block_head *blkh;
212 struct reiserfs_key * key; 203 struct reiserfs_key *key;
213 struct disk_child * dc; 204 struct disk_child *dc;
214 205
215 RFALSE( cur == NULL, "buffer is 0"); 206 RFALSE(cur == NULL, "buffer is 0");
216 RFALSE( del_num < 0, 207 RFALSE(del_num < 0,
217 "negative number of items (%d) can not be deleted", del_num); 208 "negative number of items (%d) can not be deleted", del_num);
218 RFALSE( first_p < 0 || first_p + del_num > B_NR_ITEMS (cur) + 1 || first_i < 0, 209 RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
219 "first pointer order (%d) < 0 or " 210 || first_i < 0,
220 "no so many pointers (%d), only (%d) or " 211 "first pointer order (%d) < 0 or "
221 "first key order %d < 0", first_p, 212 "no so many pointers (%d), only (%d) or "
222 first_p + del_num, B_NR_ITEMS (cur) + 1, first_i); 213 "first key order %d < 0", first_p, first_p + del_num,
223 if ( del_num == 0 ) 214 B_NR_ITEMS(cur) + 1, first_i);
224 return; 215 if (del_num == 0)
225 216 return;
226 blkh = B_BLK_HEAD(cur); 217
227 nr = blkh_nr_item(blkh); 218 blkh = B_BLK_HEAD(cur);
228 219 nr = blkh_nr_item(blkh);
229 if ( first_p == 0 && del_num == nr + 1 ) { 220
230 RFALSE( first_i != 0, "1st deleted key must have order 0, not %d", first_i); 221 if (first_p == 0 && del_num == nr + 1) {
231 make_empty_node (cur_bi); 222 RFALSE(first_i != 0,
232 return; 223 "1st deleted key must have order 0, not %d", first_i);
233 } 224 make_empty_node(cur_bi);
234 225 return;
235 RFALSE( first_i + del_num > B_NR_ITEMS (cur), 226 }
236 "first_i = %d del_num = %d "
237 "no so many keys (%d) in the node (%b)(%z)",
238 first_i, del_num, first_i + del_num, cur, cur);
239
240
241 /* deleting */
242 dc = B_N_CHILD (cur, first_p);
243
244 memmove (dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
245 key = B_N_PDELIM_KEY (cur, first_i);
246 memmove (key, key + del_num, (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - del_num) * DC_SIZE);
247
248
249 /* sizes, item number */
250 set_blkh_nr_item( blkh, blkh_nr_item(blkh) - del_num );
251 set_blkh_free_space( blkh,
252 blkh_free_space(blkh) + (del_num * (KEY_SIZE + DC_SIZE) ) );
253
254 do_balance_mark_internal_dirty (cur_bi->tb, cur, 0);
255 /*&&&&&&&&&&&&&&&&&&&&&&&*/
256 check_internal (cur);
257 /*&&&&&&&&&&&&&&&&&&&&&&&*/
258
259 if (cur_bi->bi_parent) {
260 struct disk_child *t_dc;
261 t_dc = B_N_CHILD (cur_bi->bi_parent, cur_bi->bi_position);
262 put_dc_size( t_dc, dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE) ) );
263
264 do_balance_mark_internal_dirty (cur_bi->tb, cur_bi->bi_parent,0);
265 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
266 check_internal (cur_bi->bi_parent);
267 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
268 }
269}
270 227
228 RFALSE(first_i + del_num > B_NR_ITEMS(cur),
229 "first_i = %d del_num = %d "
230 "no so many keys (%d) in the node (%b)(%z)",
231 first_i, del_num, first_i + del_num, cur, cur);
232
233 /* deleting */
234 dc = B_N_CHILD(cur, first_p);
235
236 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
237 key = B_N_PDELIM_KEY(cur, first_i);
238 memmove(key, key + del_num,
239 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
240 del_num) * DC_SIZE);
241
242 /* sizes, item number */
243 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
244 set_blkh_free_space(blkh,
245 blkh_free_space(blkh) +
246 (del_num * (KEY_SIZE + DC_SIZE)));
247
248 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
249 /*&&&&&&&&&&&&&&&&&&&&&&& */
250 check_internal(cur);
251 /*&&&&&&&&&&&&&&&&&&&&&&& */
252
253 if (cur_bi->bi_parent) {
254 struct disk_child *t_dc;
255 t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
256 put_dc_size(t_dc,
257 dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
258
259 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
260 0);
261 /*&&&&&&&&&&&&&&&&&&&&&&&& */
262 check_internal(cur_bi->bi_parent);
263 /*&&&&&&&&&&&&&&&&&&&&&&&& */
264 }
265}
271 266
272/* delete n node pointers and items starting from given position */ 267/* delete n node pointers and items starting from given position */
273static void internal_delete_childs (struct buffer_info * cur_bi, 268static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
274 int from, int n)
275{ 269{
276 int i_from; 270 int i_from;
277 271
278 i_from = (from == 0) ? from : from - 1; 272 i_from = (from == 0) ? from : from - 1;
279 273
280 /* delete n pointers starting from `from' position in CUR; 274 /* delete n pointers starting from `from' position in CUR;
281 delete n keys starting from 'i_from' position in CUR; 275 delete n keys starting from 'i_from' position in CUR;
282 */ 276 */
283 internal_delete_pointers_items (cur_bi, from, i_from, n); 277 internal_delete_pointers_items(cur_bi, from, i_from, n);
284} 278}
285 279
286
287/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest 280/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
288* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest 281* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest
289 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest 282 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest
290 */ 283 */
291static void internal_copy_pointers_items ( 284static void internal_copy_pointers_items(struct buffer_info *dest_bi,
292 struct buffer_info * dest_bi, 285 struct buffer_head *src,
293 struct buffer_head * src, 286 int last_first, int cpy_num)
294 int last_first, int cpy_num
295 )
296{ 287{
297 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST * 288 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST *
298 * as delimiting key have already inserted to buffer dest.*/ 289 * as delimiting key have already inserted to buffer dest.*/
299 struct buffer_head * dest = dest_bi->bi_bh; 290 struct buffer_head *dest = dest_bi->bi_bh;
300 int nr_dest, nr_src; 291 int nr_dest, nr_src;
301 int dest_order, src_order; 292 int dest_order, src_order;
302 struct block_head * blkh; 293 struct block_head *blkh;
303 struct reiserfs_key * key; 294 struct reiserfs_key *key;
304 struct disk_child * dc; 295 struct disk_child *dc;
305 296
306 nr_src = B_NR_ITEMS (src); 297 nr_src = B_NR_ITEMS(src);
307 298
308 RFALSE( dest == NULL || src == NULL, 299 RFALSE(dest == NULL || src == NULL,
309 "src (%p) or dest (%p) buffer is 0", src, dest); 300 "src (%p) or dest (%p) buffer is 0", src, dest);
310 RFALSE( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, 301 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
311 "invalid last_first parameter (%d)", last_first); 302 "invalid last_first parameter (%d)", last_first);
312 RFALSE( nr_src < cpy_num - 1, 303 RFALSE(nr_src < cpy_num - 1,
313 "no so many items (%d) in src (%d)", cpy_num, nr_src); 304 "no so many items (%d) in src (%d)", cpy_num, nr_src);
314 RFALSE( cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num); 305 RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
315 RFALSE( cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest), 306 RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
316 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)", 307 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
317 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); 308 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
318 309
319 if ( cpy_num == 0 ) 310 if (cpy_num == 0)
320 return; 311 return;
321 312
322 /* coping */ 313 /* coping */
323 blkh = B_BLK_HEAD(dest); 314 blkh = B_BLK_HEAD(dest);
324 nr_dest = blkh_nr_item(blkh); 315 nr_dest = blkh_nr_item(blkh);
325 316
326 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest;*/ 317 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */
327 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0;*/ 318 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */
328 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = nr_src - cpy_num + 1) : 319 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order =
329 (dest_order = nr_dest, src_order = 0); 320 nr_src - cpy_num + 1) : (dest_order =
321 nr_dest,
322 src_order =
323 0);
330 324
331 /* prepare space for cpy_num pointers */ 325 /* prepare space for cpy_num pointers */
332 dc = B_N_CHILD (dest, dest_order); 326 dc = B_N_CHILD(dest, dest_order);
333 327
334 memmove (dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); 328 memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
335 329
336 /* insert pointers */ 330 /* insert pointers */
337 memcpy (dc, B_N_CHILD (src, src_order), DC_SIZE * cpy_num); 331 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
338 332
339 333 /* prepare space for cpy_num - 1 item headers */
340 /* prepare space for cpy_num - 1 item headers */ 334 key = B_N_PDELIM_KEY(dest, dest_order);
341 key = B_N_PDELIM_KEY(dest, dest_order); 335 memmove(key + cpy_num - 1, key,
342 memmove (key + cpy_num - 1, key, 336 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
343 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + cpy_num)); 337 cpy_num));
344 338
345 339 /* insert headers */
346 /* insert headers */ 340 memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
347 memcpy (key, B_N_PDELIM_KEY (src, src_order), KEY_SIZE * (cpy_num - 1)); 341
348 342 /* sizes, item number */
349 /* sizes, item number */ 343 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
350 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + (cpy_num - 1 ) ); 344 set_blkh_free_space(blkh,
351 set_blkh_free_space( blkh, 345 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
352 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num ) ); 346 DC_SIZE * cpy_num));
353 347
354 do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); 348 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
355 349
356 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 350 /*&&&&&&&&&&&&&&&&&&&&&&&& */
357 check_internal (dest); 351 check_internal(dest);
358 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 352 /*&&&&&&&&&&&&&&&&&&&&&&&& */
359 353
360 if (dest_bi->bi_parent) { 354 if (dest_bi->bi_parent) {
361 struct disk_child *t_dc; 355 struct disk_child *t_dc;
362 t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); 356 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
363 put_dc_size( t_dc, dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num) ); 357 put_dc_size(t_dc,
364 358 dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
365 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); 359 DC_SIZE * cpy_num));
366 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 360
367 check_internal (dest_bi->bi_parent); 361 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
368 /*&&&&&&&&&&&&&&&&&&&&&&&&*/ 362 0);
369 } 363 /*&&&&&&&&&&&&&&&&&&&&&&&& */
364 check_internal(dest_bi->bi_parent);
365 /*&&&&&&&&&&&&&&&&&&&&&&&& */
366 }
370 367
371} 368}
372 369
373
374/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. 370/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
375 * Delete cpy_num - del_par items and node pointers from buffer src. 371 * Delete cpy_num - del_par items and node pointers from buffer src.
376 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. 372 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
377 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 373 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
378 */ 374 */
379static void internal_move_pointers_items (struct buffer_info * dest_bi, 375static void internal_move_pointers_items(struct buffer_info *dest_bi,
380 struct buffer_info * src_bi, 376 struct buffer_info *src_bi,
381 int last_first, int cpy_num, int del_par) 377 int last_first, int cpy_num,
378 int del_par)
382{ 379{
383 int first_pointer; 380 int first_pointer;
384 int first_item; 381 int first_item;
385 382
386 internal_copy_pointers_items (dest_bi, src_bi->bi_bh, last_first, cpy_num); 383 internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
387 384 cpy_num);
388 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ 385
389 first_pointer = 0; 386 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
390 first_item = 0; 387 first_pointer = 0;
391 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, 388 first_item = 0;
392 for key - with first_item */ 389 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
393 internal_delete_pointers_items (src_bi, first_pointer, first_item, cpy_num - del_par); 390 for key - with first_item */
394 } else { /* shift_right occurs */ 391 internal_delete_pointers_items(src_bi, first_pointer,
395 int i, j; 392 first_item, cpy_num - del_par);
396 393 } else { /* shift_right occurs */
397 i = ( cpy_num - del_par == ( j = B_NR_ITEMS(src_bi->bi_bh)) + 1 ) ? 0 : j - cpy_num + del_par; 394 int i, j;
398 395
399 internal_delete_pointers_items (src_bi, j + 1 - cpy_num + del_par, i, cpy_num - del_par); 396 i = (cpy_num - del_par ==
400 } 397 (j =
398 B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num +
399 del_par;
400
401 internal_delete_pointers_items(src_bi,
402 j + 1 - cpy_num + del_par, i,
403 cpy_num - del_par);
404 }
401} 405}
402 406
403/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ 407/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
404static void internal_insert_key (struct buffer_info * dest_bi, 408static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */
405 int dest_position_before, /* insert key before key with n_dest number */ 409 struct buffer_head *src, int src_position)
406 struct buffer_head * src,
407 int src_position)
408{ 410{
409 struct buffer_head * dest = dest_bi->bi_bh; 411 struct buffer_head *dest = dest_bi->bi_bh;
410 int nr; 412 int nr;
411 struct block_head * blkh; 413 struct block_head *blkh;
412 struct reiserfs_key * key; 414 struct reiserfs_key *key;
413 415
414 RFALSE( dest == NULL || src == NULL, 416 RFALSE(dest == NULL || src == NULL,
415 "source(%p) or dest(%p) buffer is 0", src, dest); 417 "source(%p) or dest(%p) buffer is 0", src, dest);
416 RFALSE( dest_position_before < 0 || src_position < 0, 418 RFALSE(dest_position_before < 0 || src_position < 0,
417 "source(%d) or dest(%d) key number less than 0", 419 "source(%d) or dest(%d) key number less than 0",
418 src_position, dest_position_before); 420 src_position, dest_position_before);
419 RFALSE( dest_position_before > B_NR_ITEMS (dest) || 421 RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
420 src_position >= B_NR_ITEMS(src), 422 src_position >= B_NR_ITEMS(src),
421 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", 423 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
422 dest_position_before, B_NR_ITEMS (dest), 424 dest_position_before, B_NR_ITEMS(dest),
423 src_position, B_NR_ITEMS(src)); 425 src_position, B_NR_ITEMS(src));
424 RFALSE( B_FREE_SPACE (dest) < KEY_SIZE, 426 RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
425 "no enough free space (%d) in dest buffer", B_FREE_SPACE (dest)); 427 "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
426 428
427 blkh = B_BLK_HEAD(dest); 429 blkh = B_BLK_HEAD(dest);
428 nr = blkh_nr_item(blkh); 430 nr = blkh_nr_item(blkh);
429 431
430 /* prepare space for inserting key */ 432 /* prepare space for inserting key */
431 key = B_N_PDELIM_KEY (dest, dest_position_before); 433 key = B_N_PDELIM_KEY(dest, dest_position_before);
432 memmove (key + 1, key, (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); 434 memmove(key + 1, key,
433 435 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
434 /* insert key */ 436
435 memcpy (key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); 437 /* insert key */
436 438 memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
437 /* Change dirt, free space, item number fields. */ 439
438 440 /* Change dirt, free space, item number fields. */
439 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + 1 ); 441
440 set_blkh_free_space( blkh, blkh_free_space(blkh) - KEY_SIZE ); 442 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
441 443 set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
442 do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); 444
443 445 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
444 if (dest_bi->bi_parent) { 446
445 struct disk_child *t_dc; 447 if (dest_bi->bi_parent) {
446 t_dc = B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position); 448 struct disk_child *t_dc;
447 put_dc_size( t_dc, dc_size(t_dc) + KEY_SIZE ); 449 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
448 450 put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
449 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); 451
450 } 452 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
453 0);
454 }
451} 455}
452 456
453
454
455/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. 457/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
456 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. 458 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
457 * Replace d_key'th key in buffer cfl. 459 * Replace d_key'th key in buffer cfl.
458 * Delete pointer_amount items and node pointers from buffer src. 460 * Delete pointer_amount items and node pointers from buffer src.
459 */ 461 */
460/* this can be invoked both to shift from S to L and from R to S */ 462/* this can be invoked both to shift from S to L and from R to S */
461static void internal_shift_left ( 463static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */
462 int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ 464 struct tree_balance *tb,
463 struct tree_balance * tb, 465 int h, int pointer_amount)
464 int h,
465 int pointer_amount
466 )
467{ 466{
468 struct buffer_info dest_bi, src_bi; 467 struct buffer_info dest_bi, src_bi;
469 struct buffer_head * cf; 468 struct buffer_head *cf;
470 int d_key_position; 469 int d_key_position;
471 470
472 internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 471 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
473 472 &d_key_position, &cf);
474 /*printk("pointer_amount = %d\n",pointer_amount);*/ 473
475 474 /*printk("pointer_amount = %d\n",pointer_amount); */
476 if (pointer_amount) { 475
477 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ 476 if (pointer_amount) {
478 internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); 477 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */
479 478 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
480 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { 479 d_key_position);
481 if (src_bi.bi_position/*src->b_item_order*/ == 0) 480
482 replace_key (tb, cf, d_key_position, src_bi.bi_parent/*src->b_parent*/, 0); 481 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
483 } else 482 if (src_bi.bi_position /*src->b_item_order */ == 0)
484 replace_key (tb, cf, d_key_position, src_bi.bi_bh, pointer_amount - 1); 483 replace_key(tb, cf, d_key_position,
485 } 484 src_bi.
486 /* last parameter is del_parameter */ 485 bi_parent /*src->b_parent */ , 0);
487 internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 0); 486 } else
487 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
488 pointer_amount - 1);
489 }
490 /* last parameter is del_parameter */
491 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
492 pointer_amount, 0);
488 493
489} 494}
490 495
@@ -493,67 +498,66 @@ static void internal_shift_left (
493 * Delete n - 1 items and node pointers from buffer S[h]. 498 * Delete n - 1 items and node pointers from buffer S[h].
494 */ 499 */
495/* it always shifts from S[h] to L[h] */ 500/* it always shifts from S[h] to L[h] */
496static void internal_shift1_left ( 501static void internal_shift1_left(struct tree_balance *tb,
497 struct tree_balance * tb, 502 int h, int pointer_amount)
498 int h,
499 int pointer_amount
500 )
501{ 503{
502 struct buffer_info dest_bi, src_bi; 504 struct buffer_info dest_bi, src_bi;
503 struct buffer_head * cf; 505 struct buffer_head *cf;
504 int d_key_position; 506 int d_key_position;
505 507
506 internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 508 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
509 &dest_bi, &src_bi, &d_key_position, &cf);
507 510
508 if ( pointer_amount > 0 ) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ 511 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
509 internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); 512 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
510 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]);*/ 513 d_key_position);
514 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
511 515
512 /* last parameter is del_parameter */ 516 /* last parameter is del_parameter */
513 internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 1); 517 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
514 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1);*/ 518 pointer_amount, 1);
519 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
515} 520}
516 521
517
518/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. 522/* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
519 * Copy n node pointers and n - 1 items from buffer src to buffer dest. 523 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
520 * Replace d_key'th key in buffer cfr. 524 * Replace d_key'th key in buffer cfr.
521 * Delete n items and node pointers from buffer src. 525 * Delete n items and node pointers from buffer src.
522 */ 526 */
523static void internal_shift_right ( 527static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */
524 int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ 528 struct tree_balance *tb,
525 struct tree_balance * tb, 529 int h, int pointer_amount)
526 int h,
527 int pointer_amount
528 )
529{ 530{
530 struct buffer_info dest_bi, src_bi; 531 struct buffer_info dest_bi, src_bi;
531 struct buffer_head * cf; 532 struct buffer_head *cf;
532 int d_key_position; 533 int d_key_position;
533 int nr; 534 int nr;
534 535
535 536 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
536 internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 537 &d_key_position, &cf);
537 538
538 nr = B_NR_ITEMS (src_bi.bi_bh); 539 nr = B_NR_ITEMS(src_bi.bi_bh);
539 540
540 if (pointer_amount > 0) { 541 if (pointer_amount > 0) {
541 /* insert delimiting key from common father of dest and src to dest node into position 0 */ 542 /* insert delimiting key from common father of dest and src to dest node into position 0 */
542 internal_insert_key (&dest_bi, 0, cf, d_key_position); 543 internal_insert_key(&dest_bi, 0, cf, d_key_position);
543 if (nr == pointer_amount - 1) { 544 if (nr == pointer_amount - 1) {
544 RFALSE( src_bi.bi_bh != PATH_H_PBUFFER (tb->tb_path, h)/*tb->S[h]*/ || 545 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
545 dest_bi.bi_bh != tb->R[h], 546 dest_bi.bi_bh != tb->R[h],
546 "src (%p) must be == tb->S[h](%p) when it disappears", 547 "src (%p) must be == tb->S[h](%p) when it disappears",
547 src_bi.bi_bh, PATH_H_PBUFFER (tb->tb_path, h)); 548 src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
548 /* when S[h] disappers replace left delemiting key as well */ 549 /* when S[h] disappers replace left delemiting key as well */
549 if (tb->CFL[h]) 550 if (tb->CFL[h])
550 replace_key (tb, cf, d_key_position, tb->CFL[h], tb->lkey[h]); 551 replace_key(tb, cf, d_key_position, tb->CFL[h],
551 } else 552 tb->lkey[h]);
552 replace_key (tb, cf, d_key_position, src_bi.bi_bh, nr - pointer_amount); 553 } else
553 } 554 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
554 555 nr - pointer_amount);
555 /* last parameter is del_parameter */ 556 }
556 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 0); 557
558 /* last parameter is del_parameter */
559 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
560 pointer_amount, 0);
557} 561}
558 562
559/* Insert delimiting key to R[h]. 563/* Insert delimiting key to R[h].
@@ -561,498 +565,526 @@ static void internal_shift_right (
561 * Delete n - 1 items and node pointers from buffer S[h]. 565 * Delete n - 1 items and node pointers from buffer S[h].
562 */ 566 */
563/* it always shift from S[h] to R[h] */ 567/* it always shift from S[h] to R[h] */
564static void internal_shift1_right ( 568static void internal_shift1_right(struct tree_balance *tb,
565 struct tree_balance * tb, 569 int h, int pointer_amount)
566 int h,
567 int pointer_amount
568 )
569{ 570{
570 struct buffer_info dest_bi, src_bi; 571 struct buffer_info dest_bi, src_bi;
571 struct buffer_head * cf; 572 struct buffer_head *cf;
572 int d_key_position; 573 int d_key_position;
573 574
574 internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); 575 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
575 576 &dest_bi, &src_bi, &d_key_position, &cf);
576 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ 577
577 internal_insert_key (&dest_bi, 0, cf, d_key_position); 578 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */
578 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]);*/ 579 internal_insert_key(&dest_bi, 0, cf, d_key_position);
579 580 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
580 /* last parameter is del_parameter */
581 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 1);
582 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1);*/
583}
584 581
582 /* last parameter is del_parameter */
583 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
584 pointer_amount, 1);
585 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
586}
585 587
586/* Delete insert_num node pointers together with their left items 588/* Delete insert_num node pointers together with their left items
587 * and balance current node.*/ 589 * and balance current node.*/
588static void balance_internal_when_delete (struct tree_balance * tb, 590static void balance_internal_when_delete(struct tree_balance *tb,
589 int h, int child_pos) 591 int h, int child_pos)
590{ 592{
591 int insert_num; 593 int insert_num;
592 int n; 594 int n;
593 struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); 595 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
594 struct buffer_info bi; 596 struct buffer_info bi;
595 597
596 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); 598 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
597 599
598 /* delete child-node-pointer(s) together with their left item(s) */ 600 /* delete child-node-pointer(s) together with their left item(s) */
599 bi.tb = tb; 601 bi.tb = tb;
600 bi.bi_bh = tbSh; 602 bi.bi_bh = tbSh;
601 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 603 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
602 bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 604 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
603 605
604 internal_delete_childs (&bi, child_pos, -insert_num); 606 internal_delete_childs(&bi, child_pos, -insert_num);
605 607
606 RFALSE( tb->blknum[h] > 1, 608 RFALSE(tb->blknum[h] > 1,
607 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]); 609 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
608 610
609 n = B_NR_ITEMS(tbSh); 611 n = B_NR_ITEMS(tbSh);
610 612
611 if ( tb->lnum[h] == 0 && tb->rnum[h] == 0 ) { 613 if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
612 if ( tb->blknum[h] == 0 ) { 614 if (tb->blknum[h] == 0) {
613 /* node S[h] (root of the tree) is empty now */ 615 /* node S[h] (root of the tree) is empty now */
614 struct buffer_head *new_root; 616 struct buffer_head *new_root;
615 617
616 RFALSE( n || B_FREE_SPACE (tbSh) != MAX_CHILD_SIZE(tbSh) - DC_SIZE, 618 RFALSE(n
617 "buffer must have only 0 keys (%d)", n); 619 || B_FREE_SPACE(tbSh) !=
618 RFALSE( bi.bi_parent, "root has parent (%p)", bi.bi_parent); 620 MAX_CHILD_SIZE(tbSh) - DC_SIZE,
619 621 "buffer must have only 0 keys (%d)", n);
620 /* choose a new root */ 622 RFALSE(bi.bi_parent, "root has parent (%p)",
621 if ( ! tb->L[h-1] || ! B_NR_ITEMS(tb->L[h-1]) ) 623 bi.bi_parent);
622 new_root = tb->R[h-1]; 624
623 else 625 /* choose a new root */
624 new_root = tb->L[h-1]; 626 if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
625 /* switch super block's tree root block number to the new value */ 627 new_root = tb->R[h - 1];
626 PUT_SB_ROOT_BLOCK( tb->tb_sb, new_root->b_blocknr ); 628 else
627 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; 629 new_root = tb->L[h - 1];
628 PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) - 1 ); 630 /* switch super block's tree root block number to the new value */
629 631 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
630 do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1); 632 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
631 /*&&&&&&&&&&&&&&&&&&&&&&*/ 633 PUT_SB_TREE_HEIGHT(tb->tb_sb,
632 if (h > 1) 634 SB_TREE_HEIGHT(tb->tb_sb) - 1);
633 /* use check_internal if new root is an internal node */ 635
634 check_internal (new_root); 636 do_balance_mark_sb_dirty(tb,
635 /*&&&&&&&&&&&&&&&&&&&&&&*/ 637 REISERFS_SB(tb->tb_sb)->s_sbh,
636 638 1);
637 /* do what is needed for buffer thrown from tree */ 639 /*&&&&&&&&&&&&&&&&&&&&&& */
638 reiserfs_invalidate_buffer(tb, tbSh); 640 if (h > 1)
639 return; 641 /* use check_internal if new root is an internal node */
642 check_internal(new_root);
643 /*&&&&&&&&&&&&&&&&&&&&&& */
644
645 /* do what is needed for buffer thrown from tree */
646 reiserfs_invalidate_buffer(tb, tbSh);
647 return;
648 }
649 return;
650 }
651
652 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */
653
654 RFALSE(tb->rnum[h] != 0,
655 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
656 h, tb->rnum[h]);
657
658 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
659 reiserfs_invalidate_buffer(tb, tbSh);
660
661 return;
662 }
663
664 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */
665 RFALSE(tb->lnum[h] != 0,
666 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
667 h, tb->lnum[h]);
668
669 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
670
671 reiserfs_invalidate_buffer(tb, tbSh);
672 return;
640 } 673 }
641 return;
642 }
643
644 if ( tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1 ) { /* join S[h] with L[h] */
645
646 RFALSE( tb->rnum[h] != 0,
647 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
648 h, tb->rnum[h]);
649
650 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
651 reiserfs_invalidate_buffer(tb, tbSh);
652
653 return;
654 }
655
656 if ( tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1 ) { /* join S[h] with R[h] */
657 RFALSE( tb->lnum[h] != 0,
658 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
659 h, tb->lnum[h]);
660
661 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
662
663 reiserfs_invalidate_buffer(tb,tbSh);
664 return;
665 }
666
667 if ( tb->lnum[h] < 0 ) { /* borrow from left neighbor L[h] */
668 RFALSE( tb->rnum[h] != 0,
669 "wrong tb->rnum[%d]==%d when borrow from L[h]", h, tb->rnum[h]);
670 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]);*/
671 internal_shift_right (INTERNAL_SHIFT_FROM_L_TO_S, tb, h, -tb->lnum[h]);
672 return;
673 }
674
675 if ( tb->rnum[h] < 0 ) { /* borrow from right neighbor R[h] */
676 RFALSE( tb->lnum[h] != 0,
677 "invalid tb->lnum[%d]==%d when borrow from R[h]",
678 h, tb->lnum[h]);
679 internal_shift_left (INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]);*/
680 return;
681 }
682
683 if ( tb->lnum[h] > 0 ) { /* split S[h] into two parts and put them into neighbors */
684 RFALSE( tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
685 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
686 h, tb->lnum[h], h, tb->rnum[h], n);
687
688 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]);*/
689 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]);
690
691 reiserfs_invalidate_buffer (tb, tbSh);
692
693 return;
694 }
695 reiserfs_panic (tb->tb_sb, "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
696 h, tb->lnum[h], h, tb->rnum[h]);
697}
698 674
675 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */
676 RFALSE(tb->rnum[h] != 0,
677 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
678 tb->rnum[h]);
679 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
680 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
681 -tb->lnum[h]);
682 return;
683 }
684
685 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */
686 RFALSE(tb->lnum[h] != 0,
687 "invalid tb->lnum[%d]==%d when borrow from R[h]",
688 h, tb->lnum[h]);
689 internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
690 return;
691 }
692
693 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */
694 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
695 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
696 h, tb->lnum[h], h, tb->rnum[h], n);
697
698 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
699 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
700 tb->rnum[h]);
701
702 reiserfs_invalidate_buffer(tb, tbSh);
703
704 return;
705 }
706 reiserfs_panic(tb->tb_sb,
707 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
708 h, tb->lnum[h], h, tb->rnum[h]);
709}
699 710
700/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/ 711/* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
701static void replace_lkey ( 712static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
702 struct tree_balance * tb,
703 int h,
704 struct item_head * key
705 )
706{ 713{
707 RFALSE( tb->L[h] == NULL || tb->CFL[h] == NULL, 714 RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
708 "L[h](%p) and CFL[h](%p) must exist in replace_lkey", 715 "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
709 tb->L[h], tb->CFL[h]); 716 tb->L[h], tb->CFL[h]);
710 717
711 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) 718 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
712 return; 719 return;
713 720
714 memcpy (B_N_PDELIM_KEY(tb->CFL[h],tb->lkey[h]), key, KEY_SIZE); 721 memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
715 722
716 do_balance_mark_internal_dirty (tb, tb->CFL[h],0); 723 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
717} 724}
718 725
719
720/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ 726/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
721static void replace_rkey ( 727static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
722 struct tree_balance * tb,
723 int h,
724 struct item_head * key
725 )
726{ 728{
727 RFALSE( tb->R[h] == NULL || tb->CFR[h] == NULL, 729 RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
728 "R[h](%p) and CFR[h](%p) must exist in replace_rkey", 730 "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
729 tb->R[h], tb->CFR[h]); 731 tb->R[h], tb->CFR[h]);
730 RFALSE( B_NR_ITEMS(tb->R[h]) == 0, 732 RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
731 "R[h] can not be empty if it exists (item number=%d)", 733 "R[h] can not be empty if it exists (item number=%d)",
732 B_NR_ITEMS(tb->R[h])); 734 B_NR_ITEMS(tb->R[h]));
733 735
734 memcpy (B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]), key, KEY_SIZE); 736 memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
735 737
736 do_balance_mark_internal_dirty (tb, tb->CFR[h], 0); 738 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
737} 739}
738 740
739 741int balance_internal(struct tree_balance *tb, /* tree_balance structure */
740int balance_internal (struct tree_balance * tb, /* tree_balance structure */ 742 int h, /* level of the tree */
741 int h, /* level of the tree */ 743 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */
742 int child_pos, 744 struct buffer_head **insert_ptr /* node for insertion on higher level */
743 struct item_head * insert_key, /* key for insertion on higher level */
744 struct buffer_head ** insert_ptr /* node for insertion on higher level*/
745 ) 745 )
746 /* if inserting/pasting 746 /* if inserting/pasting
747 { 747 {
748 child_pos is the position of the node-pointer in S[h] that * 748 child_pos is the position of the node-pointer in S[h] that *
749 pointed to S[h-1] before balancing of the h-1 level; * 749 pointed to S[h-1] before balancing of the h-1 level; *
750 this means that new pointers and items must be inserted AFTER * 750 this means that new pointers and items must be inserted AFTER *
751 child_pos 751 child_pos
752 } 752 }
753 else 753 else
754 { 754 {
755 it is the position of the leftmost pointer that must be deleted (together with 755 it is the position of the leftmost pointer that must be deleted (together with
756 its corresponding key to the left of the pointer) 756 its corresponding key to the left of the pointer)
757 as a result of the previous level's balancing. 757 as a result of the previous level's balancing.
758 } 758 }
759*/ 759 */
760{ 760{
761 struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); 761 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
762 struct buffer_info bi; 762 struct buffer_info bi;
763 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ 763 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
764 int insert_num, n, k; 764 int insert_num, n, k;
765 struct buffer_head * S_new; 765 struct buffer_head *S_new;
766 struct item_head new_insert_key; 766 struct item_head new_insert_key;
767 struct buffer_head * new_insert_ptr = NULL; 767 struct buffer_head *new_insert_ptr = NULL;
768 struct item_head * new_insert_key_addr = insert_key; 768 struct item_head *new_insert_key_addr = insert_key;
769 769
770 RFALSE( h < 1, "h (%d) can not be < 1 on internal level", h); 770 RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
771 771
772 PROC_INFO_INC( tb -> tb_sb, balance_at[ h ] ); 772 PROC_INFO_INC(tb->tb_sb, balance_at[h]);
773 773
774 order = ( tbSh ) ? PATH_H_POSITION (tb->tb_path, h + 1)/*tb->S[h]->b_item_order*/ : 0; 774 order =
775 775 (tbSh) ? PATH_H_POSITION(tb->tb_path,
776 /* Using insert_size[h] calculate the number insert_num of items 776 h + 1) /*tb->S[h]->b_item_order */ : 0;
777 that must be inserted to or deleted from S[h]. */ 777
778 insert_num = tb->insert_size[h]/((int)(KEY_SIZE + DC_SIZE)); 778 /* Using insert_size[h] calculate the number insert_num of items
779 779 that must be inserted to or deleted from S[h]. */
780 /* Check whether insert_num is proper **/ 780 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
781 RFALSE( insert_num < -2 || insert_num > 2, 781
782 "incorrect number of items inserted to the internal node (%d)", 782 /* Check whether insert_num is proper * */
783 insert_num); 783 RFALSE(insert_num < -2 || insert_num > 2,
784 RFALSE( h > 1 && (insert_num > 1 || insert_num < -1), 784 "incorrect number of items inserted to the internal node (%d)",
785 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", 785 insert_num);
786 insert_num, h); 786 RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
787 787 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
788 /* Make balance in case insert_num < 0 */ 788 insert_num, h);
789 if ( insert_num < 0 ) { 789
790 balance_internal_when_delete (tb, h, child_pos); 790 /* Make balance in case insert_num < 0 */
791 return order; 791 if (insert_num < 0) {
792 } 792 balance_internal_when_delete(tb, h, child_pos);
793 793 return order;
794 k = 0;
795 if ( tb->lnum[h] > 0 ) {
796 /* shift lnum[h] items from S[h] to the left neighbor L[h].
797 check how many of new items fall into L[h] or CFL[h] after
798 shifting */
799 n = B_NR_ITEMS (tb->L[h]); /* number of items in L[h] */
800 if ( tb->lnum[h] <= child_pos ) {
801 /* new items don't fall into L[h] or CFL[h] */
802 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);
803 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]);*/
804 child_pos -= tb->lnum[h];
805 } else if ( tb->lnum[h] > child_pos + insert_num ) {
806 /* all new items fall into L[h] */
807 internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h] - insert_num);
808 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
809 tb->lnum[h]-insert_num);
810 */
811 /* insert insert_num keys and node-pointers into L[h] */
812 bi.tb = tb;
813 bi.bi_bh = tb->L[h];
814 bi.bi_parent = tb->FL[h];
815 bi.bi_position = get_left_neighbor_position (tb, h);
816 internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next*/ n + child_pos + 1,
817 insert_num,insert_key,insert_ptr);
818
819 insert_num = 0;
820 } else {
821 struct disk_child * dc;
822
823 /* some items fall into L[h] or CFL[h], but some don't fall */
824 internal_shift1_left(tb,h,child_pos+1);
825 /* calculate number of new items that fall into L[h] */
826 k = tb->lnum[h] - child_pos - 1;
827 bi.tb = tb;
828 bi.bi_bh = tb->L[h];
829 bi.bi_parent = tb->FL[h];
830 bi.bi_position = get_left_neighbor_position (tb, h);
831 internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next,*/ n + child_pos + 1,k,
832 insert_key,insert_ptr);
833
834 replace_lkey(tb,h,insert_key + k);
835
836 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
837 dc = B_N_CHILD(tbSh, 0);
838 put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[k]) - B_FREE_SPACE (insert_ptr[k]));
839 put_dc_block_number( dc, insert_ptr[k]->b_blocknr );
840
841 do_balance_mark_internal_dirty (tb, tbSh, 0);
842
843 k++;
844 insert_key += k;
845 insert_ptr += k;
846 insert_num -= k;
847 child_pos = 0;
848 } 794 }
849 } /* tb->lnum[h] > 0 */
850
851 if ( tb->rnum[h] > 0 ) {
852 /*shift rnum[h] items from S[h] to the right neighbor R[h]*/
853 /* check how many of new items fall into R or CFR after shifting */
854 n = B_NR_ITEMS (tbSh); /* number of items in S[h] */
855 if ( n - tb->rnum[h] >= child_pos )
856 /* new items fall into S[h] */
857 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]);*/
858 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]);
859 else
860 if ( n + insert_num - tb->rnum[h] < child_pos )
861 {
862 /* all new items fall into R[h] */
863 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
864 tb->rnum[h] - insert_num);*/
865 internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h] - insert_num);
866
867 /* insert insert_num keys and node-pointers into R[h] */
868 bi.tb = tb;
869 bi.bi_bh = tb->R[h];
870 bi.bi_parent = tb->FR[h];
871 bi.bi_position = get_right_neighbor_position (tb, h);
872 internal_insert_childs (&bi, /*tb->R[h],tb->S[h-1]->b_next*/ child_pos - n - insert_num + tb->rnum[h] - 1,
873 insert_num,insert_key,insert_ptr);
874 insert_num = 0;
875 }
876 else
877 {
878 struct disk_child * dc;
879
880 /* one of the items falls into CFR[h] */
881 internal_shift1_right(tb,h,n - child_pos + 1);
882 /* calculate number of new items that fall into R[h] */
883 k = tb->rnum[h] - n + child_pos - 1;
884 bi.tb = tb;
885 bi.bi_bh = tb->R[h];
886 bi.bi_parent = tb->FR[h];
887 bi.bi_position = get_right_neighbor_position (tb, h);
888 internal_insert_childs (&bi, /*tb->R[h], tb->R[h]->b_child,*/ 0, k, insert_key + 1, insert_ptr + 1);
889 795
890 replace_rkey(tb,h,insert_key + insert_num - k - 1); 796 k = 0;
797 if (tb->lnum[h] > 0) {
798 /* shift lnum[h] items from S[h] to the left neighbor L[h].
799 check how many of new items fall into L[h] or CFL[h] after
800 shifting */
801 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
802 if (tb->lnum[h] <= child_pos) {
803 /* new items don't fall into L[h] or CFL[h] */
804 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
805 tb->lnum[h]);
806 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
807 child_pos -= tb->lnum[h];
808 } else if (tb->lnum[h] > child_pos + insert_num) {
809 /* all new items fall into L[h] */
810 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
811 tb->lnum[h] - insert_num);
812 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
813 tb->lnum[h]-insert_num);
814 */
815 /* insert insert_num keys and node-pointers into L[h] */
816 bi.tb = tb;
817 bi.bi_bh = tb->L[h];
818 bi.bi_parent = tb->FL[h];
819 bi.bi_position = get_left_neighbor_position(tb, h);
820 internal_insert_childs(&bi,
821 /*tb->L[h], tb->S[h-1]->b_next */
822 n + child_pos + 1,
823 insert_num, insert_key,
824 insert_ptr);
825
826 insert_num = 0;
827 } else {
828 struct disk_child *dc;
829
830 /* some items fall into L[h] or CFL[h], but some don't fall */
831 internal_shift1_left(tb, h, child_pos + 1);
832 /* calculate number of new items that fall into L[h] */
833 k = tb->lnum[h] - child_pos - 1;
834 bi.tb = tb;
835 bi.bi_bh = tb->L[h];
836 bi.bi_parent = tb->FL[h];
837 bi.bi_position = get_left_neighbor_position(tb, h);
838 internal_insert_childs(&bi,
839 /*tb->L[h], tb->S[h-1]->b_next, */
840 n + child_pos + 1, k,
841 insert_key, insert_ptr);
842
843 replace_lkey(tb, h, insert_key + k);
844
845 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
846 dc = B_N_CHILD(tbSh, 0);
847 put_dc_size(dc,
848 MAX_CHILD_SIZE(insert_ptr[k]) -
849 B_FREE_SPACE(insert_ptr[k]));
850 put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
851
852 do_balance_mark_internal_dirty(tb, tbSh, 0);
853
854 k++;
855 insert_key += k;
856 insert_ptr += k;
857 insert_num -= k;
858 child_pos = 0;
859 }
860 }
861 /* tb->lnum[h] > 0 */
862 if (tb->rnum[h] > 0) {
863 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
864 /* check how many of new items fall into R or CFR after shifting */
865 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
866 if (n - tb->rnum[h] >= child_pos)
867 /* new items fall into S[h] */
868 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
869 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
870 tb->rnum[h]);
871 else if (n + insert_num - tb->rnum[h] < child_pos) {
872 /* all new items fall into R[h] */
873 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
874 tb->rnum[h] - insert_num); */
875 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
876 tb->rnum[h] - insert_num);
877
878 /* insert insert_num keys and node-pointers into R[h] */
879 bi.tb = tb;
880 bi.bi_bh = tb->R[h];
881 bi.bi_parent = tb->FR[h];
882 bi.bi_position = get_right_neighbor_position(tb, h);
883 internal_insert_childs(&bi,
884 /*tb->R[h],tb->S[h-1]->b_next */
885 child_pos - n - insert_num +
886 tb->rnum[h] - 1,
887 insert_num, insert_key,
888 insert_ptr);
889 insert_num = 0;
890 } else {
891 struct disk_child *dc;
892
893 /* one of the items falls into CFR[h] */
894 internal_shift1_right(tb, h, n - child_pos + 1);
895 /* calculate number of new items that fall into R[h] */
896 k = tb->rnum[h] - n + child_pos - 1;
897 bi.tb = tb;
898 bi.bi_bh = tb->R[h];
899 bi.bi_parent = tb->FR[h];
900 bi.bi_position = get_right_neighbor_position(tb, h);
901 internal_insert_childs(&bi,
902 /*tb->R[h], tb->R[h]->b_child, */
903 0, k, insert_key + 1,
904 insert_ptr + 1);
905
906 replace_rkey(tb, h, insert_key + insert_num - k - 1);
907
908 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
909 dc = B_N_CHILD(tb->R[h], 0);
910 put_dc_size(dc,
911 MAX_CHILD_SIZE(insert_ptr
912 [insert_num - k - 1]) -
913 B_FREE_SPACE(insert_ptr
914 [insert_num - k - 1]));
915 put_dc_block_number(dc,
916 insert_ptr[insert_num - k -
917 1]->b_blocknr);
918
919 do_balance_mark_internal_dirty(tb, tb->R[h], 0);
920
921 insert_num -= (k + 1);
922 }
923 }
891 924
892 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1]*/ 925 /** Fill new node that appears instead of S[h] **/
893 dc = B_N_CHILD(tb->R[h], 0); 926 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
894 put_dc_size( dc, MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - 927 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
895 B_FREE_SPACE (insert_ptr[insert_num-k-1]));
896 put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr );
897 928
898 do_balance_mark_internal_dirty (tb, tb->R[h],0); 929 if (!tb->blknum[h]) { /* node S[h] is empty now */
930 RFALSE(!tbSh, "S[h] is equal NULL");
899 931
900 insert_num -= (k + 1); 932 /* do what is needed for buffer thrown from tree */
901 } 933 reiserfs_invalidate_buffer(tb, tbSh);
902 } 934 return order;
935 }
903 936
904 /** Fill new node that appears instead of S[h] **/ 937 if (!tbSh) {
905 RFALSE( tb->blknum[h] > 2, "blknum can not be > 2 for internal level"); 938 /* create new root */
906 RFALSE( tb->blknum[h] < 0, "blknum can not be < 0"); 939 struct disk_child *dc;
940 struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
941 struct block_head *blkh;
907 942
908 if ( ! tb->blknum[h] ) 943 if (tb->blknum[h] != 1)
909 { /* node S[h] is empty now */ 944 reiserfs_panic(NULL,
910 RFALSE( ! tbSh, "S[h] is equal NULL"); 945 "balance_internal: One new node required for creating the new root");
946 /* S[h] = empty buffer from the list FEB. */
947 tbSh = get_FEB(tb);
948 blkh = B_BLK_HEAD(tbSh);
949 set_blkh_level(blkh, h + 1);
911 950
912 /* do what is needed for buffer thrown from tree */ 951 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
913 reiserfs_invalidate_buffer(tb,tbSh); 952
914 return order; 953 dc = B_N_CHILD(tbSh, 0);
915 } 954 put_dc_block_number(dc, tbSh_1->b_blocknr);
916 955 put_dc_size(dc,
917 if ( ! tbSh ) { 956 (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
918 /* create new root */ 957
919 struct disk_child * dc; 958 tb->insert_size[h] -= DC_SIZE;
920 struct buffer_head * tbSh_1 = PATH_H_PBUFFER (tb->tb_path, h - 1); 959 set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
921 struct block_head * blkh;
922
923
924 if ( tb->blknum[h] != 1 )
925 reiserfs_panic(NULL, "balance_internal: One new node required for creating the new root");
926 /* S[h] = empty buffer from the list FEB. */
927 tbSh = get_FEB (tb);
928 blkh = B_BLK_HEAD(tbSh);
929 set_blkh_level( blkh, h + 1 );
930
931 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
932
933 dc = B_N_CHILD(tbSh, 0);
934 put_dc_block_number( dc, tbSh_1->b_blocknr );
935 put_dc_size( dc, (MAX_CHILD_SIZE (tbSh_1) - B_FREE_SPACE (tbSh_1)));
936
937 tb->insert_size[h] -= DC_SIZE;
938 set_blkh_free_space( blkh, blkh_free_space(blkh) - DC_SIZE );
939
940 do_balance_mark_internal_dirty (tb, tbSh, 0);
941
942 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
943 check_internal (tbSh);
944 /*&&&&&&&&&&&&&&&&&&&&&&&&*/
945
946 /* put new root into path structure */
947 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = tbSh;
948
949 /* Change root in structure super block. */
950 PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr );
951 PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 );
952 do_balance_mark_sb_dirty (tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
953 }
954
955 if ( tb->blknum[h] == 2 ) {
956 int snum;
957 struct buffer_info dest_bi, src_bi;
958 960
961 do_balance_mark_internal_dirty(tb, tbSh, 0);
959 962
960 /* S_new = free buffer from list FEB */ 963 /*&&&&&&&&&&&&&&&&&&&&&&&& */
961 S_new = get_FEB(tb); 964 check_internal(tbSh);
962 965 /*&&&&&&&&&&&&&&&&&&&&&&&& */
963 set_blkh_level( B_BLK_HEAD(S_new), h + 1 ); 966
964 967 /* put new root into path structure */
965 dest_bi.tb = tb; 968 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
966 dest_bi.bi_bh = S_new; 969 tbSh;
967 dest_bi.bi_parent = NULL; 970
968 dest_bi.bi_position = 0; 971 /* Change root in structure super block. */
969 src_bi.tb = tb; 972 PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
970 src_bi.bi_bh = tbSh; 973 PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
971 src_bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 974 do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
972 src_bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1);
973
974 n = B_NR_ITEMS (tbSh); /* number of items in S[h] */
975 snum = (insert_num + n + 1)/2;
976 if ( n - snum >= child_pos ) {
977 /* new items don't fall into S_new */
978 /* store the delimiting key for the next level */
979 /* new_insert_key = (n - snum)'th key in S[h] */
980 memcpy (&new_insert_key,B_N_PDELIM_KEY(tbSh,n - snum),
981 KEY_SIZE);
982 /* last parameter is del_par */
983 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum, 0);
984 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0);*/
985 } else if ( n + insert_num - snum < child_pos ) {
986 /* all new items fall into S_new */
987 /* store the delimiting key for the next level */
988 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
989 memcpy(&new_insert_key,B_N_PDELIM_KEY(tbSh,n + insert_num - snum),
990 KEY_SIZE);
991 /* last parameter is del_par */
992 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum - insert_num, 0);
993 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0);*/
994
995 /* insert insert_num keys and node-pointers into S_new */
996 internal_insert_childs (&dest_bi, /*S_new,tb->S[h-1]->b_next,*/child_pos - n - insert_num + snum - 1,
997 insert_num,insert_key,insert_ptr);
998
999 insert_num = 0;
1000 } else {
1001 struct disk_child * dc;
1002
1003 /* some items fall into S_new, but some don't fall */
1004 /* last parameter is del_par */
1005 internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, n - child_pos + 1, 1);
1006 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1);*/
1007 /* calculate number of new items that fall into S_new */
1008 k = snum - n + child_pos - 1;
1009
1010 internal_insert_childs (&dest_bi, /*S_new,*/ 0, k, insert_key + 1, insert_ptr+1);
1011
1012 /* new_insert_key = insert_key[insert_num - k - 1] */
1013 memcpy(&new_insert_key,insert_key + insert_num - k - 1,
1014 KEY_SIZE);
1015 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1016
1017 dc = B_N_CHILD(S_new,0);
1018 put_dc_size( dc, (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) -
1019 B_FREE_SPACE(insert_ptr[insert_num-k-1])) );
1020 put_dc_block_number( dc, insert_ptr[insert_num-k-1]->b_blocknr );
1021
1022 do_balance_mark_internal_dirty (tb, S_new,0);
1023
1024 insert_num -= (k + 1);
1025 } 975 }
1026 /* new_insert_ptr = node_pointer to S_new */ 976
1027 new_insert_ptr = S_new; 977 if (tb->blknum[h] == 2) {
1028 978 int snum;
1029 RFALSE (!buffer_journaled(S_new) || buffer_journal_dirty(S_new) || 979 struct buffer_info dest_bi, src_bi;
1030 buffer_dirty (S_new), 980
1031 "cm-00001: bad S_new (%b)", S_new); 981 /* S_new = free buffer from list FEB */
1032 982 S_new = get_FEB(tb);
1033 // S_new is released in unfix_nodes 983
1034 } 984 set_blkh_level(B_BLK_HEAD(S_new), h + 1);
1035 985
1036 n = B_NR_ITEMS (tbSh); /*number of items in S[h] */ 986 dest_bi.tb = tb;
1037 987 dest_bi.bi_bh = S_new;
1038 if ( 0 <= child_pos && child_pos <= n && insert_num > 0 ) { 988 dest_bi.bi_parent = NULL;
1039 bi.tb = tb; 989 dest_bi.bi_position = 0;
1040 bi.bi_bh = tbSh; 990 src_bi.tb = tb;
1041 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); 991 src_bi.bi_bh = tbSh;
1042 bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); 992 src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1043 internal_insert_childs ( 993 src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1044 &bi,/*tbSh,*/ 994
1045 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next,*/ 995 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
1046 child_pos,insert_num,insert_key,insert_ptr 996 snum = (insert_num + n + 1) / 2;
1047 ); 997 if (n - snum >= child_pos) {
998 /* new items don't fall into S_new */
999 /* store the delimiting key for the next level */
1000 /* new_insert_key = (n - snum)'th key in S[h] */
1001 memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
1002 KEY_SIZE);
1003 /* last parameter is del_par */
1004 internal_move_pointers_items(&dest_bi, &src_bi,
1005 LAST_TO_FIRST, snum, 0);
1006 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1007 } else if (n + insert_num - snum < child_pos) {
1008 /* all new items fall into S_new */
1009 /* store the delimiting key for the next level */
1010 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
1011 memcpy(&new_insert_key,
1012 B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
1013 KEY_SIZE);
1014 /* last parameter is del_par */
1015 internal_move_pointers_items(&dest_bi, &src_bi,
1016 LAST_TO_FIRST,
1017 snum - insert_num, 0);
1018 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1019
1020 /* insert insert_num keys and node-pointers into S_new */
1021 internal_insert_childs(&dest_bi,
1022 /*S_new,tb->S[h-1]->b_next, */
1023 child_pos - n - insert_num +
1024 snum - 1,
1025 insert_num, insert_key,
1026 insert_ptr);
1027
1028 insert_num = 0;
1029 } else {
1030 struct disk_child *dc;
1031
1032 /* some items fall into S_new, but some don't fall */
1033 /* last parameter is del_par */
1034 internal_move_pointers_items(&dest_bi, &src_bi,
1035 LAST_TO_FIRST,
1036 n - child_pos + 1, 1);
1037 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1038 /* calculate number of new items that fall into S_new */
1039 k = snum - n + child_pos - 1;
1040
1041 internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
1042 insert_key + 1, insert_ptr + 1);
1043
1044 /* new_insert_key = insert_key[insert_num - k - 1] */
1045 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1046 KEY_SIZE);
1047 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1048
1049 dc = B_N_CHILD(S_new, 0);
1050 put_dc_size(dc,
1051 (MAX_CHILD_SIZE
1052 (insert_ptr[insert_num - k - 1]) -
1053 B_FREE_SPACE(insert_ptr
1054 [insert_num - k - 1])));
1055 put_dc_block_number(dc,
1056 insert_ptr[insert_num - k -
1057 1]->b_blocknr);
1058
1059 do_balance_mark_internal_dirty(tb, S_new, 0);
1060
1061 insert_num -= (k + 1);
1062 }
1063 /* new_insert_ptr = node_pointer to S_new */
1064 new_insert_ptr = S_new;
1065
1066 RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
1067 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1068 S_new);
1069
1070 // S_new is released in unfix_nodes
1048 } 1071 }
1049 1072
1073 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
1050 1074
1051 memcpy (new_insert_key_addr,&new_insert_key,KEY_SIZE); 1075 if (0 <= child_pos && child_pos <= n && insert_num > 0) {
1076 bi.tb = tb;
1077 bi.bi_bh = tbSh;
1078 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1079 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1080 internal_insert_childs(&bi, /*tbSh, */
1081 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */
1082 child_pos, insert_num, insert_key,
1083 insert_ptr);
1084 }
1085
1086 memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
1052 insert_ptr[0] = new_insert_ptr; 1087 insert_ptr[0] = new_insert_ptr;
1053 1088
1054 return order; 1089 return order;
1055 } 1090}
1056
1057
1058
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 289d864fe73..1aaf2c7d44e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,107 +18,109 @@
18#include <linux/writeback.h> 18#include <linux/writeback.h>
19#include <linux/quotaops.h> 19#include <linux/quotaops.h>
20 20
21extern int reiserfs_default_io_size; /* default io size devuned in super.c */ 21extern int reiserfs_default_io_size; /* default io size devuned in super.c */
22 22
23static int reiserfs_commit_write(struct file *f, struct page *page, 23static int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
25static int reiserfs_prepare_write(struct file *f, struct page *page, 25static int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to); 26 unsigned from, unsigned to);
27 27
28void reiserfs_delete_inode (struct inode * inode) 28void reiserfs_delete_inode(struct inode *inode)
29{ 29{
30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
31 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 31 int jbegin_count =
32 struct reiserfs_transaction_handle th ; 32 JOURNAL_PER_BALANCE_CNT * 2 +
33 33 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
34 reiserfs_write_lock(inode->i_sb); 34 struct reiserfs_transaction_handle th;
35 35
36 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 36 reiserfs_write_lock(inode->i_sb);
37 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
38 down (&inode->i_sem);
39 37
40 reiserfs_delete_xattrs (inode); 38 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
39 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
40 down(&inode->i_sem);
41 41
42 if (journal_begin(&th, inode->i_sb, jbegin_count)) { 42 reiserfs_delete_xattrs(inode);
43 up (&inode->i_sem);
44 goto out;
45 }
46 reiserfs_update_inode_transaction(inode) ;
47 43
48 if (reiserfs_delete_object (&th, inode)) { 44 if (journal_begin(&th, inode->i_sb, jbegin_count)) {
49 up (&inode->i_sem); 45 up(&inode->i_sem);
50 goto out; 46 goto out;
51 } 47 }
48 reiserfs_update_inode_transaction(inode);
52 49
53 /* Do quota update inside a transaction for journaled quotas. We must do that 50 if (reiserfs_delete_object(&th, inode)) {
54 * after delete_object so that quota updates go into the same transaction as 51 up(&inode->i_sem);
55 * stat data deletion */ 52 goto out;
56 DQUOT_FREE_INODE(inode); 53 }
57 54
58 if (journal_end(&th, inode->i_sb, jbegin_count)) { 55 /* Do quota update inside a transaction for journaled quotas. We must do that
59 up (&inode->i_sem); 56 * after delete_object so that quota updates go into the same transaction as
60 goto out; 57 * stat data deletion */
61 } 58 DQUOT_FREE_INODE(inode);
59
60 if (journal_end(&th, inode->i_sb, jbegin_count)) {
61 up(&inode->i_sem);
62 goto out;
63 }
62 64
63 up (&inode->i_sem); 65 up(&inode->i_sem);
64 66
65 /* all items of file are deleted, so we can remove "save" link */ 67 /* all items of file are deleted, so we can remove "save" link */
66 remove_save_link (inode, 0/* not truncate */); /* we can't do anything 68 remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything
67 * about an error here */ 69 * about an error here */
68 } else { 70 } else {
69 /* no object items are in the tree */ 71 /* no object items are in the tree */
70 ; 72 ;
71 } 73 }
72out: 74 out:
73 clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ 75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
74 inode->i_blocks = 0; 76 inode->i_blocks = 0;
75 reiserfs_write_unlock(inode->i_sb); 77 reiserfs_write_unlock(inode->i_sb);
76} 78}
77 79
78static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, 80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
79 loff_t offset, int type, int length ) 81 __u32 objectid, loff_t offset, int type, int length)
80{ 82{
81 key->version = version; 83 key->version = version;
82 84
83 key->on_disk_key.k_dir_id = dirid; 85 key->on_disk_key.k_dir_id = dirid;
84 key->on_disk_key.k_objectid = objectid; 86 key->on_disk_key.k_objectid = objectid;
85 set_cpu_key_k_offset (key, offset); 87 set_cpu_key_k_offset(key, offset);
86 set_cpu_key_k_type (key, type); 88 set_cpu_key_k_type(key, type);
87 key->key_length = length; 89 key->key_length = length;
88} 90}
89 91
90
91/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set 92/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
92 offset and type of key */ 93 offset and type of key */
93void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset, 94void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
94 int type, int length ) 95 int type, int length)
95{ 96{
96 _make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), 97 _make_cpu_key(key, get_inode_item_key_version(inode),
97 le32_to_cpu (INODE_PKEY (inode)->k_objectid), 98 le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
98 offset, type, length); 99 le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
100 length);
99} 101}
100 102
101
102// 103//
103// when key is 0, do not set version and short key 104// when key is 0, do not set version and short key
104// 105//
105inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key, 106inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
106 int version, 107 int version,
107 loff_t offset, int type, int length, 108 loff_t offset, int type, int length,
108 int entry_count/*or ih_free_space*/) 109 int entry_count /*or ih_free_space */ )
109{ 110{
110 if (key) { 111 if (key) {
111 ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); 112 ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
112 ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); 113 ih->ih_key.k_objectid =
113 } 114 cpu_to_le32(key->on_disk_key.k_objectid);
114 put_ih_version( ih, version ); 115 }
115 set_le_ih_k_offset (ih, offset); 116 put_ih_version(ih, version);
116 set_le_ih_k_type (ih, type); 117 set_le_ih_k_offset(ih, offset);
117 put_ih_item_len( ih, length ); 118 set_le_ih_k_type(ih, type);
118 /* set_ih_free_space (ih, 0);*/ 119 put_ih_item_len(ih, length);
119 // for directory items it is entry count, for directs and stat 120 /* set_ih_free_space (ih, 0); */
120 // datas - 0xffff, for indirects - 0 121 // for directory items it is entry count, for directs and stat
121 put_ih_entry_count( ih, entry_count ); 122 // datas - 0xffff, for indirects - 0
123 put_ih_entry_count(ih, entry_count);
122} 124}
123 125
124// 126//
@@ -153,84 +155,84 @@ inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key
153** to be unmapped, so that block_prepare_write will correctly call 155** to be unmapped, so that block_prepare_write will correctly call
154** reiserfs_get_block to convert the tail into an unformatted node 156** reiserfs_get_block to convert the tail into an unformatted node
155*/ 157*/
156static inline void fix_tail_page_for_writing(struct page *page) { 158static inline void fix_tail_page_for_writing(struct page *page)
157 struct buffer_head *head, *next, *bh ; 159{
158 160 struct buffer_head *head, *next, *bh;
159 if (page && page_has_buffers(page)) { 161
160 head = page_buffers(page) ; 162 if (page && page_has_buffers(page)) {
161 bh = head ; 163 head = page_buffers(page);
162 do { 164 bh = head;
163 next = bh->b_this_page ; 165 do {
164 if (buffer_mapped(bh) && bh->b_blocknr == 0) { 166 next = bh->b_this_page;
165 reiserfs_unmap_buffer(bh) ; 167 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
166 } 168 reiserfs_unmap_buffer(bh);
167 bh = next ; 169 }
168 } while (bh != head) ; 170 bh = next;
169 } 171 } while (bh != head);
172 }
170} 173}
171 174
172/* reiserfs_get_block does not need to allocate a block only if it has been 175/* reiserfs_get_block does not need to allocate a block only if it has been
173 done already or non-hole position has been found in the indirect item */ 176 done already or non-hole position has been found in the indirect item */
174static inline int allocation_needed (int retval, b_blocknr_t allocated, 177static inline int allocation_needed(int retval, b_blocknr_t allocated,
175 struct item_head * ih, 178 struct item_head *ih,
176 __le32 * item, int pos_in_item) 179 __le32 * item, int pos_in_item)
177{ 180{
178 if (allocated) 181 if (allocated)
179 return 0; 182 return 0;
180 if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && 183 if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
181 get_block_num(item, pos_in_item)) 184 get_block_num(item, pos_in_item))
182 return 0; 185 return 0;
183 return 1; 186 return 1;
184} 187}
185 188
186static inline int indirect_item_found (int retval, struct item_head * ih) 189static inline int indirect_item_found(int retval, struct item_head *ih)
187{ 190{
188 return (retval == POSITION_FOUND) && is_indirect_le_ih (ih); 191 return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
189} 192}
190 193
191 194static inline void set_block_dev_mapped(struct buffer_head *bh,
192static inline void set_block_dev_mapped (struct buffer_head * bh, 195 b_blocknr_t block, struct inode *inode)
193 b_blocknr_t block, struct inode * inode)
194{ 196{
195 map_bh(bh, inode->i_sb, block); 197 map_bh(bh, inode->i_sb, block);
196} 198}
197 199
198
199// 200//
200// files which were created in the earlier version can not be longer, 201// files which were created in the earlier version can not be longer,
201// than 2 gb 202// than 2 gb
202// 203//
203static int file_capable (struct inode * inode, long block) 204static int file_capable(struct inode *inode, long block)
204{ 205{
205 if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 || // it is new file. 206 if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file.
206 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb 207 block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
207 return 1; 208 return 1;
208 209
209 return 0; 210 return 0;
210} 211}
211 212
212/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th, 213/*static*/ int restart_transaction(struct reiserfs_transaction_handle *th,
213 struct inode *inode, struct path *path) { 214 struct inode *inode, struct path *path)
214 struct super_block *s = th->t_super ; 215{
215 int len = th->t_blocks_allocated ; 216 struct super_block *s = th->t_super;
216 int err; 217 int len = th->t_blocks_allocated;
217 218 int err;
218 BUG_ON (!th->t_trans_id); 219
219 BUG_ON (!th->t_refcount); 220 BUG_ON(!th->t_trans_id);
220 221 BUG_ON(!th->t_refcount);
221 /* we cannot restart while nested */ 222
222 if (th->t_refcount > 1) { 223 /* we cannot restart while nested */
223 return 0 ; 224 if (th->t_refcount > 1) {
224 } 225 return 0;
225 pathrelse(path) ; 226 }
226 reiserfs_update_sd(th, inode) ; 227 pathrelse(path);
227 err = journal_end(th, s, len) ; 228 reiserfs_update_sd(th, inode);
228 if (!err) { 229 err = journal_end(th, s, len);
229 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ; 230 if (!err) {
230 if (!err) 231 err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
231 reiserfs_update_inode_transaction(inode) ; 232 if (!err)
232 } 233 reiserfs_update_inode_transaction(inode);
233 return err; 234 }
235 return err;
234} 236}
235 237
236// it is called by get_block when create == 0. Returns block number 238// it is called by get_block when create == 0. Returns block number
@@ -241,190 +243,192 @@ static int file_capable (struct inode * inode, long block)
241// Please improve the english/clarity in the comment above, as it is 243// Please improve the english/clarity in the comment above, as it is
242// hard to understand. 244// hard to understand.
243 245
244static int _get_block_create_0 (struct inode * inode, long block, 246static int _get_block_create_0(struct inode *inode, long block,
245 struct buffer_head * bh_result, 247 struct buffer_head *bh_result, int args)
246 int args)
247{ 248{
248 INITIALIZE_PATH (path); 249 INITIALIZE_PATH(path);
249 struct cpu_key key; 250 struct cpu_key key;
250 struct buffer_head * bh; 251 struct buffer_head *bh;
251 struct item_head * ih, tmp_ih; 252 struct item_head *ih, tmp_ih;
252 int fs_gen ; 253 int fs_gen;
253 int blocknr; 254 int blocknr;
254 char * p = NULL; 255 char *p = NULL;
255 int chars; 256 int chars;
256 int ret ; 257 int ret;
257 int result ; 258 int result;
258 int done = 0 ; 259 int done = 0;
259 unsigned long offset ; 260 unsigned long offset;
260 261
261 // prepare the key to look for the 'block'-th block of file 262 // prepare the key to look for the 'block'-th block of file
262 make_cpu_key (&key, inode, 263 make_cpu_key(&key, inode,
263 (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); 264 (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
264 265 3);
265research: 266
266 result = search_for_position_by_key (inode->i_sb, &key, &path) ; 267 research:
267 if (result != POSITION_FOUND) { 268 result = search_for_position_by_key(inode->i_sb, &key, &path);
268 pathrelse (&path); 269 if (result != POSITION_FOUND) {
269 if (p) 270 pathrelse(&path);
270 kunmap(bh_result->b_page) ; 271 if (p)
271 if (result == IO_ERROR) 272 kunmap(bh_result->b_page);
272 return -EIO; 273 if (result == IO_ERROR)
273 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 274 return -EIO;
274 // That there is some MMAPED data associated with it that is yet to be written to disk. 275 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
275 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { 276 // That there is some MMAPED data associated with it that is yet to be written to disk.
276 return -ENOENT ; 277 if ((args & GET_BLOCK_NO_HOLE)
277 } 278 && !PageUptodate(bh_result->b_page)) {
278 return 0 ; 279 return -ENOENT;
279 } 280 }
280 281 return 0;
281 // 282 }
282 bh = get_last_bh (&path); 283 //
283 ih = get_ih (&path); 284 bh = get_last_bh(&path);
284 if (is_indirect_le_ih (ih)) { 285 ih = get_ih(&path);
285 __le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih); 286 if (is_indirect_le_ih(ih)) {
286 287 __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
287 /* FIXME: here we could cache indirect item or part of it in 288
288 the inode to avoid search_by_key in case of subsequent 289 /* FIXME: here we could cache indirect item or part of it in
289 access to file */ 290 the inode to avoid search_by_key in case of subsequent
290 blocknr = get_block_num(ind_item, path.pos_in_item) ; 291 access to file */
291 ret = 0 ; 292 blocknr = get_block_num(ind_item, path.pos_in_item);
292 if (blocknr) { 293 ret = 0;
293 map_bh(bh_result, inode->i_sb, blocknr); 294 if (blocknr) {
294 if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { 295 map_bh(bh_result, inode->i_sb, blocknr);
295 set_buffer_boundary(bh_result); 296 if (path.pos_in_item ==
296 } 297 ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
297 } else 298 set_buffer_boundary(bh_result);
298 // We do not return -ENOENT if there is a hole but page is uptodate, because it means 299 }
299 // That there is some MMAPED data associated with it that is yet to be written to disk. 300 } else
300 if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) { 301 // We do not return -ENOENT if there is a hole but page is uptodate, because it means
301 ret = -ENOENT ; 302 // That there is some MMAPED data associated with it that is yet to be written to disk.
302 } 303 if ((args & GET_BLOCK_NO_HOLE)
303 304 && !PageUptodate(bh_result->b_page)) {
304 pathrelse (&path); 305 ret = -ENOENT;
305 if (p) 306 }
306 kunmap(bh_result->b_page) ; 307
307 return ret ; 308 pathrelse(&path);
308 } 309 if (p)
309 310 kunmap(bh_result->b_page);
310 // requested data are in direct item(s) 311 return ret;
311 if (!(args & GET_BLOCK_READ_DIRECT)) { 312 }
312 // we are called by bmap. FIXME: we can not map block of file 313 // requested data are in direct item(s)
313 // when it is stored in direct item(s) 314 if (!(args & GET_BLOCK_READ_DIRECT)) {
314 pathrelse (&path); 315 // we are called by bmap. FIXME: we can not map block of file
315 if (p) 316 // when it is stored in direct item(s)
316 kunmap(bh_result->b_page) ; 317 pathrelse(&path);
317 return -ENOENT; 318 if (p)
318 } 319 kunmap(bh_result->b_page);
319 320 return -ENOENT;
320 /* if we've got a direct item, and the buffer or page was uptodate, 321 }
321 ** we don't want to pull data off disk again. skip to the 322
322 ** end, where we map the buffer and return 323 /* if we've got a direct item, and the buffer or page was uptodate,
323 */ 324 ** we don't want to pull data off disk again. skip to the
324 if (buffer_uptodate(bh_result)) { 325 ** end, where we map the buffer and return
325 goto finished ; 326 */
326 } else 327 if (buffer_uptodate(bh_result)) {
327 /* 328 goto finished;
328 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date 329 } else
329 ** pages without any buffers. If the page is up to date, we don't want 330 /*
330 ** read old data off disk. Set the up to date bit on the buffer instead 331 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
331 ** and jump to the end 332 ** pages without any buffers. If the page is up to date, we don't want
332 */ 333 ** read old data off disk. Set the up to date bit on the buffer instead
333 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { 334 ** and jump to the end
335 */
336 if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
334 set_buffer_uptodate(bh_result); 337 set_buffer_uptodate(bh_result);
335 goto finished ; 338 goto finished;
336 } 339 }
337 340 // read file tail into part of page
338 // read file tail into part of page 341 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
339 offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; 342 fs_gen = get_generation(inode->i_sb);
340 fs_gen = get_generation(inode->i_sb) ; 343 copy_item_head(&tmp_ih, ih);
341 copy_item_head (&tmp_ih, ih); 344
342 345 /* we only want to kmap if we are reading the tail into the page.
343 /* we only want to kmap if we are reading the tail into the page. 346 ** this is not the common case, so we don't kmap until we are
344 ** this is not the common case, so we don't kmap until we are 347 ** sure we need to. But, this means the item might move if
345 ** sure we need to. But, this means the item might move if 348 ** kmap schedules
346 ** kmap schedules 349 */
347 */ 350 if (!p) {
348 if (!p) { 351 p = (char *)kmap(bh_result->b_page);
349 p = (char *)kmap(bh_result->b_page) ; 352 if (fs_changed(fs_gen, inode->i_sb)
350 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { 353 && item_moved(&tmp_ih, &path)) {
351 goto research; 354 goto research;
352 } 355 }
353 } 356 }
354 p += offset ; 357 p += offset;
355 memset (p, 0, inode->i_sb->s_blocksize); 358 memset(p, 0, inode->i_sb->s_blocksize);
356 do { 359 do {
357 if (!is_direct_le_ih (ih)) { 360 if (!is_direct_le_ih(ih)) {
358 BUG (); 361 BUG();
359 } 362 }
360 /* make sure we don't read more bytes than actually exist in 363 /* make sure we don't read more bytes than actually exist in
361 ** the file. This can happen in odd cases where i_size isn't 364 ** the file. This can happen in odd cases where i_size isn't
362 ** correct, and when direct item padding results in a few 365 ** correct, and when direct item padding results in a few
363 ** extra bytes at the end of the direct item 366 ** extra bytes at the end of the direct item
364 */ 367 */
365 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) 368 if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
366 break ; 369 break;
367 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { 370 if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
368 chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item; 371 chars =
369 done = 1 ; 372 inode->i_size - (le_ih_k_offset(ih) - 1) -
370 } else { 373 path.pos_in_item;
371 chars = ih_item_len(ih) - path.pos_in_item; 374 done = 1;
372 } 375 } else {
373 memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); 376 chars = ih_item_len(ih) - path.pos_in_item;
374 377 }
375 if (done) 378 memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
376 break ; 379
377 380 if (done)
378 p += chars; 381 break;
379 382
380 if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) 383 p += chars;
381 // we done, if read direct item is not the last item of 384
382 // node FIXME: we could try to check right delimiting key 385 if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
383 // to see whether direct item continues in the right 386 // we done, if read direct item is not the last item of
384 // neighbor or rely on i_size 387 // node FIXME: we could try to check right delimiting key
385 break; 388 // to see whether direct item continues in the right
386 389 // neighbor or rely on i_size
387 // update key to look for the next piece 390 break;
388 set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); 391
389 result = search_for_position_by_key (inode->i_sb, &key, &path); 392 // update key to look for the next piece
390 if (result != POSITION_FOUND) 393 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
391 // i/o error most likely 394 result = search_for_position_by_key(inode->i_sb, &key, &path);
392 break; 395 if (result != POSITION_FOUND)
393 bh = get_last_bh (&path); 396 // i/o error most likely
394 ih = get_ih (&path); 397 break;
395 } while (1); 398 bh = get_last_bh(&path);
396 399 ih = get_ih(&path);
397 flush_dcache_page(bh_result->b_page) ; 400 } while (1);
398 kunmap(bh_result->b_page) ; 401
399 402 flush_dcache_page(bh_result->b_page);
400finished: 403 kunmap(bh_result->b_page);
401 pathrelse (&path); 404
402 405 finished:
403 if (result == IO_ERROR) 406 pathrelse(&path);
404 return -EIO; 407
405 408 if (result == IO_ERROR)
406 /* this buffer has valid data, but isn't valid for io. mapping it to 409 return -EIO;
407 * block #0 tells the rest of reiserfs it just has a tail in it
408 */
409 map_bh(bh_result, inode->i_sb, 0);
410 set_buffer_uptodate (bh_result);
411 return 0;
412}
413 410
411 /* this buffer has valid data, but isn't valid for io. mapping it to
412 * block #0 tells the rest of reiserfs it just has a tail in it
413 */
414 map_bh(bh_result, inode->i_sb, 0);
415 set_buffer_uptodate(bh_result);
416 return 0;
417}
414 418
415// this is called to create file map. So, _get_block_create_0 will not 419// this is called to create file map. So, _get_block_create_0 will not
416// read direct item 420// read direct item
417static int reiserfs_bmap (struct inode * inode, sector_t block, 421static int reiserfs_bmap(struct inode *inode, sector_t block,
418 struct buffer_head * bh_result, int create) 422 struct buffer_head *bh_result, int create)
419{ 423{
420 if (!file_capable (inode, block)) 424 if (!file_capable(inode, block))
421 return -EFBIG; 425 return -EFBIG;
422 426
423 reiserfs_write_lock(inode->i_sb); 427 reiserfs_write_lock(inode->i_sb);
424 /* do not read the direct item */ 428 /* do not read the direct item */
425 _get_block_create_0 (inode, block, bh_result, 0) ; 429 _get_block_create_0(inode, block, bh_result, 0);
426 reiserfs_write_unlock(inode->i_sb); 430 reiserfs_write_unlock(inode->i_sb);
427 return 0; 431 return 0;
428} 432}
429 433
430/* special version of get_block that is only used by grab_tail_page right 434/* special version of get_block that is only used by grab_tail_page right
@@ -444,9 +448,11 @@ static int reiserfs_bmap (struct inode * inode, sector_t block,
444** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, 448** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
445** don't use this function. 449** don't use this function.
446*/ 450*/
447static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, 451static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
448 struct buffer_head * bh_result, int create) { 452 struct buffer_head *bh_result,
449 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; 453 int create)
454{
455 return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
450} 456}
451 457
452/* This is special helper for reiserfs_get_block in case we are executing 458/* This is special helper for reiserfs_get_block in case we are executing
@@ -457,43 +463,42 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
457 struct buffer_head *bh_result, 463 struct buffer_head *bh_result,
458 int create) 464 int create)
459{ 465{
460 int ret ; 466 int ret;
461 467
462 bh_result->b_page = NULL; 468 bh_result->b_page = NULL;
463
464 /* We set the b_size before reiserfs_get_block call since it is
465 referenced in convert_tail_for_hole() that may be called from
466 reiserfs_get_block() */
467 bh_result->b_size = (1 << inode->i_blkbits);
468
469 ret = reiserfs_get_block(inode, iblock, bh_result,
470 create | GET_BLOCK_NO_DANGLE) ;
471 if (ret)
472 goto out;
473
474 /* don't allow direct io onto tail pages */
475 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
476 /* make sure future calls to the direct io funcs for this offset
477 ** in the file fail by unmapping the buffer
478 */
479 clear_buffer_mapped(bh_result);
480 ret = -EINVAL ;
481 }
482 /* Possible unpacked tail. Flush the data before pages have
483 disappeared */
484 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
485 int err;
486 lock_kernel();
487 err = reiserfs_commit_for_inode(inode);
488 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
489 unlock_kernel();
490 if (err < 0)
491 ret = err;
492 }
493out:
494 return ret ;
495}
496 469
470 /* We set the b_size before reiserfs_get_block call since it is
471 referenced in convert_tail_for_hole() that may be called from
472 reiserfs_get_block() */
473 bh_result->b_size = (1 << inode->i_blkbits);
474
475 ret = reiserfs_get_block(inode, iblock, bh_result,
476 create | GET_BLOCK_NO_DANGLE);
477 if (ret)
478 goto out;
479
480 /* don't allow direct io onto tail pages */
481 if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
482 /* make sure future calls to the direct io funcs for this offset
483 ** in the file fail by unmapping the buffer
484 */
485 clear_buffer_mapped(bh_result);
486 ret = -EINVAL;
487 }
488 /* Possible unpacked tail. Flush the data before pages have
489 disappeared */
490 if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
491 int err;
492 lock_kernel();
493 err = reiserfs_commit_for_inode(inode);
494 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
495 unlock_kernel();
496 if (err < 0)
497 ret = err;
498 }
499 out:
500 return ret;
501}
497 502
498/* 503/*
499** helper function for when reiserfs_get_block is called for a hole 504** helper function for when reiserfs_get_block is called for a hole
@@ -505,490 +510,547 @@ out:
505** you should not be in a transaction, or have any paths held when you 510** you should not be in a transaction, or have any paths held when you
506** call this. 511** call this.
507*/ 512*/
508static int convert_tail_for_hole(struct inode *inode, 513static int convert_tail_for_hole(struct inode *inode,
509 struct buffer_head *bh_result, 514 struct buffer_head *bh_result,
510 loff_t tail_offset) { 515 loff_t tail_offset)
511 unsigned long index ; 516{
512 unsigned long tail_end ; 517 unsigned long index;
513 unsigned long tail_start ; 518 unsigned long tail_end;
514 struct page * tail_page ; 519 unsigned long tail_start;
515 struct page * hole_page = bh_result->b_page ; 520 struct page *tail_page;
516 int retval = 0 ; 521 struct page *hole_page = bh_result->b_page;
517 522 int retval = 0;
518 if ((tail_offset & (bh_result->b_size - 1)) != 1) 523
519 return -EIO ; 524 if ((tail_offset & (bh_result->b_size - 1)) != 1)
520 525 return -EIO;
521 /* always try to read until the end of the block */ 526
522 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; 527 /* always try to read until the end of the block */
523 tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; 528 tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
524 529 tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
525 index = tail_offset >> PAGE_CACHE_SHIFT ; 530
526 /* hole_page can be zero in case of direct_io, we are sure 531 index = tail_offset >> PAGE_CACHE_SHIFT;
527 that we cannot get here if we write with O_DIRECT into 532 /* hole_page can be zero in case of direct_io, we are sure
528 tail page */ 533 that we cannot get here if we write with O_DIRECT into
529 if (!hole_page || index != hole_page->index) { 534 tail page */
530 tail_page = grab_cache_page(inode->i_mapping, index) ; 535 if (!hole_page || index != hole_page->index) {
531 retval = -ENOMEM; 536 tail_page = grab_cache_page(inode->i_mapping, index);
532 if (!tail_page) { 537 retval = -ENOMEM;
533 goto out ; 538 if (!tail_page) {
534 } 539 goto out;
535 } else { 540 }
536 tail_page = hole_page ; 541 } else {
537 } 542 tail_page = hole_page;
538 543 }
539 /* we don't have to make sure the conversion did not happen while 544
540 ** we were locking the page because anyone that could convert 545 /* we don't have to make sure the conversion did not happen while
541 ** must first take i_sem. 546 ** we were locking the page because anyone that could convert
542 ** 547 ** must first take i_sem.
543 ** We must fix the tail page for writing because it might have buffers 548 **
544 ** that are mapped, but have a block number of 0. This indicates tail 549 ** We must fix the tail page for writing because it might have buffers
545 ** data that has been read directly into the page, and block_prepare_write 550 ** that are mapped, but have a block number of 0. This indicates tail
546 ** won't trigger a get_block in this case. 551 ** data that has been read directly into the page, and block_prepare_write
547 */ 552 ** won't trigger a get_block in this case.
548 fix_tail_page_for_writing(tail_page) ; 553 */
549 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); 554 fix_tail_page_for_writing(tail_page);
550 if (retval) 555 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
551 goto unlock ; 556 if (retval)
552 557 goto unlock;
553 /* tail conversion might change the data in the page */ 558
554 flush_dcache_page(tail_page) ; 559 /* tail conversion might change the data in the page */
555 560 flush_dcache_page(tail_page);
556 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ; 561
557 562 retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
558unlock: 563
559 if (tail_page != hole_page) { 564 unlock:
560 unlock_page(tail_page) ; 565 if (tail_page != hole_page) {
561 page_cache_release(tail_page) ; 566 unlock_page(tail_page);
562 } 567 page_cache_release(tail_page);
563out: 568 }
564 return retval ; 569 out:
570 return retval;
565} 571}
566 572
567static inline int _allocate_block(struct reiserfs_transaction_handle *th, 573static inline int _allocate_block(struct reiserfs_transaction_handle *th,
568 long block, 574 long block,
569 struct inode *inode, 575 struct inode *inode,
570 b_blocknr_t *allocated_block_nr, 576 b_blocknr_t * allocated_block_nr,
571 struct path * path, 577 struct path *path, int flags)
572 int flags) { 578{
573 BUG_ON (!th->t_trans_id); 579 BUG_ON(!th->t_trans_id);
574 580
575#ifdef REISERFS_PREALLOCATE 581#ifdef REISERFS_PREALLOCATE
576 if (!(flags & GET_BLOCK_NO_ISEM)) { 582 if (!(flags & GET_BLOCK_NO_ISEM)) {
577 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block); 583 return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
578 } 584 path, block);
585 }
579#endif 586#endif
580 return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block); 587 return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
588 block);
581} 589}
582 590
583int reiserfs_get_block (struct inode * inode, sector_t block, 591int reiserfs_get_block(struct inode *inode, sector_t block,
584 struct buffer_head * bh_result, int create) 592 struct buffer_head *bh_result, int create)
585{ 593{
586 int repeat, retval = 0; 594 int repeat, retval = 0;
587 b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int 595 b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int
588 INITIALIZE_PATH(path); 596 INITIALIZE_PATH(path);
589 int pos_in_item; 597 int pos_in_item;
590 struct cpu_key key; 598 struct cpu_key key;
591 struct buffer_head * bh, * unbh = NULL; 599 struct buffer_head *bh, *unbh = NULL;
592 struct item_head * ih, tmp_ih; 600 struct item_head *ih, tmp_ih;
593 __le32 * item; 601 __le32 *item;
594 int done; 602 int done;
595 int fs_gen; 603 int fs_gen;
596 struct reiserfs_transaction_handle *th = NULL; 604 struct reiserfs_transaction_handle *th = NULL;
597 /* space reserved in transaction batch: 605 /* space reserved in transaction batch:
598 . 3 balancings in direct->indirect conversion 606 . 3 balancings in direct->indirect conversion
599 . 1 block involved into reiserfs_update_sd() 607 . 1 block involved into reiserfs_update_sd()
600 XXX in practically impossible worst case direct2indirect() 608 XXX in practically impossible worst case direct2indirect()
601 can incur (much) more than 3 balancings. 609 can incur (much) more than 3 balancings.
602 quota update for user, group */ 610 quota update for user, group */
603 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); 611 int jbegin_count =
604 int version; 612 JOURNAL_PER_BALANCE_CNT * 3 + 1 +
605 int dangle = 1; 613 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
606 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; 614 int version;
607 615 int dangle = 1;
608 /* bad.... */ 616 loff_t new_offset =
609 reiserfs_write_lock(inode->i_sb); 617 (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
610 version = get_inode_item_key_version (inode); 618
611 619 /* bad.... */
612 if (block < 0) { 620 reiserfs_write_lock(inode->i_sb);
613 reiserfs_write_unlock(inode->i_sb); 621 version = get_inode_item_key_version(inode);
614 return -EIO;
615 }
616 622
617 if (!file_capable (inode, block)) { 623 if (block < 0) {
618 reiserfs_write_unlock(inode->i_sb); 624 reiserfs_write_unlock(inode->i_sb);
619 return -EFBIG; 625 return -EIO;
620 } 626 }
621
622 /* if !create, we aren't changing the FS, so we don't need to
623 ** log anything, so we don't need to start a transaction
624 */
625 if (!(create & GET_BLOCK_CREATE)) {
626 int ret ;
627 /* find number of block-th logical block of the file */
628 ret = _get_block_create_0 (inode, block, bh_result,
629 create | GET_BLOCK_READ_DIRECT) ;
630 reiserfs_write_unlock(inode->i_sb);
631 return ret;
632 }
633 /*
634 * if we're already in a transaction, make sure to close
635 * any new transactions we start in this func
636 */
637 if ((create & GET_BLOCK_NO_DANGLE) ||
638 reiserfs_transaction_running(inode->i_sb))
639 dangle = 0;
640
641 /* If file is of such a size, that it might have a tail and tails are enabled
642 ** we should mark it as possibly needing tail packing on close
643 */
644 if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) ||
645 (have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) )
646 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ;
647
648 /* set the key of the first byte in the 'block'-th block of file */
649 make_cpu_key (&key, inode, new_offset,
650 TYPE_ANY, 3/*key length*/);
651 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
652start_trans:
653 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
654 if (!th) {
655 retval = -ENOMEM;
656 goto failure;
657 }
658 reiserfs_update_inode_transaction(inode) ;
659 }
660 research:
661
662 retval = search_for_position_by_key (inode->i_sb, &key, &path);
663 if (retval == IO_ERROR) {
664 retval = -EIO;
665 goto failure;
666 }
667
668 bh = get_last_bh (&path);
669 ih = get_ih (&path);
670 item = get_item (&path);
671 pos_in_item = path.pos_in_item;
672
673 fs_gen = get_generation (inode->i_sb);
674 copy_item_head (&tmp_ih, ih);
675
676 if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
677 /* we have to allocate block for the unformatted node */
678 if (!th) {
679 pathrelse(&path) ;
680 goto start_trans;
681 }
682
683 repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
684
685 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
686 /* restart the transaction to give the journal a chance to free
687 ** some blocks. releases the path, so we have to go back to
688 ** research if we succeed on the second try
689 */
690 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
691 retval = restart_transaction(th, inode, &path) ;
692 if (retval)
693 goto failure;
694 repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
695
696 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
697 goto research ;
698 }
699 if (repeat == QUOTA_EXCEEDED)
700 retval = -EDQUOT;
701 else
702 retval = -ENOSPC;
703 goto failure;
704 }
705
706 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
707 goto research;
708 }
709 }
710
711 if (indirect_item_found (retval, ih)) {
712 b_blocknr_t unfm_ptr;
713 /* 'block'-th block is in the file already (there is
714 corresponding cell in some indirect item). But it may be
715 zero unformatted node pointer (hole) */
716 unfm_ptr = get_block_num (item, pos_in_item);
717 if (unfm_ptr == 0) {
718 /* use allocated block to plug the hole */
719 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
720 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
721 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
722 goto research;
723 }
724 set_buffer_new(bh_result);
725 if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb))
726 reiserfs_add_ordered_list(inode, bh_result);
727 put_block_num(item, pos_in_item, allocated_block_nr) ;
728 unfm_ptr = allocated_block_nr;
729 journal_mark_dirty (th, inode->i_sb, bh);
730 reiserfs_update_sd(th, inode) ;
731 }
732 set_block_dev_mapped(bh_result, unfm_ptr, inode);
733 pathrelse (&path);
734 retval = 0;
735 if (!dangle && th)
736 retval = reiserfs_end_persistent_transaction(th);
737 627
738 reiserfs_write_unlock(inode->i_sb); 628 if (!file_capable(inode, block)) {
739 629 reiserfs_write_unlock(inode->i_sb);
740 /* the item was found, so new blocks were not added to the file 630 return -EFBIG;
741 ** there is no need to make sure the inode is updated with this 631 }
742 ** transaction 632
743 */ 633 /* if !create, we aren't changing the FS, so we don't need to
744 return retval; 634 ** log anything, so we don't need to start a transaction
745 } 635 */
746 636 if (!(create & GET_BLOCK_CREATE)) {
747 if (!th) { 637 int ret;
748 pathrelse(&path) ; 638 /* find number of block-th logical block of the file */
749 goto start_trans; 639 ret = _get_block_create_0(inode, block, bh_result,
750 } 640 create | GET_BLOCK_READ_DIRECT);
751 641 reiserfs_write_unlock(inode->i_sb);
752 /* desired position is not found or is in the direct item. We have 642 return ret;
753 to append file with holes up to 'block'-th block converting 643 }
754 direct items to indirect one if necessary */ 644 /*
755 done = 0; 645 * if we're already in a transaction, make sure to close
756 do { 646 * any new transactions we start in this func
757 if (is_statdata_le_ih (ih)) { 647 */
758 __le32 unp = 0; 648 if ((create & GET_BLOCK_NO_DANGLE) ||
759 struct cpu_key tmp_key; 649 reiserfs_transaction_running(inode->i_sb))
760 650 dangle = 0;
761 /* indirect item has to be inserted */ 651
762 make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, 652 /* If file is of such a size, that it might have a tail and tails are enabled
763 UNFM_P_SIZE, 0/* free_space */); 653 ** we should mark it as possibly needing tail packing on close
764 654 */
765 if (cpu_key_k_offset (&key) == 1) { 655 if ((have_large_tails(inode->i_sb)
766 /* we are going to add 'block'-th block to the file. Use 656 && inode->i_size < i_block_size(inode) * 4)
767 allocated block for that */ 657 || (have_small_tails(inode->i_sb)
768 unp = cpu_to_le32 (allocated_block_nr); 658 && inode->i_size < i_block_size(inode)))
769 set_block_dev_mapped (bh_result, allocated_block_nr, inode); 659 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
770 set_buffer_new(bh_result); 660
771 done = 1; 661 /* set the key of the first byte in the 'block'-th block of file */
772 } 662 make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
773 tmp_key = key; // ;) 663 if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
774 set_cpu_key_k_offset (&tmp_key, 1); 664 start_trans:
775 PATH_LAST_POSITION(&path) ++; 665 th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
776 666 if (!th) {
777 retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp); 667 retval = -ENOMEM;
778 if (retval) {
779 reiserfs_free_block (th, inode, allocated_block_nr, 1);
780 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
781 }
782 //mark_tail_converted (inode);
783 } else if (is_direct_le_ih (ih)) {
784 /* direct item has to be converted */
785 loff_t tail_offset;
786
787 tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
788 if (tail_offset == cpu_key_k_offset (&key)) {
789 /* direct item we just found fits into block we have
790 to map. Convert it into unformatted node: use
791 bh_result for the conversion */
792 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
793 unbh = bh_result;
794 done = 1;
795 } else {
796 /* we have to padd file tail stored in direct item(s)
797 up to block size and convert it to unformatted
798 node. FIXME: this should also get into page cache */
799
800 pathrelse(&path) ;
801 /*
802 * ugly, but we can only end the transaction if
803 * we aren't nested
804 */
805 BUG_ON (!th->t_refcount);
806 if (th->t_refcount == 1) {
807 retval = reiserfs_end_persistent_transaction(th);
808 th = NULL;
809 if (retval)
810 goto failure; 668 goto failure;
811 } 669 }
670 reiserfs_update_inode_transaction(inode);
671 }
672 research:
812 673
813 retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; 674 retval = search_for_position_by_key(inode->i_sb, &key, &path);
814 if (retval) {
815 if ( retval != -ENOSPC )
816 reiserfs_warning (inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d", inode->i_ino, retval) ;
817 if (allocated_block_nr) {
818 /* the bitmap, the super, and the stat data == 3 */
819 if (!th)
820 th = reiserfs_persistent_transaction(inode->i_sb,3);
821 if (th)
822 reiserfs_free_block (th,inode,allocated_block_nr,1);
823 }
824 goto failure ;
825 }
826 goto research ;
827 }
828 retval = direct2indirect (th, inode, &path, unbh, tail_offset);
829 if (retval) {
830 reiserfs_unmap_buffer(unbh);
831 reiserfs_free_block (th, inode, allocated_block_nr, 1);
832 goto failure;
833 }
834 /* it is important the set_buffer_uptodate is done after
835 ** the direct2indirect. The buffer might contain valid
836 ** data newer than the data on disk (read by readpage, changed,
837 ** and then sent here by writepage). direct2indirect needs
838 ** to know if unbh was already up to date, so it can decide
839 ** if the data in unbh needs to be replaced with data from
840 ** the disk
841 */
842 set_buffer_uptodate (unbh);
843
844 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
845 buffer will disappear shortly, so it should not be added to
846 */
847 if ( unbh->b_page ) {
848 /* we've converted the tail, so we must
849 ** flush unbh before the transaction commits
850 */
851 reiserfs_add_tail_list(inode, unbh) ;
852
853 /* mark it dirty now to prevent commit_write from adding
854 ** this buffer to the inode's dirty buffer list
855 */
856 /*
857 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
858 * It's still atomic, but it sets the page dirty too,
859 * which makes it eligible for writeback at any time by the
860 * VM (which was also the case with __mark_buffer_dirty())
861 */
862 mark_buffer_dirty(unbh) ;
863 }
864 } else {
865 /* append indirect item with holes if needed, when appending
866 pointer to 'block'-th block use block, which is already
867 allocated */
868 struct cpu_key tmp_key;
869 unp_t unf_single=0; // We use this in case we need to allocate only
870 // one block which is a fastpath
871 unp_t *un;
872 __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
873 __u64 blocks_needed;
874
875 RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
876 "vs-804: invalid position for append");
877 /* indirect item has to be appended, set up key of that position */
878 make_cpu_key (&tmp_key, inode,
879 le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
880 //pos_in_item * inode->i_sb->s_blocksize,
881 TYPE_INDIRECT, 3);// key type is unimportant
882
883 blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
884 RFALSE( blocks_needed < 0, "green-805: invalid offset");
885
886 if ( blocks_needed == 1 ) {
887 un = &unf_single;
888 } else {
889 un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
890 GFP_ATOMIC); // We need to avoid scheduling.
891 if ( !un) {
892 un = &unf_single;
893 blocks_needed = 1;
894 max_to_insert = 0;
895 } else
896 memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
897 }
898 if ( blocks_needed <= max_to_insert) {
899 /* we are going to add target block to the file. Use allocated
900 block for that */
901 un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
902 set_block_dev_mapped (bh_result, allocated_block_nr, inode);
903 set_buffer_new(bh_result);
904 done = 1;
905 } else {
906 /* paste hole to the indirect item */
907 /* If kmalloc failed, max_to_insert becomes zero and it means we
908 only have space for one block */
909 blocks_needed=max_to_insert?max_to_insert:1;
910 }
911 retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char *)un, UNFM_P_SIZE * blocks_needed);
912
913 if (blocks_needed != 1)
914 kfree(un);
915
916 if (retval) {
917 reiserfs_free_block (th, inode, allocated_block_nr, 1);
918 goto failure;
919 }
920 if (!done) {
921 /* We need to mark new file size in case this function will be
922 interrupted/aborted later on. And we may do this only for
923 holes. */
924 inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
925 }
926 }
927
928 if (done == 1)
929 break;
930
931 /* this loop could log more blocks than we had originally asked
932 ** for. So, we have to allow the transaction to end if it is
933 ** too big or too full. Update the inode so things are
934 ** consistent if we crash before the function returns
935 **
936 ** release the path so that anybody waiting on the path before
937 ** ending their transaction will be able to continue.
938 */
939 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
940 retval = restart_transaction(th, inode, &path) ;
941 if (retval)
942 goto failure;
943 }
944 /* inserting indirect pointers for a hole can take a
945 ** long time. reschedule if needed
946 */
947 cond_resched();
948
949 retval = search_for_position_by_key (inode->i_sb, &key, &path);
950 if (retval == IO_ERROR) { 675 if (retval == IO_ERROR) {
951 retval = -EIO; 676 retval = -EIO;
952 goto failure; 677 goto failure;
953 } 678 }
954 if (retval == POSITION_FOUND) { 679
955 reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: " 680 bh = get_last_bh(&path);
956 "%K should not be found", &key); 681 ih = get_ih(&path);
957 retval = -EEXIST; 682 item = get_item(&path);
958 if (allocated_block_nr)
959 reiserfs_free_block (th, inode, allocated_block_nr, 1);
960 pathrelse(&path) ;
961 goto failure;
962 }
963 bh = get_last_bh (&path);
964 ih = get_ih (&path);
965 item = get_item (&path);
966 pos_in_item = path.pos_in_item; 683 pos_in_item = path.pos_in_item;
967 } while (1);
968 684
685 fs_gen = get_generation(inode->i_sb);
686 copy_item_head(&tmp_ih, ih);
687
688 if (allocation_needed
689 (retval, allocated_block_nr, ih, item, pos_in_item)) {
690 /* we have to allocate block for the unformatted node */
691 if (!th) {
692 pathrelse(&path);
693 goto start_trans;
694 }
695
696 repeat =
697 _allocate_block(th, block, inode, &allocated_block_nr,
698 &path, create);
699
700 if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
701 /* restart the transaction to give the journal a chance to free
702 ** some blocks. releases the path, so we have to go back to
703 ** research if we succeed on the second try
704 */
705 SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
706 retval = restart_transaction(th, inode, &path);
707 if (retval)
708 goto failure;
709 repeat =
710 _allocate_block(th, block, inode,
711 &allocated_block_nr, NULL, create);
712
713 if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
714 goto research;
715 }
716 if (repeat == QUOTA_EXCEEDED)
717 retval = -EDQUOT;
718 else
719 retval = -ENOSPC;
720 goto failure;
721 }
722
723 if (fs_changed(fs_gen, inode->i_sb)
724 && item_moved(&tmp_ih, &path)) {
725 goto research;
726 }
727 }
728
729 if (indirect_item_found(retval, ih)) {
730 b_blocknr_t unfm_ptr;
731 /* 'block'-th block is in the file already (there is
732 corresponding cell in some indirect item). But it may be
733 zero unformatted node pointer (hole) */
734 unfm_ptr = get_block_num(item, pos_in_item);
735 if (unfm_ptr == 0) {
736 /* use allocated block to plug the hole */
737 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
738 if (fs_changed(fs_gen, inode->i_sb)
739 && item_moved(&tmp_ih, &path)) {
740 reiserfs_restore_prepared_buffer(inode->i_sb,
741 bh);
742 goto research;
743 }
744 set_buffer_new(bh_result);
745 if (buffer_dirty(bh_result)
746 && reiserfs_data_ordered(inode->i_sb))
747 reiserfs_add_ordered_list(inode, bh_result);
748 put_block_num(item, pos_in_item, allocated_block_nr);
749 unfm_ptr = allocated_block_nr;
750 journal_mark_dirty(th, inode->i_sb, bh);
751 reiserfs_update_sd(th, inode);
752 }
753 set_block_dev_mapped(bh_result, unfm_ptr, inode);
754 pathrelse(&path);
755 retval = 0;
756 if (!dangle && th)
757 retval = reiserfs_end_persistent_transaction(th);
758
759 reiserfs_write_unlock(inode->i_sb);
760
761 /* the item was found, so new blocks were not added to the file
762 ** there is no need to make sure the inode is updated with this
763 ** transaction
764 */
765 return retval;
766 }
767
768 if (!th) {
769 pathrelse(&path);
770 goto start_trans;
771 }
772
773 /* desired position is not found or is in the direct item. We have
774 to append file with holes up to 'block'-th block converting
775 direct items to indirect one if necessary */
776 done = 0;
777 do {
778 if (is_statdata_le_ih(ih)) {
779 __le32 unp = 0;
780 struct cpu_key tmp_key;
781
782 /* indirect item has to be inserted */
783 make_le_item_head(&tmp_ih, &key, version, 1,
784 TYPE_INDIRECT, UNFM_P_SIZE,
785 0 /* free_space */ );
786
787 if (cpu_key_k_offset(&key) == 1) {
788 /* we are going to add 'block'-th block to the file. Use
789 allocated block for that */
790 unp = cpu_to_le32(allocated_block_nr);
791 set_block_dev_mapped(bh_result,
792 allocated_block_nr, inode);
793 set_buffer_new(bh_result);
794 done = 1;
795 }
796 tmp_key = key; // ;)
797 set_cpu_key_k_offset(&tmp_key, 1);
798 PATH_LAST_POSITION(&path)++;
799
800 retval =
801 reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
802 inode, (char *)&unp);
803 if (retval) {
804 reiserfs_free_block(th, inode,
805 allocated_block_nr, 1);
806 goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
807 }
808 //mark_tail_converted (inode);
809 } else if (is_direct_le_ih(ih)) {
810 /* direct item has to be converted */
811 loff_t tail_offset;
812
813 tail_offset =
814 ((le_ih_k_offset(ih) -
815 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
816 if (tail_offset == cpu_key_k_offset(&key)) {
817 /* direct item we just found fits into block we have
818 to map. Convert it into unformatted node: use
819 bh_result for the conversion */
820 set_block_dev_mapped(bh_result,
821 allocated_block_nr, inode);
822 unbh = bh_result;
823 done = 1;
824 } else {
825 /* we have to padd file tail stored in direct item(s)
826 up to block size and convert it to unformatted
827 node. FIXME: this should also get into page cache */
828
829 pathrelse(&path);
830 /*
831 * ugly, but we can only end the transaction if
832 * we aren't nested
833 */
834 BUG_ON(!th->t_refcount);
835 if (th->t_refcount == 1) {
836 retval =
837 reiserfs_end_persistent_transaction
838 (th);
839 th = NULL;
840 if (retval)
841 goto failure;
842 }
843
844 retval =
845 convert_tail_for_hole(inode, bh_result,
846 tail_offset);
847 if (retval) {
848 if (retval != -ENOSPC)
849 reiserfs_warning(inode->i_sb,
850 "clm-6004: convert tail failed inode %lu, error %d",
851 inode->i_ino,
852 retval);
853 if (allocated_block_nr) {
854 /* the bitmap, the super, and the stat data == 3 */
855 if (!th)
856 th = reiserfs_persistent_transaction(inode->i_sb, 3);
857 if (th)
858 reiserfs_free_block(th,
859 inode,
860 allocated_block_nr,
861 1);
862 }
863 goto failure;
864 }
865 goto research;
866 }
867 retval =
868 direct2indirect(th, inode, &path, unbh,
869 tail_offset);
870 if (retval) {
871 reiserfs_unmap_buffer(unbh);
872 reiserfs_free_block(th, inode,
873 allocated_block_nr, 1);
874 goto failure;
875 }
876 /* it is important the set_buffer_uptodate is done after
877 ** the direct2indirect. The buffer might contain valid
878 ** data newer than the data on disk (read by readpage, changed,
879 ** and then sent here by writepage). direct2indirect needs
880 ** to know if unbh was already up to date, so it can decide
881 ** if the data in unbh needs to be replaced with data from
882 ** the disk
883 */
884 set_buffer_uptodate(unbh);
885
886 /* unbh->b_page == NULL in case of DIRECT_IO request, this means
887 buffer will disappear shortly, so it should not be added to
888 */
889 if (unbh->b_page) {
890 /* we've converted the tail, so we must
891 ** flush unbh before the transaction commits
892 */
893 reiserfs_add_tail_list(inode, unbh);
894
895 /* mark it dirty now to prevent commit_write from adding
896 ** this buffer to the inode's dirty buffer list
897 */
898 /*
899 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
900 * It's still atomic, but it sets the page dirty too,
901 * which makes it eligible for writeback at any time by the
902 * VM (which was also the case with __mark_buffer_dirty())
903 */
904 mark_buffer_dirty(unbh);
905 }
906 } else {
907 /* append indirect item with holes if needed, when appending
908 pointer to 'block'-th block use block, which is already
909 allocated */
910 struct cpu_key tmp_key;
911 unp_t unf_single = 0; // We use this in case we need to allocate only
912 // one block which is a fastpath
913 unp_t *un;
914 __u64 max_to_insert =
915 MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
916 UNFM_P_SIZE;
917 __u64 blocks_needed;
918
919 RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
920 "vs-804: invalid position for append");
921 /* indirect item has to be appended, set up key of that position */
922 make_cpu_key(&tmp_key, inode,
923 le_key_k_offset(version,
924 &(ih->ih_key)) +
925 op_bytes_number(ih,
926 inode->i_sb->s_blocksize),
927 //pos_in_item * inode->i_sb->s_blocksize,
928 TYPE_INDIRECT, 3); // key type is unimportant
929
930 blocks_needed =
931 1 +
932 ((cpu_key_k_offset(&key) -
933 cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
934 s_blocksize_bits);
935 RFALSE(blocks_needed < 0, "green-805: invalid offset");
936
937 if (blocks_needed == 1) {
938 un = &unf_single;
939 } else {
940 un = kmalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC); // We need to avoid scheduling.
941 if (!un) {
942 un = &unf_single;
943 blocks_needed = 1;
944 max_to_insert = 0;
945 } else
946 memset(un, 0,
947 UNFM_P_SIZE * min(blocks_needed,
948 max_to_insert));
949 }
950 if (blocks_needed <= max_to_insert) {
951 /* we are going to add target block to the file. Use allocated
952 block for that */
953 un[blocks_needed - 1] =
954 cpu_to_le32(allocated_block_nr);
955 set_block_dev_mapped(bh_result,
956 allocated_block_nr, inode);
957 set_buffer_new(bh_result);
958 done = 1;
959 } else {
960 /* paste hole to the indirect item */
961 /* If kmalloc failed, max_to_insert becomes zero and it means we
962 only have space for one block */
963 blocks_needed =
964 max_to_insert ? max_to_insert : 1;
965 }
966 retval =
967 reiserfs_paste_into_item(th, &path, &tmp_key, inode,
968 (char *)un,
969 UNFM_P_SIZE *
970 blocks_needed);
971
972 if (blocks_needed != 1)
973 kfree(un);
974
975 if (retval) {
976 reiserfs_free_block(th, inode,
977 allocated_block_nr, 1);
978 goto failure;
979 }
980 if (!done) {
981 /* We need to mark new file size in case this function will be
982 interrupted/aborted later on. And we may do this only for
983 holes. */
984 inode->i_size +=
985 inode->i_sb->s_blocksize * blocks_needed;
986 }
987 }
969 988
970 retval = 0; 989 if (done == 1)
990 break;
971 991
972 failure: 992 /* this loop could log more blocks than we had originally asked
973 if (th && (!dangle || (retval && !th->t_trans_id))) { 993 ** for. So, we have to allow the transaction to end if it is
974 int err; 994 ** too big or too full. Update the inode so things are
975 if (th->t_trans_id) 995 ** consistent if we crash before the function returns
976 reiserfs_update_sd(th, inode); 996 **
977 err = reiserfs_end_persistent_transaction(th); 997 ** release the path so that anybody waiting on the path before
978 if (err) 998 ** ending their transaction will be able to continue.
979 retval = err; 999 */
980 } 1000 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1001 retval = restart_transaction(th, inode, &path);
1002 if (retval)
1003 goto failure;
1004 }
1005 /* inserting indirect pointers for a hole can take a
1006 ** long time. reschedule if needed
1007 */
1008 cond_resched();
981 1009
982 reiserfs_write_unlock(inode->i_sb); 1010 retval = search_for_position_by_key(inode->i_sb, &key, &path);
983 reiserfs_check_path(&path) ; 1011 if (retval == IO_ERROR) {
984 return retval; 1012 retval = -EIO;
1013 goto failure;
1014 }
1015 if (retval == POSITION_FOUND) {
1016 reiserfs_warning(inode->i_sb,
1017 "vs-825: reiserfs_get_block: "
1018 "%K should not be found", &key);
1019 retval = -EEXIST;
1020 if (allocated_block_nr)
1021 reiserfs_free_block(th, inode,
1022 allocated_block_nr, 1);
1023 pathrelse(&path);
1024 goto failure;
1025 }
1026 bh = get_last_bh(&path);
1027 ih = get_ih(&path);
1028 item = get_item(&path);
1029 pos_in_item = path.pos_in_item;
1030 } while (1);
1031
1032 retval = 0;
1033
1034 failure:
1035 if (th && (!dangle || (retval && !th->t_trans_id))) {
1036 int err;
1037 if (th->t_trans_id)
1038 reiserfs_update_sd(th, inode);
1039 err = reiserfs_end_persistent_transaction(th);
1040 if (err)
1041 retval = err;
1042 }
1043
1044 reiserfs_write_unlock(inode->i_sb);
1045 reiserfs_check_path(&path);
1046 return retval;
985} 1047}
986 1048
987static int 1049static int
988reiserfs_readpages(struct file *file, struct address_space *mapping, 1050reiserfs_readpages(struct file *file, struct address_space *mapping,
989 struct list_head *pages, unsigned nr_pages) 1051 struct list_head *pages, unsigned nr_pages)
990{ 1052{
991 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1053 return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
992} 1054}
993 1055
994/* Compute real number of used bytes by file 1056/* Compute real number of used bytes by file
@@ -996,51 +1058,56 @@ reiserfs_readpages(struct file *file, struct address_space *mapping,
996 */ 1058 */
997static int real_space_diff(struct inode *inode, int sd_size) 1059static int real_space_diff(struct inode *inode, int sd_size)
998{ 1060{
999 int bytes; 1061 int bytes;
1000 loff_t blocksize = inode->i_sb->s_blocksize ; 1062 loff_t blocksize = inode->i_sb->s_blocksize;
1001 1063
1002 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) 1064 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1003 return sd_size ; 1065 return sd_size;
1004 1066
1005 /* End of file is also in full block with indirect reference, so round 1067 /* End of file is also in full block with indirect reference, so round
1006 ** up to the next block. 1068 ** up to the next block.
1007 ** 1069 **
1008 ** there is just no way to know if the tail is actually packed 1070 ** there is just no way to know if the tail is actually packed
1009 ** on the file, so we have to assume it isn't. When we pack the 1071 ** on the file, so we have to assume it isn't. When we pack the
1010 ** tail, we add 4 bytes to pretend there really is an unformatted 1072 ** tail, we add 4 bytes to pretend there really is an unformatted
1011 ** node pointer 1073 ** node pointer
1012 */ 1074 */
1013 bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size; 1075 bytes =
1014 return bytes ; 1076 ((inode->i_size +
1077 (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
1078 sd_size;
1079 return bytes;
1015} 1080}
1016 1081
1017static inline loff_t to_real_used_space(struct inode *inode, ulong blocks, 1082static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
1018 int sd_size) 1083 int sd_size)
1019{ 1084{
1020 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1085 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1021 return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ; 1086 return inode->i_size +
1022 } 1087 (loff_t) (real_space_diff(inode, sd_size));
1023 return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9); 1088 }
1089 return ((loff_t) real_space_diff(inode, sd_size)) +
1090 (((loff_t) blocks) << 9);
1024} 1091}
1025 1092
1026/* Compute number of blocks used by file in ReiserFS counting */ 1093/* Compute number of blocks used by file in ReiserFS counting */
1027static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size) 1094static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1028{ 1095{
1029 loff_t bytes = inode_get_bytes(inode) ; 1096 loff_t bytes = inode_get_bytes(inode);
1030 loff_t real_space = real_space_diff(inode, sd_size) ; 1097 loff_t real_space = real_space_diff(inode, sd_size);
1031 1098
1032 /* keeps fsck and non-quota versions of reiserfs happy */ 1099 /* keeps fsck and non-quota versions of reiserfs happy */
1033 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) { 1100 if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1034 bytes += (loff_t)511 ; 1101 bytes += (loff_t) 511;
1035 } 1102 }
1036 1103
1037 /* files from before the quota patch might i_blocks such that 1104 /* files from before the quota patch might i_blocks such that
1038 ** bytes < real_space. Deal with that here to prevent it from 1105 ** bytes < real_space. Deal with that here to prevent it from
1039 ** going negative. 1106 ** going negative.
1040 */ 1107 */
1041 if (bytes < real_space) 1108 if (bytes < real_space)
1042 return 0 ; 1109 return 0;
1043 return (bytes - real_space) >> 9; 1110 return (bytes - real_space) >> 9;
1044} 1111}
1045 1112
1046// 1113//
@@ -1051,263 +1118,269 @@ static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1051// 1118//
1052 1119
1053// called by read_locked_inode 1120// called by read_locked_inode
1054static void init_inode (struct inode * inode, struct path * path) 1121static void init_inode(struct inode *inode, struct path *path)
1055{ 1122{
1056 struct buffer_head * bh; 1123 struct buffer_head *bh;
1057 struct item_head * ih; 1124 struct item_head *ih;
1058 __u32 rdev; 1125 __u32 rdev;
1059 //int version = ITEM_VERSION_1; 1126 //int version = ITEM_VERSION_1;
1060 1127
1061 bh = PATH_PLAST_BUFFER (path); 1128 bh = PATH_PLAST_BUFFER(path);
1062 ih = PATH_PITEM_HEAD (path); 1129 ih = PATH_PITEM_HEAD(path);
1063 1130
1064 1131 copy_key(INODE_PKEY(inode), &(ih->ih_key));
1065 copy_key (INODE_PKEY (inode), &(ih->ih_key)); 1132 inode->i_blksize = reiserfs_default_io_size;
1066 inode->i_blksize = reiserfs_default_io_size; 1133
1067 1134 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1068 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); 1135 REISERFS_I(inode)->i_flags = 0;
1069 REISERFS_I(inode)->i_flags = 0; 1136 REISERFS_I(inode)->i_prealloc_block = 0;
1070 REISERFS_I(inode)->i_prealloc_block = 0; 1137 REISERFS_I(inode)->i_prealloc_count = 0;
1071 REISERFS_I(inode)->i_prealloc_count = 0; 1138 REISERFS_I(inode)->i_trans_id = 0;
1072 REISERFS_I(inode)->i_trans_id = 0; 1139 REISERFS_I(inode)->i_jl = NULL;
1073 REISERFS_I(inode)->i_jl = NULL; 1140 REISERFS_I(inode)->i_acl_access = NULL;
1074 REISERFS_I(inode)->i_acl_access = NULL; 1141 REISERFS_I(inode)->i_acl_default = NULL;
1075 REISERFS_I(inode)->i_acl_default = NULL; 1142 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1076 init_rwsem (&REISERFS_I(inode)->xattr_sem); 1143
1077 1144 if (stat_data_v1(ih)) {
1078 if (stat_data_v1 (ih)) { 1145 struct stat_data_v1 *sd =
1079 struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); 1146 (struct stat_data_v1 *)B_I_PITEM(bh, ih);
1080 unsigned long blocks; 1147 unsigned long blocks;
1081 1148
1082 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1149 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1083 set_inode_sd_version (inode, STAT_DATA_V1); 1150 set_inode_sd_version(inode, STAT_DATA_V1);
1084 inode->i_mode = sd_v1_mode(sd); 1151 inode->i_mode = sd_v1_mode(sd);
1085 inode->i_nlink = sd_v1_nlink(sd); 1152 inode->i_nlink = sd_v1_nlink(sd);
1086 inode->i_uid = sd_v1_uid(sd); 1153 inode->i_uid = sd_v1_uid(sd);
1087 inode->i_gid = sd_v1_gid(sd); 1154 inode->i_gid = sd_v1_gid(sd);
1088 inode->i_size = sd_v1_size(sd); 1155 inode->i_size = sd_v1_size(sd);
1089 inode->i_atime.tv_sec = sd_v1_atime(sd); 1156 inode->i_atime.tv_sec = sd_v1_atime(sd);
1090 inode->i_mtime.tv_sec = sd_v1_mtime(sd); 1157 inode->i_mtime.tv_sec = sd_v1_mtime(sd);
1091 inode->i_ctime.tv_sec = sd_v1_ctime(sd); 1158 inode->i_ctime.tv_sec = sd_v1_ctime(sd);
1092 inode->i_atime.tv_nsec = 0; 1159 inode->i_atime.tv_nsec = 0;
1093 inode->i_ctime.tv_nsec = 0; 1160 inode->i_ctime.tv_nsec = 0;
1094 inode->i_mtime.tv_nsec = 0; 1161 inode->i_mtime.tv_nsec = 0;
1095 1162
1096 inode->i_blocks = sd_v1_blocks(sd); 1163 inode->i_blocks = sd_v1_blocks(sd);
1097 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id); 1164 inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1098 blocks = (inode->i_size + 511) >> 9; 1165 blocks = (inode->i_size + 511) >> 9;
1099 blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9); 1166 blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1100 if (inode->i_blocks > blocks) { 1167 if (inode->i_blocks > blocks) {
1101 // there was a bug in <=3.5.23 when i_blocks could take negative 1168 // there was a bug in <=3.5.23 when i_blocks could take negative
1102 // values. Starting from 3.5.17 this value could even be stored in 1169 // values. Starting from 3.5.17 this value could even be stored in
1103 // stat data. For such files we set i_blocks based on file 1170 // stat data. For such files we set i_blocks based on file
1104 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be 1171 // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
1105 // only updated if file's inode will ever change 1172 // only updated if file's inode will ever change
1106 inode->i_blocks = blocks; 1173 inode->i_blocks = blocks;
1107 } 1174 }
1108
1109 rdev = sd_v1_rdev(sd);
1110 REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd);
1111 /* an early bug in the quota code can give us an odd number for the
1112 ** block count. This is incorrect, fix it here.
1113 */
1114 if (inode->i_blocks & 1) {
1115 inode->i_blocks++ ;
1116 }
1117 inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
1118 SD_V1_SIZE));
1119 /* nopack is initially zero for v1 objects. For v2 objects,
1120 nopack is initialised from sd_attrs */
1121 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1122 } else {
1123 // new stat data found, but object may have old items
1124 // (directories and symlinks)
1125 struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
1126
1127 inode->i_mode = sd_v2_mode(sd);
1128 inode->i_nlink = sd_v2_nlink(sd);
1129 inode->i_uid = sd_v2_uid(sd);
1130 inode->i_size = sd_v2_size(sd);
1131 inode->i_gid = sd_v2_gid(sd);
1132 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1133 inode->i_atime.tv_sec = sd_v2_atime(sd);
1134 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1135 inode->i_ctime.tv_nsec = 0;
1136 inode->i_mtime.tv_nsec = 0;
1137 inode->i_atime.tv_nsec = 0;
1138 inode->i_blocks = sd_v2_blocks(sd);
1139 rdev = sd_v2_rdev(sd);
1140 if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
1141 inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
1142 else
1143 inode->i_generation = sd_v2_generation(sd);
1144 1175
1145 if (S_ISDIR (inode->i_mode) || S_ISLNK (inode->i_mode)) 1176 rdev = sd_v1_rdev(sd);
1146 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1177 REISERFS_I(inode)->i_first_direct_byte =
1147 else 1178 sd_v1_first_direct_byte(sd);
1148 set_inode_item_key_version (inode, KEY_FORMAT_3_6); 1179 /* an early bug in the quota code can give us an odd number for the
1149 REISERFS_I(inode)->i_first_direct_byte = 0; 1180 ** block count. This is incorrect, fix it here.
1150 set_inode_sd_version (inode, STAT_DATA_V2); 1181 */
1151 inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, 1182 if (inode->i_blocks & 1) {
1152 SD_V2_SIZE)); 1183 inode->i_blocks++;
1153 /* read persistent inode attributes from sd and initalise 1184 }
1154 generic inode flags from them */ 1185 inode_set_bytes(inode,
1155 REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd ); 1186 to_real_used_space(inode, inode->i_blocks,
1156 sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode ); 1187 SD_V1_SIZE));
1157 } 1188 /* nopack is initially zero for v1 objects. For v2 objects,
1158 1189 nopack is initialised from sd_attrs */
1159 pathrelse (path); 1190 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1160 if (S_ISREG (inode->i_mode)) { 1191 } else {
1161 inode->i_op = &reiserfs_file_inode_operations; 1192 // new stat data found, but object may have old items
1162 inode->i_fop = &reiserfs_file_operations; 1193 // (directories and symlinks)
1163 inode->i_mapping->a_ops = &reiserfs_address_space_operations ; 1194 struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1164 } else if (S_ISDIR (inode->i_mode)) { 1195
1165 inode->i_op = &reiserfs_dir_inode_operations; 1196 inode->i_mode = sd_v2_mode(sd);
1166 inode->i_fop = &reiserfs_dir_operations; 1197 inode->i_nlink = sd_v2_nlink(sd);
1167 } else if (S_ISLNK (inode->i_mode)) { 1198 inode->i_uid = sd_v2_uid(sd);
1168 inode->i_op = &reiserfs_symlink_inode_operations; 1199 inode->i_size = sd_v2_size(sd);
1169 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 1200 inode->i_gid = sd_v2_gid(sd);
1170 } else { 1201 inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1171 inode->i_blocks = 0; 1202 inode->i_atime.tv_sec = sd_v2_atime(sd);
1172 inode->i_op = &reiserfs_special_inode_operations; 1203 inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1173 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); 1204 inode->i_ctime.tv_nsec = 0;
1174 } 1205 inode->i_mtime.tv_nsec = 0;
1175} 1206 inode->i_atime.tv_nsec = 0;
1207 inode->i_blocks = sd_v2_blocks(sd);
1208 rdev = sd_v2_rdev(sd);
1209 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1210 inode->i_generation =
1211 le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1212 else
1213 inode->i_generation = sd_v2_generation(sd);
1176 1214
1215 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1216 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1217 else
1218 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1219 REISERFS_I(inode)->i_first_direct_byte = 0;
1220 set_inode_sd_version(inode, STAT_DATA_V2);
1221 inode_set_bytes(inode,
1222 to_real_used_space(inode, inode->i_blocks,
1223 SD_V2_SIZE));
1224 /* read persistent inode attributes from sd and initalise
1225 generic inode flags from them */
1226 REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1227 sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1228 }
1229
1230 pathrelse(path);
1231 if (S_ISREG(inode->i_mode)) {
1232 inode->i_op = &reiserfs_file_inode_operations;
1233 inode->i_fop = &reiserfs_file_operations;
1234 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1235 } else if (S_ISDIR(inode->i_mode)) {
1236 inode->i_op = &reiserfs_dir_inode_operations;
1237 inode->i_fop = &reiserfs_dir_operations;
1238 } else if (S_ISLNK(inode->i_mode)) {
1239 inode->i_op = &reiserfs_symlink_inode_operations;
1240 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1241 } else {
1242 inode->i_blocks = 0;
1243 inode->i_op = &reiserfs_special_inode_operations;
1244 init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
1245 }
1246}
1177 1247
1178// update new stat data with inode fields 1248// update new stat data with inode fields
1179static void inode2sd (void * sd, struct inode * inode, loff_t size) 1249static void inode2sd(void *sd, struct inode *inode, loff_t size)
1180{ 1250{
1181 struct stat_data * sd_v2 = (struct stat_data *)sd; 1251 struct stat_data *sd_v2 = (struct stat_data *)sd;
1182 __u16 flags; 1252 __u16 flags;
1183 1253
1184 set_sd_v2_mode(sd_v2, inode->i_mode ); 1254 set_sd_v2_mode(sd_v2, inode->i_mode);
1185 set_sd_v2_nlink(sd_v2, inode->i_nlink ); 1255 set_sd_v2_nlink(sd_v2, inode->i_nlink);
1186 set_sd_v2_uid(sd_v2, inode->i_uid ); 1256 set_sd_v2_uid(sd_v2, inode->i_uid);
1187 set_sd_v2_size(sd_v2, size ); 1257 set_sd_v2_size(sd_v2, size);
1188 set_sd_v2_gid(sd_v2, inode->i_gid ); 1258 set_sd_v2_gid(sd_v2, inode->i_gid);
1189 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec ); 1259 set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1190 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec ); 1260 set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1191 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec ); 1261 set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
1192 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE)); 1262 set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1193 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1263 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1194 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); 1264 set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
1195 else 1265 else
1196 set_sd_v2_generation(sd_v2, inode->i_generation); 1266 set_sd_v2_generation(sd_v2, inode->i_generation);
1197 flags = REISERFS_I(inode)->i_attrs; 1267 flags = REISERFS_I(inode)->i_attrs;
1198 i_attrs_to_sd_attrs( inode, &flags ); 1268 i_attrs_to_sd_attrs(inode, &flags);
1199 set_sd_v2_attrs( sd_v2, flags ); 1269 set_sd_v2_attrs(sd_v2, flags);
1200} 1270}
1201 1271
1202
1203// used to copy inode's fields to old stat data 1272// used to copy inode's fields to old stat data
1204static void inode2sd_v1 (void * sd, struct inode * inode, loff_t size) 1273static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1205{ 1274{
1206 struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; 1275 struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1207 1276
1208 set_sd_v1_mode(sd_v1, inode->i_mode ); 1277 set_sd_v1_mode(sd_v1, inode->i_mode);
1209 set_sd_v1_uid(sd_v1, inode->i_uid ); 1278 set_sd_v1_uid(sd_v1, inode->i_uid);
1210 set_sd_v1_gid(sd_v1, inode->i_gid ); 1279 set_sd_v1_gid(sd_v1, inode->i_gid);
1211 set_sd_v1_nlink(sd_v1, inode->i_nlink ); 1280 set_sd_v1_nlink(sd_v1, inode->i_nlink);
1212 set_sd_v1_size(sd_v1, size ); 1281 set_sd_v1_size(sd_v1, size);
1213 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec ); 1282 set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
1214 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec ); 1283 set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
1215 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec ); 1284 set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
1216 1285
1217 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 1286 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1218 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev)); 1287 set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
1219 else 1288 else
1220 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); 1289 set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1221
1222 // Sigh. i_first_direct_byte is back
1223 set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte);
1224}
1225 1290
1291 // Sigh. i_first_direct_byte is back
1292 set_sd_v1_first_direct_byte(sd_v1,
1293 REISERFS_I(inode)->i_first_direct_byte);
1294}
1226 1295
1227/* NOTE, you must prepare the buffer head before sending it here, 1296/* NOTE, you must prepare the buffer head before sending it here,
1228** and then log it after the call 1297** and then log it after the call
1229*/ 1298*/
1230static void update_stat_data (struct path * path, struct inode * inode, 1299static void update_stat_data(struct path *path, struct inode *inode,
1231 loff_t size) 1300 loff_t size)
1232{ 1301{
1233 struct buffer_head * bh; 1302 struct buffer_head *bh;
1234 struct item_head * ih; 1303 struct item_head *ih;
1235 1304
1236 bh = PATH_PLAST_BUFFER (path); 1305 bh = PATH_PLAST_BUFFER(path);
1237 ih = PATH_PITEM_HEAD (path); 1306 ih = PATH_PITEM_HEAD(path);
1238 1307
1239 if (!is_statdata_le_ih (ih)) 1308 if (!is_statdata_le_ih(ih))
1240 reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h", 1309 reiserfs_panic(inode->i_sb,
1241 INODE_PKEY (inode), ih); 1310 "vs-13065: update_stat_data: key %k, found item %h",
1242 1311 INODE_PKEY(inode), ih);
1243 if (stat_data_v1 (ih)) { 1312
1244 // path points to old stat data 1313 if (stat_data_v1(ih)) {
1245 inode2sd_v1 (B_I_PITEM (bh, ih), inode, size); 1314 // path points to old stat data
1246 } else { 1315 inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1247 inode2sd (B_I_PITEM (bh, ih), inode, size); 1316 } else {
1248 } 1317 inode2sd(B_I_PITEM(bh, ih), inode, size);
1249 1318 }
1250 return;
1251}
1252 1319
1320 return;
1321}
1253 1322
1254void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th, 1323void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1255 struct inode * inode, loff_t size) 1324 struct inode *inode, loff_t size)
1256{ 1325{
1257 struct cpu_key key; 1326 struct cpu_key key;
1258 INITIALIZE_PATH(path); 1327 INITIALIZE_PATH(path);
1259 struct buffer_head *bh ; 1328 struct buffer_head *bh;
1260 int fs_gen ; 1329 int fs_gen;
1261 struct item_head *ih, tmp_ih ; 1330 struct item_head *ih, tmp_ih;
1262 int retval; 1331 int retval;
1263 1332
1264 BUG_ON (!th->t_trans_id); 1333 BUG_ON(!th->t_trans_id);
1265 1334
1266 make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant 1335 make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant
1267 1336
1268 for(;;) { 1337 for (;;) {
1269 int pos; 1338 int pos;
1270 /* look for the object's stat data */ 1339 /* look for the object's stat data */
1271 retval = search_item (inode->i_sb, &key, &path); 1340 retval = search_item(inode->i_sb, &key, &path);
1272 if (retval == IO_ERROR) { 1341 if (retval == IO_ERROR) {
1273 reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: " 1342 reiserfs_warning(inode->i_sb,
1274 "i/o failure occurred trying to update %K stat data", 1343 "vs-13050: reiserfs_update_sd: "
1275 &key); 1344 "i/o failure occurred trying to update %K stat data",
1276 return; 1345 &key);
1277 } 1346 return;
1278 if (retval == ITEM_NOT_FOUND) { 1347 }
1279 pos = PATH_LAST_POSITION (&path); 1348 if (retval == ITEM_NOT_FOUND) {
1280 pathrelse(&path) ; 1349 pos = PATH_LAST_POSITION(&path);
1281 if (inode->i_nlink == 0) { 1350 pathrelse(&path);
1282 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found");*/ 1351 if (inode->i_nlink == 0) {
1283 return; 1352 /*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
1284 } 1353 return;
1285 reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: " 1354 }
1286 "stat data of object %k (nlink == %d) not found (pos %d)", 1355 reiserfs_warning(inode->i_sb,
1287 INODE_PKEY (inode), inode->i_nlink, pos); 1356 "vs-13060: reiserfs_update_sd: "
1288 reiserfs_check_path(&path) ; 1357 "stat data of object %k (nlink == %d) not found (pos %d)",
1289 return; 1358 INODE_PKEY(inode), inode->i_nlink,
1290 } 1359 pos);
1291 1360 reiserfs_check_path(&path);
1292 /* sigh, prepare_for_journal might schedule. When it schedules the 1361 return;
1293 ** FS might change. We have to detect that, and loop back to the 1362 }
1294 ** search if the stat data item has moved 1363
1295 */ 1364 /* sigh, prepare_for_journal might schedule. When it schedules the
1296 bh = get_last_bh(&path) ; 1365 ** FS might change. We have to detect that, and loop back to the
1297 ih = get_ih(&path) ; 1366 ** search if the stat data item has moved
1298 copy_item_head (&tmp_ih, ih); 1367 */
1299 fs_gen = get_generation (inode->i_sb); 1368 bh = get_last_bh(&path);
1300 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; 1369 ih = get_ih(&path);
1301 if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { 1370 copy_item_head(&tmp_ih, ih);
1302 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; 1371 fs_gen = get_generation(inode->i_sb);
1303 continue ; /* Stat_data item has been moved after scheduling. */ 1372 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1304 } 1373 if (fs_changed(fs_gen, inode->i_sb)
1305 break; 1374 && item_moved(&tmp_ih, &path)) {
1306 } 1375 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1307 update_stat_data (&path, inode, size); 1376 continue; /* Stat_data item has been moved after scheduling. */
1308 journal_mark_dirty(th, th->t_super, bh) ; 1377 }
1309 pathrelse (&path); 1378 break;
1310 return; 1379 }
1380 update_stat_data(&path, inode, size);
1381 journal_mark_dirty(th, th->t_super, bh);
1382 pathrelse(&path);
1383 return;
1311} 1384}
1312 1385
1313/* reiserfs_read_locked_inode is called to read the inode off disk, and it 1386/* reiserfs_read_locked_inode is called to read the inode off disk, and it
@@ -1316,9 +1389,10 @@ void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th,
1316** corresponding iput might try to delete whatever object the inode last 1389** corresponding iput might try to delete whatever object the inode last
1317** represented. 1390** represented.
1318*/ 1391*/
1319static void reiserfs_make_bad_inode(struct inode *inode) { 1392static void reiserfs_make_bad_inode(struct inode *inode)
1320 memset(INODE_PKEY(inode), 0, KEY_SIZE); 1393{
1321 make_bad_inode(inode); 1394 memset(INODE_PKEY(inode), 0, KEY_SIZE);
1395 make_bad_inode(inode);
1322} 1396}
1323 1397
1324// 1398//
@@ -1326,77 +1400,79 @@ static void reiserfs_make_bad_inode(struct inode *inode) {
1326// evolved as the prototype did 1400// evolved as the prototype did
1327// 1401//
1328 1402
1329int reiserfs_init_locked_inode (struct inode * inode, void *p) 1403int reiserfs_init_locked_inode(struct inode *inode, void *p)
1330{ 1404{
1331 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ; 1405 struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
1332 inode->i_ino = args->objectid; 1406 inode->i_ino = args->objectid;
1333 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); 1407 INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
1334 return 0; 1408 return 0;
1335} 1409}
1336 1410
1337/* looks for stat data in the tree, and fills up the fields of in-core 1411/* looks for stat data in the tree, and fills up the fields of in-core
1338 inode stat data fields */ 1412 inode stat data fields */
1339void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args) 1413void reiserfs_read_locked_inode(struct inode *inode,
1414 struct reiserfs_iget_args *args)
1340{ 1415{
1341 INITIALIZE_PATH (path_to_sd); 1416 INITIALIZE_PATH(path_to_sd);
1342 struct cpu_key key; 1417 struct cpu_key key;
1343 unsigned long dirino; 1418 unsigned long dirino;
1344 int retval; 1419 int retval;
1345 1420
1346 dirino = args->dirid ; 1421 dirino = args->dirid;
1347 1422
1348 /* set version 1, version 2 could be used too, because stat data 1423 /* set version 1, version 2 could be used too, because stat data
1349 key is the same in both versions */ 1424 key is the same in both versions */
1350 key.version = KEY_FORMAT_3_5; 1425 key.version = KEY_FORMAT_3_5;
1351 key.on_disk_key.k_dir_id = dirino; 1426 key.on_disk_key.k_dir_id = dirino;
1352 key.on_disk_key.k_objectid = inode->i_ino; 1427 key.on_disk_key.k_objectid = inode->i_ino;
1353 key.on_disk_key.k_offset = 0; 1428 key.on_disk_key.k_offset = 0;
1354 key.on_disk_key.k_type = 0; 1429 key.on_disk_key.k_type = 0;
1355 1430
1356 /* look for the object's stat data */ 1431 /* look for the object's stat data */
1357 retval = search_item (inode->i_sb, &key, &path_to_sd); 1432 retval = search_item(inode->i_sb, &key, &path_to_sd);
1358 if (retval == IO_ERROR) { 1433 if (retval == IO_ERROR) {
1359 reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_locked_inode: " 1434 reiserfs_warning(inode->i_sb,
1360 "i/o failure occurred trying to find stat data of %K", 1435 "vs-13070: reiserfs_read_locked_inode: "
1361 &key); 1436 "i/o failure occurred trying to find stat data of %K",
1362 reiserfs_make_bad_inode(inode) ; 1437 &key);
1363 return; 1438 reiserfs_make_bad_inode(inode);
1364 } 1439 return;
1365 if (retval != ITEM_FOUND) { 1440 }
1366 /* a stale NFS handle can trigger this without it being an error */ 1441 if (retval != ITEM_FOUND) {
1367 pathrelse (&path_to_sd); 1442 /* a stale NFS handle can trigger this without it being an error */
1368 reiserfs_make_bad_inode(inode) ; 1443 pathrelse(&path_to_sd);
1369 inode->i_nlink = 0; 1444 reiserfs_make_bad_inode(inode);
1370 return; 1445 inode->i_nlink = 0;
1371 } 1446 return;
1372 1447 }
1373 init_inode (inode, &path_to_sd); 1448
1374 1449 init_inode(inode, &path_to_sd);
1375 /* It is possible that knfsd is trying to access inode of a file 1450
1376 that is being removed from the disk by some other thread. As we 1451 /* It is possible that knfsd is trying to access inode of a file
1377 update sd on unlink all that is required is to check for nlink 1452 that is being removed from the disk by some other thread. As we
1378 here. This bug was first found by Sizif when debugging 1453 update sd on unlink all that is required is to check for nlink
1379 SquidNG/Butterfly, forgotten, and found again after Philippe 1454 here. This bug was first found by Sizif when debugging
1380 Gramoulle <philippe.gramoulle@mmania.com> reproduced it. 1455 SquidNG/Butterfly, forgotten, and found again after Philippe
1381 1456 Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1382 More logical fix would require changes in fs/inode.c:iput() to 1457
1383 remove inode from hash-table _after_ fs cleaned disk stuff up and 1458 More logical fix would require changes in fs/inode.c:iput() to
1384 in iget() to return NULL if I_FREEING inode is found in 1459 remove inode from hash-table _after_ fs cleaned disk stuff up and
1385 hash-table. */ 1460 in iget() to return NULL if I_FREEING inode is found in
1386 /* Currently there is one place where it's ok to meet inode with 1461 hash-table. */
1387 nlink==0: processing of open-unlinked and half-truncated files 1462 /* Currently there is one place where it's ok to meet inode with
1388 during mount (fs/reiserfs/super.c:finish_unfinished()). */ 1463 nlink==0: processing of open-unlinked and half-truncated files
1389 if( ( inode -> i_nlink == 0 ) && 1464 during mount (fs/reiserfs/super.c:finish_unfinished()). */
1390 ! REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) { 1465 if ((inode->i_nlink == 0) &&
1391 reiserfs_warning (inode->i_sb, 1466 !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1392 "vs-13075: reiserfs_read_locked_inode: " 1467 reiserfs_warning(inode->i_sb,
1393 "dead inode read from disk %K. " 1468 "vs-13075: reiserfs_read_locked_inode: "
1394 "This is likely to be race with knfsd. Ignore", 1469 "dead inode read from disk %K. "
1395 &key ); 1470 "This is likely to be race with knfsd. Ignore",
1396 reiserfs_make_bad_inode( inode ); 1471 &key);
1397 } 1472 reiserfs_make_bad_inode(inode);
1398 1473 }
1399 reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ 1474
1475 reiserfs_check_path(&path_to_sd); /* init inode should be relsing */
1400 1476
1401} 1477}
1402 1478
@@ -1412,140 +1488,148 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args
1412 * inode numbers (objectids) are distinguished by parent directory ids. 1488 * inode numbers (objectids) are distinguished by parent directory ids.
1413 * 1489 *
1414 */ 1490 */
1415int reiserfs_find_actor( struct inode *inode, void *opaque ) 1491int reiserfs_find_actor(struct inode *inode, void *opaque)
1416{ 1492{
1417 struct reiserfs_iget_args *args; 1493 struct reiserfs_iget_args *args;
1418 1494
1419 args = opaque; 1495 args = opaque;
1420 /* args is already in CPU order */ 1496 /* args is already in CPU order */
1421 return (inode->i_ino == args->objectid) && 1497 return (inode->i_ino == args->objectid) &&
1422 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); 1498 (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
1423} 1499}
1424 1500
1425struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key) 1501struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1426{ 1502{
1427 struct inode * inode; 1503 struct inode *inode;
1428 struct reiserfs_iget_args args ; 1504 struct reiserfs_iget_args args;
1429 1505
1430 args.objectid = key->on_disk_key.k_objectid ; 1506 args.objectid = key->on_disk_key.k_objectid;
1431 args.dirid = key->on_disk_key.k_dir_id ; 1507 args.dirid = key->on_disk_key.k_dir_id;
1432 inode = iget5_locked (s, key->on_disk_key.k_objectid, 1508 inode = iget5_locked(s, key->on_disk_key.k_objectid,
1433 reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); 1509 reiserfs_find_actor, reiserfs_init_locked_inode,
1434 if (!inode) 1510 (void *)(&args));
1435 return ERR_PTR(-ENOMEM) ; 1511 if (!inode)
1436 1512 return ERR_PTR(-ENOMEM);
1437 if (inode->i_state & I_NEW) { 1513
1438 reiserfs_read_locked_inode(inode, &args); 1514 if (inode->i_state & I_NEW) {
1439 unlock_new_inode(inode); 1515 reiserfs_read_locked_inode(inode, &args);
1440 } 1516 unlock_new_inode(inode);
1441 1517 }
1442 if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) { 1518
1443 /* either due to i/o error or a stale NFS handle */ 1519 if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
1444 iput (inode); 1520 /* either due to i/o error or a stale NFS handle */
1445 inode = NULL; 1521 iput(inode);
1446 } 1522 inode = NULL;
1447 return inode; 1523 }
1524 return inode;
1448} 1525}
1449 1526
1450struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp) 1527struct dentry *reiserfs_get_dentry(struct super_block *sb, void *vobjp)
1451{ 1528{
1452 __u32 *data = vobjp; 1529 __u32 *data = vobjp;
1453 struct cpu_key key ; 1530 struct cpu_key key;
1454 struct dentry *result; 1531 struct dentry *result;
1455 struct inode *inode; 1532 struct inode *inode;
1456 1533
1457 key.on_disk_key.k_objectid = data[0] ; 1534 key.on_disk_key.k_objectid = data[0];
1458 key.on_disk_key.k_dir_id = data[1] ; 1535 key.on_disk_key.k_dir_id = data[1];
1459 reiserfs_write_lock(sb); 1536 reiserfs_write_lock(sb);
1460 inode = reiserfs_iget(sb, &key) ; 1537 inode = reiserfs_iget(sb, &key);
1461 if (inode && !IS_ERR(inode) && data[2] != 0 && 1538 if (inode && !IS_ERR(inode) && data[2] != 0 &&
1462 data[2] != inode->i_generation) { 1539 data[2] != inode->i_generation) {
1463 iput(inode) ; 1540 iput(inode);
1464 inode = NULL ; 1541 inode = NULL;
1465 } 1542 }
1466 reiserfs_write_unlock(sb); 1543 reiserfs_write_unlock(sb);
1467 if (!inode) 1544 if (!inode)
1468 inode = ERR_PTR(-ESTALE); 1545 inode = ERR_PTR(-ESTALE);
1469 if (IS_ERR(inode)) 1546 if (IS_ERR(inode))
1470 return ERR_PTR(PTR_ERR(inode)); 1547 return ERR_PTR(PTR_ERR(inode));
1471 result = d_alloc_anon(inode); 1548 result = d_alloc_anon(inode);
1472 if (!result) { 1549 if (!result) {
1473 iput(inode); 1550 iput(inode);
1474 return ERR_PTR(-ENOMEM); 1551 return ERR_PTR(-ENOMEM);
1475 } 1552 }
1476 return result; 1553 return result;
1477} 1554}
1478 1555
1479struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 *data, 1556struct dentry *reiserfs_decode_fh(struct super_block *sb, __u32 * data,
1480 int len, int fhtype, 1557 int len, int fhtype,
1481 int (*acceptable)(void *contect, struct dentry *de), 1558 int (*acceptable) (void *contect,
1482 void *context) { 1559 struct dentry * de),
1483 __u32 obj[3], parent[3]; 1560 void *context)
1484 1561{
1485 /* fhtype happens to reflect the number of u32s encoded. 1562 __u32 obj[3], parent[3];
1486 * due to a bug in earlier code, fhtype might indicate there 1563
1487 * are more u32s then actually fitted. 1564 /* fhtype happens to reflect the number of u32s encoded.
1488 * so if fhtype seems to be more than len, reduce fhtype. 1565 * due to a bug in earlier code, fhtype might indicate there
1489 * Valid types are: 1566 * are more u32s then actually fitted.
1490 * 2 - objectid + dir_id - legacy support 1567 * so if fhtype seems to be more than len, reduce fhtype.
1491 * 3 - objectid + dir_id + generation 1568 * Valid types are:
1492 * 4 - objectid + dir_id + objectid and dirid of parent - legacy 1569 * 2 - objectid + dir_id - legacy support
1493 * 5 - objectid + dir_id + generation + objectid and dirid of parent 1570 * 3 - objectid + dir_id + generation
1494 * 6 - as above plus generation of directory 1571 * 4 - objectid + dir_id + objectid and dirid of parent - legacy
1495 * 6 does not fit in NFSv2 handles 1572 * 5 - objectid + dir_id + generation + objectid and dirid of parent
1496 */ 1573 * 6 - as above plus generation of directory
1497 if (fhtype > len) { 1574 * 6 does not fit in NFSv2 handles
1498 if (fhtype != 6 || len != 5) 1575 */
1499 reiserfs_warning (sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd", 1576 if (fhtype > len) {
1500 fhtype, len); 1577 if (fhtype != 6 || len != 5)
1501 fhtype = 5; 1578 reiserfs_warning(sb,
1502 } 1579 "nfsd/reiserfs, fhtype=%d, len=%d - odd",
1503 1580 fhtype, len);
1504 obj[0] = data[0]; 1581 fhtype = 5;
1505 obj[1] = data[1]; 1582 }
1506 if (fhtype == 3 || fhtype >= 5) 1583
1507 obj[2] = data[2]; 1584 obj[0] = data[0];
1508 else obj[2] = 0; /* generation number */ 1585 obj[1] = data[1];
1509 1586 if (fhtype == 3 || fhtype >= 5)
1510 if (fhtype >= 4) { 1587 obj[2] = data[2];
1511 parent[0] = data[fhtype>=5?3:2] ; 1588 else
1512 parent[1] = data[fhtype>=5?4:3] ; 1589 obj[2] = 0; /* generation number */
1513 if (fhtype == 6)
1514 parent[2] = data[5];
1515 else parent[2] = 0;
1516 }
1517 return sb->s_export_op->find_exported_dentry(sb, obj, fhtype < 4 ? NULL : parent,
1518 acceptable, context);
1519}
1520 1590
1521int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_parent) { 1591 if (fhtype >= 4) {
1522 struct inode *inode = dentry->d_inode ; 1592 parent[0] = data[fhtype >= 5 ? 3 : 2];
1523 int maxlen = *lenp; 1593 parent[1] = data[fhtype >= 5 ? 4 : 3];
1524 1594 if (fhtype == 6)
1525 if (maxlen < 3) 1595 parent[2] = data[5];
1526 return 255 ; 1596 else
1527 1597 parent[2] = 0;
1528 data[0] = inode->i_ino ; 1598 }
1529 data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ; 1599 return sb->s_export_op->find_exported_dentry(sb, obj,
1530 data[2] = inode->i_generation ; 1600 fhtype < 4 ? NULL : parent,
1531 *lenp = 3 ; 1601 acceptable, context);
1532 /* no room for directory info? return what we've stored so far */
1533 if (maxlen < 5 || ! need_parent)
1534 return 3 ;
1535
1536 spin_lock(&dentry->d_lock);
1537 inode = dentry->d_parent->d_inode ;
1538 data[3] = inode->i_ino ;
1539 data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
1540 *lenp = 5 ;
1541 if (maxlen >= 6) {
1542 data[5] = inode->i_generation ;
1543 *lenp = 6 ;
1544 }
1545 spin_unlock(&dentry->d_lock);
1546 return *lenp ;
1547} 1602}
1548 1603
1604int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1605 int need_parent)
1606{
1607 struct inode *inode = dentry->d_inode;
1608 int maxlen = *lenp;
1609
1610 if (maxlen < 3)
1611 return 255;
1612
1613 data[0] = inode->i_ino;
1614 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1615 data[2] = inode->i_generation;
1616 *lenp = 3;
1617 /* no room for directory info? return what we've stored so far */
1618 if (maxlen < 5 || !need_parent)
1619 return 3;
1620
1621 spin_lock(&dentry->d_lock);
1622 inode = dentry->d_parent->d_inode;
1623 data[3] = inode->i_ino;
1624 data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1625 *lenp = 5;
1626 if (maxlen >= 6) {
1627 data[5] = inode->i_generation;
1628 *lenp = 6;
1629 }
1630 spin_unlock(&dentry->d_lock);
1631 return *lenp;
1632}
1549 1633
1550/* looks for stat data, then copies fields to it, marks the buffer 1634/* looks for stat data, then copies fields to it, marks the buffer
1551 containing stat data as dirty */ 1635 containing stat data as dirty */
@@ -1554,120 +1638,127 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp, int need_p
1554** to properly mark inodes for datasync and such, but only actually 1638** to properly mark inodes for datasync and such, but only actually
1555** does something when called for a synchronous update. 1639** does something when called for a synchronous update.
1556*/ 1640*/
1557int reiserfs_write_inode (struct inode * inode, int do_sync) { 1641int reiserfs_write_inode(struct inode *inode, int do_sync)
1558 struct reiserfs_transaction_handle th ; 1642{
1559 int jbegin_count = 1 ; 1643 struct reiserfs_transaction_handle th;
1560 1644 int jbegin_count = 1;
1561 if (inode->i_sb->s_flags & MS_RDONLY) 1645
1562 return -EROFS; 1646 if (inode->i_sb->s_flags & MS_RDONLY)
1563 /* memory pressure can sometimes initiate write_inode calls with sync == 1, 1647 return -EROFS;
1564 ** these cases are just when the system needs ram, not when the 1648 /* memory pressure can sometimes initiate write_inode calls with sync == 1,
1565 ** inode needs to reach disk for safety, and they can safely be 1649 ** these cases are just when the system needs ram, not when the
1566 ** ignored because the altered inode has already been logged. 1650 ** inode needs to reach disk for safety, and they can safely be
1567 */ 1651 ** ignored because the altered inode has already been logged.
1568 if (do_sync && !(current->flags & PF_MEMALLOC)) { 1652 */
1569 reiserfs_write_lock(inode->i_sb); 1653 if (do_sync && !(current->flags & PF_MEMALLOC)) {
1570 if (!journal_begin(&th, inode->i_sb, jbegin_count)) { 1654 reiserfs_write_lock(inode->i_sb);
1571 reiserfs_update_sd (&th, inode); 1655 if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1572 journal_end_sync(&th, inode->i_sb, jbegin_count) ; 1656 reiserfs_update_sd(&th, inode);
1573 } 1657 journal_end_sync(&th, inode->i_sb, jbegin_count);
1574 reiserfs_write_unlock(inode->i_sb); 1658 }
1575 } 1659 reiserfs_write_unlock(inode->i_sb);
1576 return 0; 1660 }
1661 return 0;
1577} 1662}
1578 1663
1579/* stat data of new object is inserted already, this inserts the item 1664/* stat data of new object is inserted already, this inserts the item
1580 containing "." and ".." entries */ 1665 containing "." and ".." entries */
1581static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, 1666static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1582 struct inode *inode, 1667 struct inode *inode,
1583 struct item_head * ih, struct path * path, 1668 struct item_head *ih, struct path *path,
1584 struct inode * dir) 1669 struct inode *dir)
1585{ 1670{
1586 struct super_block * sb = th->t_super; 1671 struct super_block *sb = th->t_super;
1587 char empty_dir [EMPTY_DIR_SIZE]; 1672 char empty_dir[EMPTY_DIR_SIZE];
1588 char * body = empty_dir; 1673 char *body = empty_dir;
1589 struct cpu_key key; 1674 struct cpu_key key;
1590 int retval; 1675 int retval;
1591 1676
1592 BUG_ON (!th->t_trans_id); 1677 BUG_ON(!th->t_trans_id);
1593 1678
1594 _make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id), 1679 _make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1595 le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/); 1680 le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1596 1681 TYPE_DIRENTRY, 3 /*key length */ );
1597 /* compose item head for new item. Directories consist of items of 1682
1598 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it 1683 /* compose item head for new item. Directories consist of items of
1599 is done by reiserfs_new_inode */ 1684 old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
1600 if (old_format_only (sb)) { 1685 is done by reiserfs_new_inode */
1601 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); 1686 if (old_format_only(sb)) {
1602 1687 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1603 make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, 1688 TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1604 INODE_PKEY (dir)->k_dir_id, 1689
1605 INODE_PKEY (dir)->k_objectid ); 1690 make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1606 } else { 1691 ih->ih_key.k_objectid,
1607 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); 1692 INODE_PKEY(dir)->k_dir_id,
1608 1693 INODE_PKEY(dir)->k_objectid);
1609 make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, 1694 } else {
1610 INODE_PKEY (dir)->k_dir_id, 1695 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1611 INODE_PKEY (dir)->k_objectid ); 1696 TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1612 } 1697
1613 1698 make_empty_dir_item(body, ih->ih_key.k_dir_id,
1614 /* look for place in the tree for new item */ 1699 ih->ih_key.k_objectid,
1615 retval = search_item (sb, &key, path); 1700 INODE_PKEY(dir)->k_dir_id,
1616 if (retval == IO_ERROR) { 1701 INODE_PKEY(dir)->k_objectid);
1617 reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: " 1702 }
1618 "i/o failure occurred creating new directory"); 1703
1619 return -EIO; 1704 /* look for place in the tree for new item */
1620 } 1705 retval = search_item(sb, &key, path);
1621 if (retval == ITEM_FOUND) { 1706 if (retval == IO_ERROR) {
1622 pathrelse (path); 1707 reiserfs_warning(sb, "vs-13080: reiserfs_new_directory: "
1623 reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: " 1708 "i/o failure occurred creating new directory");
1624 "object with this key exists (%k)", &(ih->ih_key)); 1709 return -EIO;
1625 return -EEXIST; 1710 }
1626 } 1711 if (retval == ITEM_FOUND) {
1627 1712 pathrelse(path);
1628 /* insert item, that is empty directory item */ 1713 reiserfs_warning(sb, "vs-13070: reiserfs_new_directory: "
1629 return reiserfs_insert_item (th, path, &key, ih, inode, body); 1714 "object with this key exists (%k)",
1630} 1715 &(ih->ih_key));
1716 return -EEXIST;
1717 }
1631 1718
1719 /* insert item, that is empty directory item */
1720 return reiserfs_insert_item(th, path, &key, ih, inode, body);
1721}
1632 1722
1633/* stat data of object has been inserted, this inserts the item 1723/* stat data of object has been inserted, this inserts the item
1634 containing the body of symlink */ 1724 containing the body of symlink */
1635static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, 1725static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */
1636 struct inode *inode, /* Inode of symlink */ 1726 struct item_head *ih,
1637 struct item_head * ih, 1727 struct path *path, const char *symname,
1638 struct path * path, const char * symname, int item_len) 1728 int item_len)
1639{ 1729{
1640 struct super_block * sb = th->t_super; 1730 struct super_block *sb = th->t_super;
1641 struct cpu_key key; 1731 struct cpu_key key;
1642 int retval; 1732 int retval;
1643 1733
1644 BUG_ON (!th->t_trans_id); 1734 BUG_ON(!th->t_trans_id);
1645 1735
1646 _make_cpu_key (&key, KEY_FORMAT_3_5, 1736 _make_cpu_key(&key, KEY_FORMAT_3_5,
1647 le32_to_cpu (ih->ih_key.k_dir_id), 1737 le32_to_cpu(ih->ih_key.k_dir_id),
1648 le32_to_cpu (ih->ih_key.k_objectid), 1738 le32_to_cpu(ih->ih_key.k_objectid),
1649 1, TYPE_DIRECT, 3/*key length*/); 1739 1, TYPE_DIRECT, 3 /*key length */ );
1650 1740
1651 make_le_item_head (ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/*free_space*/); 1741 make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1652 1742 0 /*free_space */ );
1653 /* look for place in the tree for new item */ 1743
1654 retval = search_item (sb, &key, path); 1744 /* look for place in the tree for new item */
1655 if (retval == IO_ERROR) { 1745 retval = search_item(sb, &key, path);
1656 reiserfs_warning (sb, "vs-13080: reiserfs_new_symlinik: " 1746 if (retval == IO_ERROR) {
1657 "i/o failure occurred creating new symlink"); 1747 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlinik: "
1658 return -EIO; 1748 "i/o failure occurred creating new symlink");
1659 } 1749 return -EIO;
1660 if (retval == ITEM_FOUND) { 1750 }
1661 pathrelse (path); 1751 if (retval == ITEM_FOUND) {
1662 reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: " 1752 pathrelse(path);
1663 "object with this key exists (%k)", &(ih->ih_key)); 1753 reiserfs_warning(sb, "vs-13080: reiserfs_new_symlink: "
1664 return -EEXIST; 1754 "object with this key exists (%k)",
1665 } 1755 &(ih->ih_key));
1666 1756 return -EEXIST;
1667 /* insert item, that is body of symlink */ 1757 }
1668 return reiserfs_insert_item (th, path, &key, ih, inode, symname);
1669}
1670 1758
1759 /* insert item, that is body of symlink */
1760 return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1761}
1671 1762
1672/* inserts the stat data into the tree, and then calls 1763/* inserts the stat data into the tree, and then calls
1673 reiserfs_new_directory (to insert ".", ".." item if new object is 1764 reiserfs_new_directory (to insert ".", ".." item if new object is
@@ -1678,213 +1769,219 @@ static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
1678 non-zero due to an error, we have to drop the quota previously allocated 1769 non-zero due to an error, we have to drop the quota previously allocated
1679 for the fresh inode. This can only be done outside a transaction, so 1770 for the fresh inode. This can only be done outside a transaction, so
1680 if we return non-zero, we also end the transaction. */ 1771 if we return non-zero, we also end the transaction. */
1681int reiserfs_new_inode (struct reiserfs_transaction_handle *th, 1772int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1682 struct inode * dir, int mode, 1773 struct inode *dir, int mode, const char *symname,
1683 const char * symname, 1774 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1684 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1775 strlen (symname) for symlinks) */
1685 strlen (symname) for symlinks)*/ 1776 loff_t i_size, struct dentry *dentry,
1686 loff_t i_size, struct dentry *dentry, 1777 struct inode *inode)
1687 struct inode *inode)
1688{ 1778{
1689 struct super_block * sb; 1779 struct super_block *sb;
1690 INITIALIZE_PATH (path_to_key); 1780 INITIALIZE_PATH(path_to_key);
1691 struct cpu_key key; 1781 struct cpu_key key;
1692 struct item_head ih; 1782 struct item_head ih;
1693 struct stat_data sd; 1783 struct stat_data sd;
1694 int retval; 1784 int retval;
1695 int err; 1785 int err;
1696 1786
1697 BUG_ON (!th->t_trans_id); 1787 BUG_ON(!th->t_trans_id);
1698 1788
1699 if (DQUOT_ALLOC_INODE(inode)) { 1789 if (DQUOT_ALLOC_INODE(inode)) {
1700 err = -EDQUOT; 1790 err = -EDQUOT;
1701 goto out_end_trans; 1791 goto out_end_trans;
1702 } 1792 }
1703 if (!dir || !dir->i_nlink) { 1793 if (!dir || !dir->i_nlink) {
1704 err = -EPERM; 1794 err = -EPERM;
1705 goto out_bad_inode; 1795 goto out_bad_inode;
1706 } 1796 }
1707 1797
1708 sb = dir->i_sb; 1798 sb = dir->i_sb;
1709 1799
1710 /* item head of new item */ 1800 /* item head of new item */
1711 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); 1801 ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1712 ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); 1802 ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1713 if (!ih.ih_key.k_objectid) { 1803 if (!ih.ih_key.k_objectid) {
1714 err = -ENOMEM; 1804 err = -ENOMEM;
1715 goto out_bad_inode ; 1805 goto out_bad_inode;
1716 } 1806 }
1717 if (old_format_only (sb)) 1807 if (old_format_only(sb))
1718 /* not a perfect generation count, as object ids can be reused, but 1808 /* not a perfect generation count, as object ids can be reused, but
1719 ** this is as good as reiserfs can do right now. 1809 ** this is as good as reiserfs can do right now.
1720 ** note that the private part of inode isn't filled in yet, we have 1810 ** note that the private part of inode isn't filled in yet, we have
1721 ** to use the directory. 1811 ** to use the directory.
1722 */ 1812 */
1723 inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid); 1813 inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1724 else 1814 else
1725#if defined( USE_INODE_GENERATION_COUNTER ) 1815#if defined( USE_INODE_GENERATION_COUNTER )
1726 inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation); 1816 inode->i_generation =
1817 le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1727#else 1818#else
1728 inode->i_generation = ++event; 1819 inode->i_generation = ++event;
1729#endif 1820#endif
1730 1821
1731 /* fill stat data */ 1822 /* fill stat data */
1732 inode->i_nlink = (S_ISDIR (mode) ? 2 : 1); 1823 inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1733 1824
1734 /* uid and gid must already be set by the caller for quota init */ 1825 /* uid and gid must already be set by the caller for quota init */
1735 1826
1736 /* symlink cannot be immutable or append only, right? */ 1827 /* symlink cannot be immutable or append only, right? */
1737 if( S_ISLNK( inode -> i_mode ) ) 1828 if (S_ISLNK(inode->i_mode))
1738 inode -> i_flags &= ~ ( S_IMMUTABLE | S_APPEND ); 1829 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1739 1830
1740 inode->i_mtime = inode->i_atime = inode->i_ctime = 1831 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1741 CURRENT_TIME_SEC; 1832 inode->i_size = i_size;
1742 inode->i_size = i_size; 1833 inode->i_blocks = 0;
1743 inode->i_blocks = 0; 1834 inode->i_bytes = 0;
1744 inode->i_bytes = 0; 1835 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1745 REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : 1836 U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1746 U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; 1837
1747 1838 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1748 INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list )); 1839 REISERFS_I(inode)->i_flags = 0;
1749 REISERFS_I(inode)->i_flags = 0; 1840 REISERFS_I(inode)->i_prealloc_block = 0;
1750 REISERFS_I(inode)->i_prealloc_block = 0; 1841 REISERFS_I(inode)->i_prealloc_count = 0;
1751 REISERFS_I(inode)->i_prealloc_count = 0; 1842 REISERFS_I(inode)->i_trans_id = 0;
1752 REISERFS_I(inode)->i_trans_id = 0; 1843 REISERFS_I(inode)->i_jl = NULL;
1753 REISERFS_I(inode)->i_jl = NULL; 1844 REISERFS_I(inode)->i_attrs =
1754 REISERFS_I(inode)->i_attrs = 1845 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1755 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1846 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1756 sd_attrs_to_i_attrs( REISERFS_I(inode) -> i_attrs, inode ); 1847 REISERFS_I(inode)->i_acl_access = NULL;
1757 REISERFS_I(inode)->i_acl_access = NULL; 1848 REISERFS_I(inode)->i_acl_default = NULL;
1758 REISERFS_I(inode)->i_acl_default = NULL; 1849 init_rwsem(&REISERFS_I(inode)->xattr_sem);
1759 init_rwsem (&REISERFS_I(inode)->xattr_sem); 1850
1760 1851 if (old_format_only(sb))
1761 if (old_format_only (sb)) 1852 make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1762 make_le_item_head (&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT); 1853 TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1763 else 1854 else
1764 make_le_item_head (&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1855 make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1765 1856 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1766 /* key to search for correct place for new stat data */ 1857
1767 _make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id), 1858 /* key to search for correct place for new stat data */
1768 le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/); 1859 _make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1769 1860 le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1770 /* find proper place for inserting of stat data */ 1861 TYPE_STAT_DATA, 3 /*key length */ );
1771 retval = search_item (sb, &key, &path_to_key); 1862
1772 if (retval == IO_ERROR) { 1863 /* find proper place for inserting of stat data */
1773 err = -EIO; 1864 retval = search_item(sb, &key, &path_to_key);
1774 goto out_bad_inode; 1865 if (retval == IO_ERROR) {
1775 } 1866 err = -EIO;
1776 if (retval == ITEM_FOUND) { 1867 goto out_bad_inode;
1777 pathrelse (&path_to_key); 1868 }
1778 err = -EEXIST; 1869 if (retval == ITEM_FOUND) {
1779 goto out_bad_inode; 1870 pathrelse(&path_to_key);
1780 } 1871 err = -EEXIST;
1781 if (old_format_only (sb)) { 1872 goto out_bad_inode;
1782 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) { 1873 }
1783 pathrelse (&path_to_key); 1874 if (old_format_only(sb)) {
1784 /* i_uid or i_gid is too big to be stored in stat data v3.5 */ 1875 if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1785 err = -EINVAL; 1876 pathrelse(&path_to_key);
1786 goto out_bad_inode; 1877 /* i_uid or i_gid is too big to be stored in stat data v3.5 */
1787 } 1878 err = -EINVAL;
1788 inode2sd_v1 (&sd, inode, inode->i_size); 1879 goto out_bad_inode;
1789 } else { 1880 }
1790 inode2sd (&sd, inode, inode->i_size); 1881 inode2sd_v1(&sd, inode, inode->i_size);
1791 } 1882 } else {
1792 // these do not go to on-disk stat data 1883 inode2sd(&sd, inode, inode->i_size);
1793 inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); 1884 }
1794 inode->i_blksize = reiserfs_default_io_size; 1885 // these do not go to on-disk stat data
1795 1886 inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1796 // store in in-core inode the key of stat data and version all 1887 inode->i_blksize = reiserfs_default_io_size;
1797 // object items will have (directory items will have old offset 1888
1798 // format, other new objects will consist of new items) 1889 // store in in-core inode the key of stat data and version all
1799 memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE); 1890 // object items will have (directory items will have old offset
1800 if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode)) 1891 // format, other new objects will consist of new items)
1801 set_inode_item_key_version (inode, KEY_FORMAT_3_5); 1892 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1802 else 1893 if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1803 set_inode_item_key_version (inode, KEY_FORMAT_3_6); 1894 set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1804 if (old_format_only (sb)) 1895 else
1805 set_inode_sd_version (inode, STAT_DATA_V1); 1896 set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1806 else 1897 if (old_format_only(sb))
1807 set_inode_sd_version (inode, STAT_DATA_V2); 1898 set_inode_sd_version(inode, STAT_DATA_V1);
1808 1899 else
1809 /* insert the stat data into the tree */ 1900 set_inode_sd_version(inode, STAT_DATA_V2);
1901
1902 /* insert the stat data into the tree */
1810#ifdef DISPLACE_NEW_PACKING_LOCALITIES 1903#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1811 if (REISERFS_I(dir)->new_packing_locality) 1904 if (REISERFS_I(dir)->new_packing_locality)
1812 th->displace_new_blocks = 1; 1905 th->displace_new_blocks = 1;
1813#endif 1906#endif
1814 retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd)); 1907 retval =
1815 if (retval) { 1908 reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1816 err = retval; 1909 (char *)(&sd));
1817 reiserfs_check_path(&path_to_key) ; 1910 if (retval) {
1818 goto out_bad_inode; 1911 err = retval;
1819 } 1912 reiserfs_check_path(&path_to_key);
1820 1913 goto out_bad_inode;
1914 }
1821#ifdef DISPLACE_NEW_PACKING_LOCALITIES 1915#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1822 if (!th->displace_new_blocks) 1916 if (!th->displace_new_blocks)
1823 REISERFS_I(dir)->new_packing_locality = 0; 1917 REISERFS_I(dir)->new_packing_locality = 0;
1824#endif 1918#endif
1825 if (S_ISDIR(mode)) { 1919 if (S_ISDIR(mode)) {
1826 /* insert item with "." and ".." */ 1920 /* insert item with "." and ".." */
1827 retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir); 1921 retval =
1828 } 1922 reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1829 1923 }
1830 if (S_ISLNK(mode)) { 1924
1831 /* insert body of symlink */ 1925 if (S_ISLNK(mode)) {
1832 if (!old_format_only (sb)) 1926 /* insert body of symlink */
1833 i_size = ROUND_UP(i_size); 1927 if (!old_format_only(sb))
1834 retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size); 1928 i_size = ROUND_UP(i_size);
1835 } 1929 retval =
1836 if (retval) { 1930 reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1837 err = retval; 1931 i_size);
1838 reiserfs_check_path(&path_to_key) ; 1932 }
1839 journal_end(th, th->t_super, th->t_blocks_allocated); 1933 if (retval) {
1840 goto out_inserted_sd; 1934 err = retval;
1841 } 1935 reiserfs_check_path(&path_to_key);
1842 1936 journal_end(th, th->t_super, th->t_blocks_allocated);
1843 /* XXX CHECK THIS */ 1937 goto out_inserted_sd;
1844 if (reiserfs_posixacl (inode->i_sb)) { 1938 }
1845 retval = reiserfs_inherit_default_acl (dir, dentry, inode); 1939
1846 if (retval) { 1940 /* XXX CHECK THIS */
1847 err = retval; 1941 if (reiserfs_posixacl(inode->i_sb)) {
1848 reiserfs_check_path(&path_to_key) ; 1942 retval = reiserfs_inherit_default_acl(dir, dentry, inode);
1849 journal_end(th, th->t_super, th->t_blocks_allocated); 1943 if (retval) {
1850 goto out_inserted_sd; 1944 err = retval;
1851 } 1945 reiserfs_check_path(&path_to_key);
1852 } else if (inode->i_sb->s_flags & MS_POSIXACL) { 1946 journal_end(th, th->t_super, th->t_blocks_allocated);
1853 reiserfs_warning (inode->i_sb, "ACLs aren't enabled in the fs, " 1947 goto out_inserted_sd;
1854 "but vfs thinks they are!"); 1948 }
1855 } else if (is_reiserfs_priv_object (dir)) { 1949 } else if (inode->i_sb->s_flags & MS_POSIXACL) {
1856 reiserfs_mark_inode_private (inode); 1950 reiserfs_warning(inode->i_sb, "ACLs aren't enabled in the fs, "
1857 } 1951 "but vfs thinks they are!");
1858 1952 } else if (is_reiserfs_priv_object(dir)) {
1859 insert_inode_hash (inode); 1953 reiserfs_mark_inode_private(inode);
1860 reiserfs_update_sd(th, inode); 1954 }
1861 reiserfs_check_path(&path_to_key) ; 1955
1862 1956 insert_inode_hash(inode);
1863 return 0; 1957 reiserfs_update_sd(th, inode);
1958 reiserfs_check_path(&path_to_key);
1959
1960 return 0;
1864 1961
1865/* it looks like you can easily compress these two goto targets into 1962/* it looks like you can easily compress these two goto targets into
1866 * one. Keeping it like this doesn't actually hurt anything, and they 1963 * one. Keeping it like this doesn't actually hurt anything, and they
1867 * are place holders for what the quota code actually needs. 1964 * are place holders for what the quota code actually needs.
1868 */ 1965 */
1869out_bad_inode: 1966 out_bad_inode:
1870 /* Invalidate the object, nothing was inserted yet */ 1967 /* Invalidate the object, nothing was inserted yet */
1871 INODE_PKEY(inode)->k_objectid = 0; 1968 INODE_PKEY(inode)->k_objectid = 0;
1872 1969
1873 /* Quota change must be inside a transaction for journaling */ 1970 /* Quota change must be inside a transaction for journaling */
1874 DQUOT_FREE_INODE(inode); 1971 DQUOT_FREE_INODE(inode);
1875 1972
1876out_end_trans: 1973 out_end_trans:
1877 journal_end(th, th->t_super, th->t_blocks_allocated) ; 1974 journal_end(th, th->t_super, th->t_blocks_allocated);
1878 /* Drop can be outside and it needs more credits so it's better to have it outside */ 1975 /* Drop can be outside and it needs more credits so it's better to have it outside */
1879 DQUOT_DROP(inode); 1976 DQUOT_DROP(inode);
1880 inode->i_flags |= S_NOQUOTA; 1977 inode->i_flags |= S_NOQUOTA;
1881 make_bad_inode(inode); 1978 make_bad_inode(inode);
1882 1979
1883out_inserted_sd: 1980 out_inserted_sd:
1884 inode->i_nlink = 0; 1981 inode->i_nlink = 0;
1885 th->t_trans_id = 0; /* so the caller can't use this handle later */ 1982 th->t_trans_id = 0; /* so the caller can't use this handle later */
1886 iput(inode); 1983 iput(inode);
1887 return err; 1984 return err;
1888} 1985}
1889 1986
1890/* 1987/*
@@ -1900,77 +1997,78 @@ out_inserted_sd:
1900** 1997**
1901** on failure, nonzero is returned, page_result and bh_result are untouched. 1998** on failure, nonzero is returned, page_result and bh_result are untouched.
1902*/ 1999*/
1903static int grab_tail_page(struct inode *p_s_inode, 2000static int grab_tail_page(struct inode *p_s_inode,
1904 struct page **page_result, 2001 struct page **page_result,
1905 struct buffer_head **bh_result) { 2002 struct buffer_head **bh_result)
1906 2003{
1907 /* we want the page with the last byte in the file, 2004
1908 ** not the page that will hold the next byte for appending 2005 /* we want the page with the last byte in the file,
1909 */ 2006 ** not the page that will hold the next byte for appending
1910 unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ; 2007 */
1911 unsigned long pos = 0 ; 2008 unsigned long index = (p_s_inode->i_size - 1) >> PAGE_CACHE_SHIFT;
1912 unsigned long start = 0 ; 2009 unsigned long pos = 0;
1913 unsigned long blocksize = p_s_inode->i_sb->s_blocksize ; 2010 unsigned long start = 0;
1914 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ; 2011 unsigned long blocksize = p_s_inode->i_sb->s_blocksize;
1915 struct buffer_head *bh ; 2012 unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1);
1916 struct buffer_head *head ; 2013 struct buffer_head *bh;
1917 struct page * page ; 2014 struct buffer_head *head;
1918 int error ; 2015 struct page *page;
1919 2016 int error;
1920 /* we know that we are only called with inode->i_size > 0. 2017
1921 ** we also know that a file tail can never be as big as a block 2018 /* we know that we are only called with inode->i_size > 0.
1922 ** If i_size % blocksize == 0, our file is currently block aligned 2019 ** we also know that a file tail can never be as big as a block
1923 ** and it won't need converting or zeroing after a truncate. 2020 ** If i_size % blocksize == 0, our file is currently block aligned
1924 */ 2021 ** and it won't need converting or zeroing after a truncate.
1925 if ((offset & (blocksize - 1)) == 0) { 2022 */
1926 return -ENOENT ; 2023 if ((offset & (blocksize - 1)) == 0) {
1927 } 2024 return -ENOENT;
1928 page = grab_cache_page(p_s_inode->i_mapping, index) ; 2025 }
1929 error = -ENOMEM ; 2026 page = grab_cache_page(p_s_inode->i_mapping, index);
1930 if (!page) { 2027 error = -ENOMEM;
1931 goto out ; 2028 if (!page) {
1932 } 2029 goto out;
1933 /* start within the page of the last block in the file */ 2030 }
1934 start = (offset / blocksize) * blocksize ; 2031 /* start within the page of the last block in the file */
1935 2032 start = (offset / blocksize) * blocksize;
1936 error = block_prepare_write(page, start, offset, 2033
1937 reiserfs_get_block_create_0) ; 2034 error = block_prepare_write(page, start, offset,
1938 if (error) 2035 reiserfs_get_block_create_0);
1939 goto unlock ; 2036 if (error)
1940 2037 goto unlock;
1941 head = page_buffers(page) ; 2038
1942 bh = head; 2039 head = page_buffers(page);
1943 do { 2040 bh = head;
1944 if (pos >= start) { 2041 do {
1945 break ; 2042 if (pos >= start) {
1946 } 2043 break;
1947 bh = bh->b_this_page ; 2044 }
1948 pos += blocksize ; 2045 bh = bh->b_this_page;
1949 } while(bh != head) ; 2046 pos += blocksize;
1950 2047 } while (bh != head);
1951 if (!buffer_uptodate(bh)) { 2048
1952 /* note, this should never happen, prepare_write should 2049 if (!buffer_uptodate(bh)) {
1953 ** be taking care of this for us. If the buffer isn't up to date, 2050 /* note, this should never happen, prepare_write should
1954 ** I've screwed up the code to find the buffer, or the code to 2051 ** be taking care of this for us. If the buffer isn't up to date,
1955 ** call prepare_write 2052 ** I've screwed up the code to find the buffer, or the code to
1956 */ 2053 ** call prepare_write
1957 reiserfs_warning (p_s_inode->i_sb, 2054 */
1958 "clm-6000: error reading block %lu on dev %s", 2055 reiserfs_warning(p_s_inode->i_sb,
1959 bh->b_blocknr, 2056 "clm-6000: error reading block %lu on dev %s",
1960 reiserfs_bdevname (p_s_inode->i_sb)) ; 2057 bh->b_blocknr,
1961 error = -EIO ; 2058 reiserfs_bdevname(p_s_inode->i_sb));
1962 goto unlock ; 2059 error = -EIO;
1963 } 2060 goto unlock;
1964 *bh_result = bh ; 2061 }
1965 *page_result = page ; 2062 *bh_result = bh;
1966 2063 *page_result = page;
1967out: 2064
1968 return error ; 2065 out:
1969 2066 return error;
1970unlock: 2067
1971 unlock_page(page) ; 2068 unlock:
1972 page_cache_release(page) ; 2069 unlock_page(page);
1973 return error ; 2070 page_cache_release(page);
2071 return error;
1974} 2072}
1975 2073
1976/* 2074/*
@@ -1979,235 +2077,247 @@ unlock:
1979** 2077**
1980** some code taken from block_truncate_page 2078** some code taken from block_truncate_page
1981*/ 2079*/
1982int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { 2080int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
1983 struct reiserfs_transaction_handle th ; 2081{
1984 /* we want the offset for the first byte after the end of the file */ 2082 struct reiserfs_transaction_handle th;
1985 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ; 2083 /* we want the offset for the first byte after the end of the file */
1986 unsigned blocksize = p_s_inode->i_sb->s_blocksize ; 2084 unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1);
1987 unsigned length ; 2085 unsigned blocksize = p_s_inode->i_sb->s_blocksize;
1988 struct page *page = NULL ; 2086 unsigned length;
1989 int error ; 2087 struct page *page = NULL;
1990 struct buffer_head *bh = NULL ; 2088 int error;
1991 2089 struct buffer_head *bh = NULL;
1992 reiserfs_write_lock(p_s_inode->i_sb); 2090
1993 2091 reiserfs_write_lock(p_s_inode->i_sb);
1994 if (p_s_inode->i_size > 0) { 2092
1995 if ((error = grab_tail_page(p_s_inode, &page, &bh))) { 2093 if (p_s_inode->i_size > 0) {
1996 // -ENOENT means we truncated past the end of the file, 2094 if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
1997 // and get_block_create_0 could not find a block to read in, 2095 // -ENOENT means we truncated past the end of the file,
1998 // which is ok. 2096 // and get_block_create_0 could not find a block to read in,
1999 if (error != -ENOENT) 2097 // which is ok.
2000 reiserfs_warning (p_s_inode->i_sb, 2098 if (error != -ENOENT)
2001 "clm-6001: grab_tail_page failed %d", 2099 reiserfs_warning(p_s_inode->i_sb,
2002 error); 2100 "clm-6001: grab_tail_page failed %d",
2003 page = NULL ; 2101 error);
2004 bh = NULL ; 2102 page = NULL;
2005 } 2103 bh = NULL;
2006 } 2104 }
2007 2105 }
2008 /* so, if page != NULL, we have a buffer head for the offset at
2009 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2010 ** then we have an unformatted node. Otherwise, we have a direct item,
2011 ** and no zeroing is required on disk. We zero after the truncate,
2012 ** because the truncate might pack the item anyway
2013 ** (it will unmap bh if it packs).
2014 */
2015 /* it is enough to reserve space in transaction for 2 balancings:
2016 one for "save" link adding and another for the first
2017 cut_from_item. 1 is for update_sd */
2018 error = journal_begin (&th, p_s_inode->i_sb,
2019 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2020 if (error)
2021 goto out;
2022 reiserfs_update_inode_transaction(p_s_inode) ;
2023 if (update_timestamps)
2024 /* we are doing real truncate: if the system crashes before the last
2025 transaction of truncating gets committed - on reboot the file
2026 either appears truncated properly or not truncated at all */
2027 add_save_link (&th, p_s_inode, 1);
2028 error = reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
2029 if (error)
2030 goto out;
2031 error = journal_end (&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2032 if (error)
2033 goto out;
2034
2035 if (update_timestamps) {
2036 error = remove_save_link (p_s_inode, 1/* truncate */);
2037 if (error)
2038 goto out;
2039 }
2040
2041 if (page) {
2042 length = offset & (blocksize - 1) ;
2043 /* if we are not on a block boundary */
2044 if (length) {
2045 char *kaddr;
2046
2047 length = blocksize - length ;
2048 kaddr = kmap_atomic(page, KM_USER0) ;
2049 memset(kaddr + offset, 0, length) ;
2050 flush_dcache_page(page) ;
2051 kunmap_atomic(kaddr, KM_USER0) ;
2052 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2053 mark_buffer_dirty(bh) ;
2054 }
2055 }
2056 unlock_page(page) ;
2057 page_cache_release(page) ;
2058 }
2059
2060 reiserfs_write_unlock(p_s_inode->i_sb);
2061 return 0;
2062out:
2063 if (page) {
2064 unlock_page (page);
2065 page_cache_release (page);
2066 }
2067 reiserfs_write_unlock(p_s_inode->i_sb);
2068 return error;
2069}
2070 2106
2071static int map_block_for_writepage(struct inode *inode, 2107 /* so, if page != NULL, we have a buffer head for the offset at
2072 struct buffer_head *bh_result, 2108 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2073 unsigned long block) { 2109 ** then we have an unformatted node. Otherwise, we have a direct item,
2074 struct reiserfs_transaction_handle th ; 2110 ** and no zeroing is required on disk. We zero after the truncate,
2075 int fs_gen ; 2111 ** because the truncate might pack the item anyway
2076 struct item_head tmp_ih ; 2112 ** (it will unmap bh if it packs).
2077 struct item_head *ih ;
2078 struct buffer_head *bh ;
2079 __le32 *item ;
2080 struct cpu_key key ;
2081 INITIALIZE_PATH(path) ;
2082 int pos_in_item ;
2083 int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
2084 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
2085 int retval ;
2086 int use_get_block = 0 ;
2087 int bytes_copied = 0 ;
2088 int copy_size ;
2089 int trans_running = 0;
2090
2091 /* catch places below that try to log something without starting a trans */
2092 th.t_trans_id = 0;
2093
2094 if (!buffer_uptodate(bh_result)) {
2095 return -EIO;
2096 }
2097
2098 kmap(bh_result->b_page) ;
2099start_over:
2100 reiserfs_write_lock(inode->i_sb);
2101 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
2102
2103research:
2104 retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
2105 if (retval != POSITION_FOUND) {
2106 use_get_block = 1;
2107 goto out ;
2108 }
2109
2110 bh = get_last_bh(&path) ;
2111 ih = get_ih(&path) ;
2112 item = get_item(&path) ;
2113 pos_in_item = path.pos_in_item ;
2114
2115 /* we've found an unformatted node */
2116 if (indirect_item_found(retval, ih)) {
2117 if (bytes_copied > 0) {
2118 reiserfs_warning (inode->i_sb, "clm-6002: bytes_copied %d",
2119 bytes_copied) ;
2120 }
2121 if (!get_block_num(item, pos_in_item)) {
2122 /* crap, we are writing to a hole */
2123 use_get_block = 1;
2124 goto out ;
2125 }
2126 set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
2127 } else if (is_direct_le_ih(ih)) {
2128 char *p ;
2129 p = page_address(bh_result->b_page) ;
2130 p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
2131 copy_size = ih_item_len(ih) - pos_in_item;
2132
2133 fs_gen = get_generation(inode->i_sb) ;
2134 copy_item_head(&tmp_ih, ih) ;
2135
2136 if (!trans_running) {
2137 /* vs-3050 is gone, no need to drop the path */
2138 retval = journal_begin(&th, inode->i_sb, jbegin_count) ;
2139 if (retval)
2140 goto out;
2141 reiserfs_update_inode_transaction(inode) ;
2142 trans_running = 1;
2143 if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
2144 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
2145 goto research;
2146 }
2147 }
2148
2149 reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
2150
2151 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
2152 reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
2153 goto research;
2154 }
2155
2156 memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
2157
2158 journal_mark_dirty(&th, inode->i_sb, bh) ;
2159 bytes_copied += copy_size ;
2160 set_block_dev_mapped(bh_result, 0, inode);
2161
2162 /* are there still bytes left? */
2163 if (bytes_copied < bh_result->b_size &&
2164 (byte_offset + bytes_copied) < inode->i_size) {
2165 set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
2166 goto research ;
2167 }
2168 } else {
2169 reiserfs_warning (inode->i_sb,
2170 "clm-6003: bad item inode %lu, device %s",
2171 inode->i_ino, reiserfs_bdevname (inode->i_sb)) ;
2172 retval = -EIO ;
2173 goto out ;
2174 }
2175 retval = 0 ;
2176
2177out:
2178 pathrelse(&path) ;
2179 if (trans_running) {
2180 int err = journal_end(&th, inode->i_sb, jbegin_count) ;
2181 if (err)
2182 retval = err;
2183 trans_running = 0;
2184 }
2185 reiserfs_write_unlock(inode->i_sb);
2186
2187 /* this is where we fill in holes in the file. */
2188 if (use_get_block) {
2189 retval = reiserfs_get_block(inode, block, bh_result,
2190 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM |
2191 GET_BLOCK_NO_DANGLE);
2192 if (!retval) {
2193 if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) {
2194 /* get_block failed to find a mapped unformatted node. */
2195 use_get_block = 0 ;
2196 goto start_over ;
2197 }
2198 }
2199 }
2200 kunmap(bh_result->b_page) ;
2201
2202 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2203 /* we've copied data from the page into the direct item, so the
2204 * buffer in the page is now clean, mark it to reflect that.
2205 */ 2113 */
2206 lock_buffer(bh_result); 2114 /* it is enough to reserve space in transaction for 2 balancings:
2207 clear_buffer_dirty(bh_result); 2115 one for "save" link adding and another for the first
2208 unlock_buffer(bh_result); 2116 cut_from_item. 1 is for update_sd */
2209 } 2117 error = journal_begin(&th, p_s_inode->i_sb,
2210 return retval ; 2118 JOURNAL_PER_BALANCE_CNT * 2 + 1);
2119 if (error)
2120 goto out;
2121 reiserfs_update_inode_transaction(p_s_inode);
2122 if (update_timestamps)
2123 /* we are doing real truncate: if the system crashes before the last
2124 transaction of truncating gets committed - on reboot the file
2125 either appears truncated properly or not truncated at all */
2126 add_save_link(&th, p_s_inode, 1);
2127 error = reiserfs_do_truncate(&th, p_s_inode, page, update_timestamps);
2128 if (error)
2129 goto out;
2130 error =
2131 journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2132 if (error)
2133 goto out;
2134
2135 if (update_timestamps) {
2136 error = remove_save_link(p_s_inode, 1 /* truncate */ );
2137 if (error)
2138 goto out;
2139 }
2140
2141 if (page) {
2142 length = offset & (blocksize - 1);
2143 /* if we are not on a block boundary */
2144 if (length) {
2145 char *kaddr;
2146
2147 length = blocksize - length;
2148 kaddr = kmap_atomic(page, KM_USER0);
2149 memset(kaddr + offset, 0, length);
2150 flush_dcache_page(page);
2151 kunmap_atomic(kaddr, KM_USER0);
2152 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2153 mark_buffer_dirty(bh);
2154 }
2155 }
2156 unlock_page(page);
2157 page_cache_release(page);
2158 }
2159
2160 reiserfs_write_unlock(p_s_inode->i_sb);
2161 return 0;
2162 out:
2163 if (page) {
2164 unlock_page(page);
2165 page_cache_release(page);
2166 }
2167 reiserfs_write_unlock(p_s_inode->i_sb);
2168 return error;
2169}
2170
2171static int map_block_for_writepage(struct inode *inode,
2172 struct buffer_head *bh_result,
2173 unsigned long block)
2174{
2175 struct reiserfs_transaction_handle th;
2176 int fs_gen;
2177 struct item_head tmp_ih;
2178 struct item_head *ih;
2179 struct buffer_head *bh;
2180 __le32 *item;
2181 struct cpu_key key;
2182 INITIALIZE_PATH(path);
2183 int pos_in_item;
2184 int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2185 loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1;
2186 int retval;
2187 int use_get_block = 0;
2188 int bytes_copied = 0;
2189 int copy_size;
2190 int trans_running = 0;
2191
2192 /* catch places below that try to log something without starting a trans */
2193 th.t_trans_id = 0;
2194
2195 if (!buffer_uptodate(bh_result)) {
2196 return -EIO;
2197 }
2198
2199 kmap(bh_result->b_page);
2200 start_over:
2201 reiserfs_write_lock(inode->i_sb);
2202 make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2203
2204 research:
2205 retval = search_for_position_by_key(inode->i_sb, &key, &path);
2206 if (retval != POSITION_FOUND) {
2207 use_get_block = 1;
2208 goto out;
2209 }
2210
2211 bh = get_last_bh(&path);
2212 ih = get_ih(&path);
2213 item = get_item(&path);
2214 pos_in_item = path.pos_in_item;
2215
2216 /* we've found an unformatted node */
2217 if (indirect_item_found(retval, ih)) {
2218 if (bytes_copied > 0) {
2219 reiserfs_warning(inode->i_sb,
2220 "clm-6002: bytes_copied %d",
2221 bytes_copied);
2222 }
2223 if (!get_block_num(item, pos_in_item)) {
2224 /* crap, we are writing to a hole */
2225 use_get_block = 1;
2226 goto out;
2227 }
2228 set_block_dev_mapped(bh_result,
2229 get_block_num(item, pos_in_item), inode);
2230 } else if (is_direct_le_ih(ih)) {
2231 char *p;
2232 p = page_address(bh_result->b_page);
2233 p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2234 copy_size = ih_item_len(ih) - pos_in_item;
2235
2236 fs_gen = get_generation(inode->i_sb);
2237 copy_item_head(&tmp_ih, ih);
2238
2239 if (!trans_running) {
2240 /* vs-3050 is gone, no need to drop the path */
2241 retval = journal_begin(&th, inode->i_sb, jbegin_count);
2242 if (retval)
2243 goto out;
2244 reiserfs_update_inode_transaction(inode);
2245 trans_running = 1;
2246 if (fs_changed(fs_gen, inode->i_sb)
2247 && item_moved(&tmp_ih, &path)) {
2248 reiserfs_restore_prepared_buffer(inode->i_sb,
2249 bh);
2250 goto research;
2251 }
2252 }
2253
2254 reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2255
2256 if (fs_changed(fs_gen, inode->i_sb)
2257 && item_moved(&tmp_ih, &path)) {
2258 reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2259 goto research;
2260 }
2261
2262 memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2263 copy_size);
2264
2265 journal_mark_dirty(&th, inode->i_sb, bh);
2266 bytes_copied += copy_size;
2267 set_block_dev_mapped(bh_result, 0, inode);
2268
2269 /* are there still bytes left? */
2270 if (bytes_copied < bh_result->b_size &&
2271 (byte_offset + bytes_copied) < inode->i_size) {
2272 set_cpu_key_k_offset(&key,
2273 cpu_key_k_offset(&key) +
2274 copy_size);
2275 goto research;
2276 }
2277 } else {
2278 reiserfs_warning(inode->i_sb,
2279 "clm-6003: bad item inode %lu, device %s",
2280 inode->i_ino, reiserfs_bdevname(inode->i_sb));
2281 retval = -EIO;
2282 goto out;
2283 }
2284 retval = 0;
2285
2286 out:
2287 pathrelse(&path);
2288 if (trans_running) {
2289 int err = journal_end(&th, inode->i_sb, jbegin_count);
2290 if (err)
2291 retval = err;
2292 trans_running = 0;
2293 }
2294 reiserfs_write_unlock(inode->i_sb);
2295
2296 /* this is where we fill in holes in the file. */
2297 if (use_get_block) {
2298 retval = reiserfs_get_block(inode, block, bh_result,
2299 GET_BLOCK_CREATE | GET_BLOCK_NO_ISEM
2300 | GET_BLOCK_NO_DANGLE);
2301 if (!retval) {
2302 if (!buffer_mapped(bh_result)
2303 || bh_result->b_blocknr == 0) {
2304 /* get_block failed to find a mapped unformatted node. */
2305 use_get_block = 0;
2306 goto start_over;
2307 }
2308 }
2309 }
2310 kunmap(bh_result->b_page);
2311
2312 if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2313 /* we've copied data from the page into the direct item, so the
2314 * buffer in the page is now clean, mark it to reflect that.
2315 */
2316 lock_buffer(bh_result);
2317 clear_buffer_dirty(bh_result);
2318 unlock_buffer(bh_result);
2319 }
2320 return retval;
2211} 2321}
2212 2322
2213/* 2323/*
@@ -2215,383 +2325,390 @@ out:
2215 * start/recovery path as __block_write_full_page, along with special 2325 * start/recovery path as __block_write_full_page, along with special
2216 * code to handle reiserfs tails. 2326 * code to handle reiserfs tails.
2217 */ 2327 */
2218static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { 2328static int reiserfs_write_full_page(struct page *page,
2219 struct inode *inode = page->mapping->host ; 2329 struct writeback_control *wbc)
2220 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; 2330{
2221 int error = 0; 2331 struct inode *inode = page->mapping->host;
2222 unsigned long block ; 2332 unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2223 struct buffer_head *head, *bh; 2333 int error = 0;
2224 int partial = 0 ; 2334 unsigned long block;
2225 int nr = 0; 2335 struct buffer_head *head, *bh;
2226 int checked = PageChecked(page); 2336 int partial = 0;
2227 struct reiserfs_transaction_handle th; 2337 int nr = 0;
2228 struct super_block *s = inode->i_sb; 2338 int checked = PageChecked(page);
2229 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 2339 struct reiserfs_transaction_handle th;
2230 th.t_trans_id = 0; 2340 struct super_block *s = inode->i_sb;
2231 2341 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2232 /* The page dirty bit is cleared before writepage is called, which 2342 th.t_trans_id = 0;
2233 * means we have to tell create_empty_buffers to make dirty buffers 2343
2234 * The page really should be up to date at this point, so tossing 2344 /* The page dirty bit is cleared before writepage is called, which
2235 * in the BH_Uptodate is just a sanity check. 2345 * means we have to tell create_empty_buffers to make dirty buffers
2236 */ 2346 * The page really should be up to date at this point, so tossing
2237 if (!page_has_buffers(page)) { 2347 * in the BH_Uptodate is just a sanity check.
2238 create_empty_buffers(page, s->s_blocksize, 2348 */
2239 (1 << BH_Dirty) | (1 << BH_Uptodate)); 2349 if (!page_has_buffers(page)) {
2240 } 2350 create_empty_buffers(page, s->s_blocksize,
2241 head = page_buffers(page) ; 2351 (1 << BH_Dirty) | (1 << BH_Uptodate));
2242 2352 }
2243 /* last page in the file, zero out any contents past the 2353 head = page_buffers(page);
2244 ** last byte in the file
2245 */
2246 if (page->index >= end_index) {
2247 char *kaddr;
2248 unsigned last_offset;
2249
2250 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
2251 /* no file contents in this page */
2252 if (page->index >= end_index + 1 || !last_offset) {
2253 unlock_page(page);
2254 return 0;
2255 }
2256 kaddr = kmap_atomic(page, KM_USER0);
2257 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
2258 flush_dcache_page(page) ;
2259 kunmap_atomic(kaddr, KM_USER0) ;
2260 }
2261 bh = head ;
2262 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits) ;
2263 /* first map all the buffers, logging any direct items we find */
2264 do {
2265 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) ||
2266 (buffer_mapped(bh) && bh->b_blocknr == 0))) {
2267 /* not mapped yet, or it points to a direct item, search
2268 * the btree for the mapping info, and log any direct
2269 * items found
2270 */
2271 if ((error = map_block_for_writepage(inode, bh, block))) {
2272 goto fail ;
2273 }
2274 }
2275 bh = bh->b_this_page;
2276 block++;
2277 } while(bh != head) ;
2278
2279 /*
2280 * we start the transaction after map_block_for_writepage,
2281 * because it can create holes in the file (an unbounded operation).
2282 * starting it here, we can make a reliable estimate for how many
2283 * blocks we're going to log
2284 */
2285 if (checked) {
2286 ClearPageChecked(page);
2287 reiserfs_write_lock(s);
2288 error = journal_begin(&th, s, bh_per_page + 1);
2289 if (error) {
2290 reiserfs_write_unlock(s);
2291 goto fail;
2292 }
2293 reiserfs_update_inode_transaction(inode);
2294 }
2295 /* now go through and lock any dirty buffers on the page */
2296 do {
2297 get_bh(bh);
2298 if (!buffer_mapped(bh))
2299 continue;
2300 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2301 continue;
2302 2354
2303 if (checked) { 2355 /* last page in the file, zero out any contents past the
2304 reiserfs_prepare_for_journal(s, bh, 1); 2356 ** last byte in the file
2305 journal_mark_dirty(&th, s, bh); 2357 */
2306 continue; 2358 if (page->index >= end_index) {
2359 char *kaddr;
2360 unsigned last_offset;
2361
2362 last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2363 /* no file contents in this page */
2364 if (page->index >= end_index + 1 || !last_offset) {
2365 unlock_page(page);
2366 return 0;
2367 }
2368 kaddr = kmap_atomic(page, KM_USER0);
2369 memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset);
2370 flush_dcache_page(page);
2371 kunmap_atomic(kaddr, KM_USER0);
2307 } 2372 }
2308 /* from this point on, we know the buffer is mapped to a 2373 bh = head;
2309 * real block and not a direct item 2374 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2375 /* first map all the buffers, logging any direct items we find */
2376 do {
2377 if ((checked || buffer_dirty(bh)) && (!buffer_mapped(bh) ||
2378 (buffer_mapped(bh)
2379 && bh->b_blocknr ==
2380 0))) {
2381 /* not mapped yet, or it points to a direct item, search
2382 * the btree for the mapping info, and log any direct
2383 * items found
2384 */
2385 if ((error = map_block_for_writepage(inode, bh, block))) {
2386 goto fail;
2387 }
2388 }
2389 bh = bh->b_this_page;
2390 block++;
2391 } while (bh != head);
2392
2393 /*
2394 * we start the transaction after map_block_for_writepage,
2395 * because it can create holes in the file (an unbounded operation).
2396 * starting it here, we can make a reliable estimate for how many
2397 * blocks we're going to log
2310 */ 2398 */
2311 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2399 if (checked) {
2312 lock_buffer(bh); 2400 ClearPageChecked(page);
2313 } else { 2401 reiserfs_write_lock(s);
2314 if (test_set_buffer_locked(bh)) { 2402 error = journal_begin(&th, s, bh_per_page + 1);
2315 redirty_page_for_writepage(wbc, page); 2403 if (error) {
2316 continue; 2404 reiserfs_write_unlock(s);
2317 } 2405 goto fail;
2406 }
2407 reiserfs_update_inode_transaction(inode);
2318 } 2408 }
2319 if (test_clear_buffer_dirty(bh)) { 2409 /* now go through and lock any dirty buffers on the page */
2320 mark_buffer_async_write(bh); 2410 do {
2321 } else { 2411 get_bh(bh);
2322 unlock_buffer(bh); 2412 if (!buffer_mapped(bh))
2413 continue;
2414 if (buffer_mapped(bh) && bh->b_blocknr == 0)
2415 continue;
2416
2417 if (checked) {
2418 reiserfs_prepare_for_journal(s, bh, 1);
2419 journal_mark_dirty(&th, s, bh);
2420 continue;
2421 }
2422 /* from this point on, we know the buffer is mapped to a
2423 * real block and not a direct item
2424 */
2425 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2426 lock_buffer(bh);
2427 } else {
2428 if (test_set_buffer_locked(bh)) {
2429 redirty_page_for_writepage(wbc, page);
2430 continue;
2431 }
2432 }
2433 if (test_clear_buffer_dirty(bh)) {
2434 mark_buffer_async_write(bh);
2435 } else {
2436 unlock_buffer(bh);
2437 }
2438 } while ((bh = bh->b_this_page) != head);
2439
2440 if (checked) {
2441 error = journal_end(&th, s, bh_per_page + 1);
2442 reiserfs_write_unlock(s);
2443 if (error)
2444 goto fail;
2323 } 2445 }
2324 } while((bh = bh->b_this_page) != head); 2446 BUG_ON(PageWriteback(page));
2447 set_page_writeback(page);
2448 unlock_page(page);
2325 2449
2326 if (checked) { 2450 /*
2327 error = journal_end(&th, s, bh_per_page + 1); 2451 * since any buffer might be the only dirty buffer on the page,
2328 reiserfs_write_unlock(s); 2452 * the first submit_bh can bring the page out of writeback.
2329 if (error) 2453 * be careful with the buffers.
2330 goto fail;
2331 }
2332 BUG_ON(PageWriteback(page));
2333 set_page_writeback(page);
2334 unlock_page(page);
2335
2336 /*
2337 * since any buffer might be the only dirty buffer on the page,
2338 * the first submit_bh can bring the page out of writeback.
2339 * be careful with the buffers.
2340 */
2341 do {
2342 struct buffer_head *next = bh->b_this_page;
2343 if (buffer_async_write(bh)) {
2344 submit_bh(WRITE, bh);
2345 nr++;
2346 }
2347 put_bh(bh);
2348 bh = next;
2349 } while(bh != head);
2350
2351 error = 0;
2352done:
2353 if (nr == 0) {
2354 /*
2355 * if this page only had a direct item, it is very possible for
2356 * no io to be required without there being an error. Or,
2357 * someone else could have locked them and sent them down the
2358 * pipe without locking the page
2359 */ 2454 */
2360 bh = head ;
2361 do { 2455 do {
2362 if (!buffer_uptodate(bh)) { 2456 struct buffer_head *next = bh->b_this_page;
2363 partial = 1; 2457 if (buffer_async_write(bh)) {
2364 break; 2458 submit_bh(WRITE, bh);
2365 } 2459 nr++;
2366 bh = bh->b_this_page; 2460 }
2367 } while(bh != head); 2461 put_bh(bh);
2368 if (!partial) 2462 bh = next;
2369 SetPageUptodate(page); 2463 } while (bh != head);
2370 end_page_writeback(page);
2371 }
2372 return error;
2373
2374fail:
2375 /* catches various errors, we need to make sure any valid dirty blocks
2376 * get to the media. The page is currently locked and not marked for
2377 * writeback
2378 */
2379 ClearPageUptodate(page);
2380 bh = head;
2381 do {
2382 get_bh(bh);
2383 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2384 lock_buffer(bh);
2385 mark_buffer_async_write(bh);
2386 } else {
2387 /*
2388 * clear any dirty bits that might have come from getting
2389 * attached to a dirty page
2390 */
2391 clear_buffer_dirty(bh);
2392 }
2393 bh = bh->b_this_page;
2394 } while(bh != head);
2395 SetPageError(page);
2396 BUG_ON(PageWriteback(page));
2397 set_page_writeback(page);
2398 unlock_page(page);
2399 do {
2400 struct buffer_head *next = bh->b_this_page;
2401 if (buffer_async_write(bh)) {
2402 clear_buffer_dirty(bh);
2403 submit_bh(WRITE, bh);
2404 nr++;
2405 }
2406 put_bh(bh);
2407 bh = next;
2408 } while(bh != head);
2409 goto done;
2410}
2411 2464
2465 error = 0;
2466 done:
2467 if (nr == 0) {
2468 /*
2469 * if this page only had a direct item, it is very possible for
2470 * no io to be required without there being an error. Or,
2471 * someone else could have locked them and sent them down the
2472 * pipe without locking the page
2473 */
2474 bh = head;
2475 do {
2476 if (!buffer_uptodate(bh)) {
2477 partial = 1;
2478 break;
2479 }
2480 bh = bh->b_this_page;
2481 } while (bh != head);
2482 if (!partial)
2483 SetPageUptodate(page);
2484 end_page_writeback(page);
2485 }
2486 return error;
2412 2487
2413static int reiserfs_readpage (struct file *f, struct page * page) 2488 fail:
2414{ 2489 /* catches various errors, we need to make sure any valid dirty blocks
2415 return block_read_full_page (page, reiserfs_get_block); 2490 * get to the media. The page is currently locked and not marked for
2491 * writeback
2492 */
2493 ClearPageUptodate(page);
2494 bh = head;
2495 do {
2496 get_bh(bh);
2497 if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2498 lock_buffer(bh);
2499 mark_buffer_async_write(bh);
2500 } else {
2501 /*
2502 * clear any dirty bits that might have come from getting
2503 * attached to a dirty page
2504 */
2505 clear_buffer_dirty(bh);
2506 }
2507 bh = bh->b_this_page;
2508 } while (bh != head);
2509 SetPageError(page);
2510 BUG_ON(PageWriteback(page));
2511 set_page_writeback(page);
2512 unlock_page(page);
2513 do {
2514 struct buffer_head *next = bh->b_this_page;
2515 if (buffer_async_write(bh)) {
2516 clear_buffer_dirty(bh);
2517 submit_bh(WRITE, bh);
2518 nr++;
2519 }
2520 put_bh(bh);
2521 bh = next;
2522 } while (bh != head);
2523 goto done;
2416} 2524}
2417 2525
2526static int reiserfs_readpage(struct file *f, struct page *page)
2527{
2528 return block_read_full_page(page, reiserfs_get_block);
2529}
2418 2530
2419static int reiserfs_writepage (struct page * page, struct writeback_control *wbc) 2531static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2420{ 2532{
2421 struct inode *inode = page->mapping->host ; 2533 struct inode *inode = page->mapping->host;
2422 reiserfs_wait_on_write_block(inode->i_sb) ; 2534 reiserfs_wait_on_write_block(inode->i_sb);
2423 return reiserfs_write_full_page(page, wbc) ; 2535 return reiserfs_write_full_page(page, wbc);
2424} 2536}
2425 2537
2426static int reiserfs_prepare_write(struct file *f, struct page *page, 2538static int reiserfs_prepare_write(struct file *f, struct page *page,
2427 unsigned from, unsigned to) { 2539 unsigned from, unsigned to)
2428 struct inode *inode = page->mapping->host ; 2540{
2429 int ret; 2541 struct inode *inode = page->mapping->host;
2430 int old_ref = 0; 2542 int ret;
2431 2543 int old_ref = 0;
2432 reiserfs_wait_on_write_block(inode->i_sb) ; 2544
2433 fix_tail_page_for_writing(page) ; 2545 reiserfs_wait_on_write_block(inode->i_sb);
2434 if (reiserfs_transaction_running(inode->i_sb)) { 2546 fix_tail_page_for_writing(page);
2435 struct reiserfs_transaction_handle *th; 2547 if (reiserfs_transaction_running(inode->i_sb)) {
2436 th = (struct reiserfs_transaction_handle *)current->journal_info; 2548 struct reiserfs_transaction_handle *th;
2437 BUG_ON (!th->t_refcount); 2549 th = (struct reiserfs_transaction_handle *)current->
2438 BUG_ON (!th->t_trans_id); 2550 journal_info;
2439 old_ref = th->t_refcount; 2551 BUG_ON(!th->t_refcount);
2440 th->t_refcount++; 2552 BUG_ON(!th->t_trans_id);
2441 } 2553 old_ref = th->t_refcount;
2442 2554 th->t_refcount++;
2443 ret = block_prepare_write(page, from, to, reiserfs_get_block) ;
2444 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2445 struct reiserfs_transaction_handle *th = current->journal_info;
2446 /* this gets a little ugly. If reiserfs_get_block returned an
2447 * error and left a transaction running, we've got to close it,
2448 * and we've got to free handle if it was a persistent transaction.
2449 *
2450 * But, if we had nested into an existing transaction, we need
2451 * to just drop the ref count on the handle.
2452 *
2453 * If old_ref == 0, the transaction is from reiserfs_get_block,
2454 * and it was a persistent trans. Otherwise, it was nested above.
2455 */
2456 if (th->t_refcount > old_ref) {
2457 if (old_ref)
2458 th->t_refcount--;
2459 else {
2460 int err;
2461 reiserfs_write_lock(inode->i_sb);
2462 err = reiserfs_end_persistent_transaction(th);
2463 reiserfs_write_unlock(inode->i_sb);
2464 if (err)
2465 ret = err;
2466 }
2467 } 2555 }
2468 }
2469 return ret;
2470 2556
2471} 2557 ret = block_prepare_write(page, from, to, reiserfs_get_block);
2558 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2559 struct reiserfs_transaction_handle *th = current->journal_info;
2560 /* this gets a little ugly. If reiserfs_get_block returned an
2561 * error and left a transaction running, we've got to close it,
2562 * and we've got to free handle if it was a persistent transaction.
2563 *
2564 * But, if we had nested into an existing transaction, we need
2565 * to just drop the ref count on the handle.
2566 *
2567 * If old_ref == 0, the transaction is from reiserfs_get_block,
2568 * and it was a persistent trans. Otherwise, it was nested above.
2569 */
2570 if (th->t_refcount > old_ref) {
2571 if (old_ref)
2572 th->t_refcount--;
2573 else {
2574 int err;
2575 reiserfs_write_lock(inode->i_sb);
2576 err = reiserfs_end_persistent_transaction(th);
2577 reiserfs_write_unlock(inode->i_sb);
2578 if (err)
2579 ret = err;
2580 }
2581 }
2582 }
2583 return ret;
2472 2584
2585}
2473 2586
2474static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) { 2587static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2475 return generic_block_bmap(as, block, reiserfs_bmap) ; 2588{
2589 return generic_block_bmap(as, block, reiserfs_bmap);
2476} 2590}
2477 2591
2478static int reiserfs_commit_write(struct file *f, struct page *page, 2592static int reiserfs_commit_write(struct file *f, struct page *page,
2479 unsigned from, unsigned to) { 2593 unsigned from, unsigned to)
2480 struct inode *inode = page->mapping->host ; 2594{
2481 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2595 struct inode *inode = page->mapping->host;
2482 int ret = 0; 2596 loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2483 int update_sd = 0; 2597 int ret = 0;
2484 struct reiserfs_transaction_handle *th = NULL; 2598 int update_sd = 0;
2485 2599 struct reiserfs_transaction_handle *th = NULL;
2486 reiserfs_wait_on_write_block(inode->i_sb) ; 2600
2487 if (reiserfs_transaction_running(inode->i_sb)) { 2601 reiserfs_wait_on_write_block(inode->i_sb);
2488 th = current->journal_info; 2602 if (reiserfs_transaction_running(inode->i_sb)) {
2489 } 2603 th = current->journal_info;
2490 reiserfs_commit_page(inode, page, from, to); 2604 }
2491 2605 reiserfs_commit_page(inode, page, from, to);
2492 /* generic_commit_write does this for us, but does not update the
2493 ** transaction tracking stuff when the size changes. So, we have
2494 ** to do the i_size updates here.
2495 */
2496 if (pos > inode->i_size) {
2497 struct reiserfs_transaction_handle myth ;
2498 reiserfs_write_lock(inode->i_sb);
2499 /* If the file has grown beyond the border where it
2500 can have a tail, unmark it as needing a tail
2501 packing */
2502 if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) ||
2503 (have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
2504 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
2505
2506 ret = journal_begin(&myth, inode->i_sb, 1) ;
2507 if (ret) {
2508 reiserfs_write_unlock(inode->i_sb);
2509 goto journal_error;
2510 }
2511 reiserfs_update_inode_transaction(inode) ;
2512 inode->i_size = pos ;
2513 reiserfs_update_sd(&myth, inode) ;
2514 update_sd = 1;
2515 ret = journal_end(&myth, inode->i_sb, 1) ;
2516 reiserfs_write_unlock(inode->i_sb);
2517 if (ret)
2518 goto journal_error;
2519 }
2520 if (th) {
2521 reiserfs_write_lock(inode->i_sb);
2522 if (!update_sd)
2523 reiserfs_update_sd(th, inode) ;
2524 ret = reiserfs_end_persistent_transaction(th);
2525 reiserfs_write_unlock(inode->i_sb);
2526 if (ret)
2527 goto out;
2528 }
2529
2530 /* we test for O_SYNC here so we can commit the transaction
2531 ** for any packed tails the file might have had
2532 */
2533 if (f && (f->f_flags & O_SYNC)) {
2534 reiserfs_write_lock(inode->i_sb);
2535 ret = reiserfs_commit_for_inode(inode) ;
2536 reiserfs_write_unlock(inode->i_sb);
2537 }
2538out:
2539 return ret ;
2540 2606
2541journal_error: 2607 /* generic_commit_write does this for us, but does not update the
2542 if (th) { 2608 ** transaction tracking stuff when the size changes. So, we have
2543 reiserfs_write_lock(inode->i_sb); 2609 ** to do the i_size updates here.
2544 if (!update_sd) 2610 */
2545 reiserfs_update_sd(th, inode) ; 2611 if (pos > inode->i_size) {
2546 ret = reiserfs_end_persistent_transaction(th); 2612 struct reiserfs_transaction_handle myth;
2547 reiserfs_write_unlock(inode->i_sb); 2613 reiserfs_write_lock(inode->i_sb);
2614 /* If the file has grown beyond the border where it
2615 can have a tail, unmark it as needing a tail
2616 packing */
2617 if ((have_large_tails(inode->i_sb)
2618 && inode->i_size > i_block_size(inode) * 4)
2619 || (have_small_tails(inode->i_sb)
2620 && inode->i_size > i_block_size(inode)))
2621 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2622
2623 ret = journal_begin(&myth, inode->i_sb, 1);
2624 if (ret) {
2625 reiserfs_write_unlock(inode->i_sb);
2626 goto journal_error;
2627 }
2628 reiserfs_update_inode_transaction(inode);
2629 inode->i_size = pos;
2630 reiserfs_update_sd(&myth, inode);
2631 update_sd = 1;
2632 ret = journal_end(&myth, inode->i_sb, 1);
2633 reiserfs_write_unlock(inode->i_sb);
2634 if (ret)
2635 goto journal_error;
2636 }
2637 if (th) {
2638 reiserfs_write_lock(inode->i_sb);
2639 if (!update_sd)
2640 reiserfs_update_sd(th, inode);
2641 ret = reiserfs_end_persistent_transaction(th);
2642 reiserfs_write_unlock(inode->i_sb);
2643 if (ret)
2644 goto out;
2645 }
2646
2647 /* we test for O_SYNC here so we can commit the transaction
2648 ** for any packed tails the file might have had
2649 */
2650 if (f && (f->f_flags & O_SYNC)) {
2651 reiserfs_write_lock(inode->i_sb);
2652 ret = reiserfs_commit_for_inode(inode);
2653 reiserfs_write_unlock(inode->i_sb);
2654 }
2655 out:
2656 return ret;
2549 2657
2550 return ret; 2658 journal_error:
2659 if (th) {
2660 reiserfs_write_lock(inode->i_sb);
2661 if (!update_sd)
2662 reiserfs_update_sd(th, inode);
2663 ret = reiserfs_end_persistent_transaction(th);
2664 reiserfs_write_unlock(inode->i_sb);
2665 }
2666
2667 return ret;
2551} 2668}
2552 2669
2553void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ) 2670void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2554{ 2671{
2555 if( reiserfs_attrs( inode -> i_sb ) ) { 2672 if (reiserfs_attrs(inode->i_sb)) {
2556 if( sd_attrs & REISERFS_SYNC_FL ) 2673 if (sd_attrs & REISERFS_SYNC_FL)
2557 inode -> i_flags |= S_SYNC; 2674 inode->i_flags |= S_SYNC;
2558 else 2675 else
2559 inode -> i_flags &= ~S_SYNC; 2676 inode->i_flags &= ~S_SYNC;
2560 if( sd_attrs & REISERFS_IMMUTABLE_FL ) 2677 if (sd_attrs & REISERFS_IMMUTABLE_FL)
2561 inode -> i_flags |= S_IMMUTABLE; 2678 inode->i_flags |= S_IMMUTABLE;
2562 else 2679 else
2563 inode -> i_flags &= ~S_IMMUTABLE; 2680 inode->i_flags &= ~S_IMMUTABLE;
2564 if( sd_attrs & REISERFS_APPEND_FL ) 2681 if (sd_attrs & REISERFS_APPEND_FL)
2565 inode -> i_flags |= S_APPEND; 2682 inode->i_flags |= S_APPEND;
2566 else 2683 else
2567 inode -> i_flags &= ~S_APPEND; 2684 inode->i_flags &= ~S_APPEND;
2568 if( sd_attrs & REISERFS_NOATIME_FL ) 2685 if (sd_attrs & REISERFS_NOATIME_FL)
2569 inode -> i_flags |= S_NOATIME; 2686 inode->i_flags |= S_NOATIME;
2570 else 2687 else
2571 inode -> i_flags &= ~S_NOATIME; 2688 inode->i_flags &= ~S_NOATIME;
2572 if( sd_attrs & REISERFS_NOTAIL_FL ) 2689 if (sd_attrs & REISERFS_NOTAIL_FL)
2573 REISERFS_I(inode)->i_flags |= i_nopack_mask; 2690 REISERFS_I(inode)->i_flags |= i_nopack_mask;
2574 else 2691 else
2575 REISERFS_I(inode)->i_flags &= ~i_nopack_mask; 2692 REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2576 } 2693 }
2577} 2694}
2578 2695
2579void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ) 2696void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2580{ 2697{
2581 if( reiserfs_attrs( inode -> i_sb ) ) { 2698 if (reiserfs_attrs(inode->i_sb)) {
2582 if( inode -> i_flags & S_IMMUTABLE ) 2699 if (inode->i_flags & S_IMMUTABLE)
2583 *sd_attrs |= REISERFS_IMMUTABLE_FL; 2700 *sd_attrs |= REISERFS_IMMUTABLE_FL;
2584 else 2701 else
2585 *sd_attrs &= ~REISERFS_IMMUTABLE_FL; 2702 *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2586 if( inode -> i_flags & S_SYNC ) 2703 if (inode->i_flags & S_SYNC)
2587 *sd_attrs |= REISERFS_SYNC_FL; 2704 *sd_attrs |= REISERFS_SYNC_FL;
2588 else 2705 else
2589 *sd_attrs &= ~REISERFS_SYNC_FL; 2706 *sd_attrs &= ~REISERFS_SYNC_FL;
2590 if( inode -> i_flags & S_NOATIME ) 2707 if (inode->i_flags & S_NOATIME)
2591 *sd_attrs |= REISERFS_NOATIME_FL; 2708 *sd_attrs |= REISERFS_NOATIME_FL;
2592 else 2709 else
2593 *sd_attrs &= ~REISERFS_NOATIME_FL; 2710 *sd_attrs &= ~REISERFS_NOATIME_FL;
2594 if( REISERFS_I(inode)->i_flags & i_nopack_mask ) 2711 if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2595 *sd_attrs |= REISERFS_NOTAIL_FL; 2712 *sd_attrs |= REISERFS_NOTAIL_FL;
2596 else 2713 else
2597 *sd_attrs &= ~REISERFS_NOTAIL_FL; 2714 *sd_attrs &= ~REISERFS_NOTAIL_FL;
@@ -2603,106 +2720,107 @@ void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs )
2603*/ 2720*/
2604static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) 2721static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2605{ 2722{
2606 int ret = 1 ; 2723 int ret = 1;
2607 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; 2724 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2608 2725
2609 spin_lock(&j->j_dirty_buffers_lock) ; 2726 spin_lock(&j->j_dirty_buffers_lock);
2610 if (!buffer_mapped(bh)) { 2727 if (!buffer_mapped(bh)) {
2611 goto free_jh; 2728 goto free_jh;
2612 } 2729 }
2613 /* the page is locked, and the only places that log a data buffer 2730 /* the page is locked, and the only places that log a data buffer
2614 * also lock the page. 2731 * also lock the page.
2615 */
2616 if (reiserfs_file_data_log(inode)) {
2617 /*
2618 * very conservative, leave the buffer pinned if
2619 * anyone might need it.
2620 */
2621 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2622 ret = 0 ;
2623 }
2624 } else
2625 if (buffer_dirty(bh) || buffer_locked(bh)) {
2626 struct reiserfs_journal_list *jl;
2627 struct reiserfs_jh *jh = bh->b_private;
2628
2629 /* why is this safe?
2630 * reiserfs_setattr updates i_size in the on disk
2631 * stat data before allowing vmtruncate to be called.
2632 *
2633 * If buffer was put onto the ordered list for this
2634 * transaction, we know for sure either this transaction
2635 * or an older one already has updated i_size on disk,
2636 * and this ordered data won't be referenced in the file
2637 * if we crash.
2638 *
2639 * if the buffer was put onto the ordered list for an older
2640 * transaction, we need to leave it around
2641 */ 2732 */
2642 if (jh && (jl = jh->jl) && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) 2733 if (reiserfs_file_data_log(inode)) {
2643 ret = 0; 2734 /*
2644 } 2735 * very conservative, leave the buffer pinned if
2645free_jh: 2736 * anyone might need it.
2646 if (ret && bh->b_private) { 2737 */
2647 reiserfs_free_jh(bh); 2738 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2648 } 2739 ret = 0;
2649 spin_unlock(&j->j_dirty_buffers_lock) ; 2740 }
2650 return ret ; 2741 } else if (buffer_dirty(bh) || buffer_locked(bh)) {
2742 struct reiserfs_journal_list *jl;
2743 struct reiserfs_jh *jh = bh->b_private;
2744
2745 /* why is this safe?
2746 * reiserfs_setattr updates i_size in the on disk
2747 * stat data before allowing vmtruncate to be called.
2748 *
2749 * If buffer was put onto the ordered list for this
2750 * transaction, we know for sure either this transaction
2751 * or an older one already has updated i_size on disk,
2752 * and this ordered data won't be referenced in the file
2753 * if we crash.
2754 *
2755 * if the buffer was put onto the ordered list for an older
2756 * transaction, we need to leave it around
2757 */
2758 if (jh && (jl = jh->jl)
2759 && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2760 ret = 0;
2761 }
2762 free_jh:
2763 if (ret && bh->b_private) {
2764 reiserfs_free_jh(bh);
2765 }
2766 spin_unlock(&j->j_dirty_buffers_lock);
2767 return ret;
2651} 2768}
2652 2769
2653/* clm -- taken from fs/buffer.c:block_invalidate_page */ 2770/* clm -- taken from fs/buffer.c:block_invalidate_page */
2654static int reiserfs_invalidatepage(struct page *page, unsigned long offset) 2771static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
2655{ 2772{
2656 struct buffer_head *head, *bh, *next; 2773 struct buffer_head *head, *bh, *next;
2657 struct inode *inode = page->mapping->host; 2774 struct inode *inode = page->mapping->host;
2658 unsigned int curr_off = 0; 2775 unsigned int curr_off = 0;
2659 int ret = 1; 2776 int ret = 1;
2660 2777
2661 BUG_ON(!PageLocked(page)); 2778 BUG_ON(!PageLocked(page));
2662 2779
2663 if (offset == 0) 2780 if (offset == 0)
2664 ClearPageChecked(page); 2781 ClearPageChecked(page);
2665 2782
2666 if (!page_has_buffers(page)) 2783 if (!page_has_buffers(page))
2667 goto out; 2784 goto out;
2785
2786 head = page_buffers(page);
2787 bh = head;
2788 do {
2789 unsigned int next_off = curr_off + bh->b_size;
2790 next = bh->b_this_page;
2668 2791
2669 head = page_buffers(page); 2792 /*
2670 bh = head; 2793 * is this block fully invalidated?
2671 do { 2794 */
2672 unsigned int next_off = curr_off + bh->b_size; 2795 if (offset <= curr_off) {
2673 next = bh->b_this_page; 2796 if (invalidatepage_can_drop(inode, bh))
2797 reiserfs_unmap_buffer(bh);
2798 else
2799 ret = 0;
2800 }
2801 curr_off = next_off;
2802 bh = next;
2803 } while (bh != head);
2674 2804
2675 /* 2805 /*
2676 * is this block fully invalidated? 2806 * We release buffers only if the entire page is being invalidated.
2807 * The get_block cached value has been unconditionally invalidated,
2808 * so real IO is not possible anymore.
2677 */ 2809 */
2678 if (offset <= curr_off) { 2810 if (!offset && ret)
2679 if (invalidatepage_can_drop(inode, bh)) 2811 ret = try_to_release_page(page, 0);
2680 reiserfs_unmap_buffer(bh); 2812 out:
2681 else 2813 return ret;
2682 ret = 0;
2683 }
2684 curr_off = next_off;
2685 bh = next;
2686 } while (bh != head);
2687
2688 /*
2689 * We release buffers only if the entire page is being invalidated.
2690 * The get_block cached value has been unconditionally invalidated,
2691 * so real IO is not possible anymore.
2692 */
2693 if (!offset && ret)
2694 ret = try_to_release_page(page, 0);
2695out:
2696 return ret;
2697} 2814}
2698 2815
2699static int reiserfs_set_page_dirty(struct page *page) { 2816static int reiserfs_set_page_dirty(struct page *page)
2700 struct inode *inode = page->mapping->host; 2817{
2701 if (reiserfs_file_data_log(inode)) { 2818 struct inode *inode = page->mapping->host;
2702 SetPageChecked(page); 2819 if (reiserfs_file_data_log(inode)) {
2703 return __set_page_dirty_nobuffers(page); 2820 SetPageChecked(page);
2704 } 2821 return __set_page_dirty_nobuffers(page);
2705 return __set_page_dirty_buffers(page); 2822 }
2823 return __set_page_dirty_buffers(page);
2706} 2824}
2707 2825
2708/* 2826/*
@@ -2716,143 +2834,152 @@ static int reiserfs_set_page_dirty(struct page *page) {
2716 */ 2834 */
2717static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) 2835static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
2718{ 2836{
2719 struct inode *inode = page->mapping->host ; 2837 struct inode *inode = page->mapping->host;
2720 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ; 2838 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2721 struct buffer_head *head ; 2839 struct buffer_head *head;
2722 struct buffer_head *bh ; 2840 struct buffer_head *bh;
2723 int ret = 1 ; 2841 int ret = 1;
2724 2842
2725 WARN_ON(PageChecked(page)); 2843 WARN_ON(PageChecked(page));
2726 spin_lock(&j->j_dirty_buffers_lock) ; 2844 spin_lock(&j->j_dirty_buffers_lock);
2727 head = page_buffers(page) ; 2845 head = page_buffers(page);
2728 bh = head ; 2846 bh = head;
2729 do { 2847 do {
2730 if (bh->b_private) { 2848 if (bh->b_private) {
2731 if (!buffer_dirty(bh) && !buffer_locked(bh)) { 2849 if (!buffer_dirty(bh) && !buffer_locked(bh)) {
2732 reiserfs_free_jh(bh); 2850 reiserfs_free_jh(bh);
2733 } else { 2851 } else {
2734 ret = 0 ; 2852 ret = 0;
2735 break ; 2853 break;
2736 } 2854 }
2737 } 2855 }
2738 bh = bh->b_this_page ; 2856 bh = bh->b_this_page;
2739 } while (bh != head) ; 2857 } while (bh != head);
2740 if (ret) 2858 if (ret)
2741 ret = try_to_free_buffers(page) ; 2859 ret = try_to_free_buffers(page);
2742 spin_unlock(&j->j_dirty_buffers_lock) ; 2860 spin_unlock(&j->j_dirty_buffers_lock);
2743 return ret ; 2861 return ret;
2744} 2862}
2745 2863
2746/* We thank Mingming Cao for helping us understand in great detail what 2864/* We thank Mingming Cao for helping us understand in great detail what
2747 to do in this section of the code. */ 2865 to do in this section of the code. */
2748static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, 2866static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
2749 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 2867 const struct iovec *iov, loff_t offset,
2868 unsigned long nr_segs)
2750{ 2869{
2751 struct file *file = iocb->ki_filp; 2870 struct file *file = iocb->ki_filp;
2752 struct inode *inode = file->f_mapping->host; 2871 struct inode *inode = file->f_mapping->host;
2753 2872
2754 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 2873 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2755 offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); 2874 offset, nr_segs,
2875 reiserfs_get_blocks_direct_io, NULL);
2756} 2876}
2757 2877
2758int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { 2878int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
2759 struct inode *inode = dentry->d_inode ; 2879{
2760 int error ; 2880 struct inode *inode = dentry->d_inode;
2761 unsigned int ia_valid = attr->ia_valid; 2881 int error;
2762 reiserfs_write_lock(inode->i_sb); 2882 unsigned int ia_valid = attr->ia_valid;
2763 if (attr->ia_valid & ATTR_SIZE) { 2883 reiserfs_write_lock(inode->i_sb);
2764 /* version 2 items will be caught by the s_maxbytes check 2884 if (attr->ia_valid & ATTR_SIZE) {
2765 ** done for us in vmtruncate 2885 /* version 2 items will be caught by the s_maxbytes check
2766 */ 2886 ** done for us in vmtruncate
2767 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && 2887 */
2768 attr->ia_size > MAX_NON_LFS) { 2888 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
2769 error = -EFBIG ; 2889 attr->ia_size > MAX_NON_LFS) {
2770 goto out; 2890 error = -EFBIG;
2771 } 2891 goto out;
2772 /* fill in hole pointers in the expanding truncate case. */ 2892 }
2773 if (attr->ia_size > inode->i_size) { 2893 /* fill in hole pointers in the expanding truncate case. */
2774 error = generic_cont_expand(inode, attr->ia_size) ; 2894 if (attr->ia_size > inode->i_size) {
2775 if (REISERFS_I(inode)->i_prealloc_count > 0) { 2895 error = generic_cont_expand(inode, attr->ia_size);
2776 int err; 2896 if (REISERFS_I(inode)->i_prealloc_count > 0) {
2777 struct reiserfs_transaction_handle th ; 2897 int err;
2778 /* we're changing at most 2 bitmaps, inode + super */ 2898 struct reiserfs_transaction_handle th;
2779 err = journal_begin(&th, inode->i_sb, 4) ; 2899 /* we're changing at most 2 bitmaps, inode + super */
2780 if (!err) { 2900 err = journal_begin(&th, inode->i_sb, 4);
2781 reiserfs_discard_prealloc (&th, inode); 2901 if (!err) {
2782 err = journal_end(&th, inode->i_sb, 4) ; 2902 reiserfs_discard_prealloc(&th, inode);
2903 err = journal_end(&th, inode->i_sb, 4);
2904 }
2905 if (err)
2906 error = err;
2907 }
2908 if (error)
2909 goto out;
2783 } 2910 }
2784 if (err)
2785 error = err;
2786 }
2787 if (error)
2788 goto out;
2789 } 2911 }
2790 }
2791 2912
2792 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) || 2913 if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
2793 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) && 2914 ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
2794 (get_inode_sd_version (inode) == STAT_DATA_V1)) { 2915 (get_inode_sd_version(inode) == STAT_DATA_V1)) {
2795 /* stat data of format v3.5 has 16 bit uid and gid */ 2916 /* stat data of format v3.5 has 16 bit uid and gid */
2796 error = -EINVAL; 2917 error = -EINVAL;
2797 goto out; 2918 goto out;
2798 } 2919 }
2799
2800 error = inode_change_ok(inode, attr) ;
2801 if (!error) {
2802 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2803 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2804 error = reiserfs_chown_xattrs (inode, attr);
2805
2806 if (!error) {
2807 struct reiserfs_transaction_handle th;
2808 int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
2809
2810 /* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */
2811 error = journal_begin(&th, inode->i_sb, jbegin_count);
2812 if (error)
2813 goto out;
2814 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
2815 if (error) {
2816 journal_end(&th, inode->i_sb, jbegin_count);
2817 goto out;
2818 }
2819 /* Update corresponding info in inode so that everything is in
2820 * one transaction */
2821 if (attr->ia_valid & ATTR_UID)
2822 inode->i_uid = attr->ia_uid;
2823 if (attr->ia_valid & ATTR_GID)
2824 inode->i_gid = attr->ia_gid;
2825 mark_inode_dirty(inode);
2826 error = journal_end(&th, inode->i_sb, jbegin_count);
2827 }
2828 }
2829 if (!error)
2830 error = inode_setattr(inode, attr) ;
2831 }
2832 2920
2921 error = inode_change_ok(inode, attr);
2922 if (!error) {
2923 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
2924 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
2925 error = reiserfs_chown_xattrs(inode, attr);
2926
2927 if (!error) {
2928 struct reiserfs_transaction_handle th;
2929 int jbegin_count =
2930 2 *
2931 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
2932 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
2933 2;
2934
2935 /* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */
2936 error =
2937 journal_begin(&th, inode->i_sb,
2938 jbegin_count);
2939 if (error)
2940 goto out;
2941 error =
2942 DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
2943 if (error) {
2944 journal_end(&th, inode->i_sb,
2945 jbegin_count);
2946 goto out;
2947 }
2948 /* Update corresponding info in inode so that everything is in
2949 * one transaction */
2950 if (attr->ia_valid & ATTR_UID)
2951 inode->i_uid = attr->ia_uid;
2952 if (attr->ia_valid & ATTR_GID)
2953 inode->i_gid = attr->ia_gid;
2954 mark_inode_dirty(inode);
2955 error =
2956 journal_end(&th, inode->i_sb, jbegin_count);
2957 }
2958 }
2959 if (!error)
2960 error = inode_setattr(inode, attr);
2961 }
2833 2962
2834 if (!error && reiserfs_posixacl (inode->i_sb)) { 2963 if (!error && reiserfs_posixacl(inode->i_sb)) {
2835 if (attr->ia_valid & ATTR_MODE) 2964 if (attr->ia_valid & ATTR_MODE)
2836 error = reiserfs_acl_chmod (inode); 2965 error = reiserfs_acl_chmod(inode);
2837 } 2966 }
2838 2967
2839out: 2968 out:
2840 reiserfs_write_unlock(inode->i_sb); 2969 reiserfs_write_unlock(inode->i_sb);
2841 return error ; 2970 return error;
2842} 2971}
2843 2972
2844
2845
2846struct address_space_operations reiserfs_address_space_operations = { 2973struct address_space_operations reiserfs_address_space_operations = {
2847 .writepage = reiserfs_writepage, 2974 .writepage = reiserfs_writepage,
2848 .readpage = reiserfs_readpage, 2975 .readpage = reiserfs_readpage,
2849 .readpages = reiserfs_readpages, 2976 .readpages = reiserfs_readpages,
2850 .releasepage = reiserfs_releasepage, 2977 .releasepage = reiserfs_releasepage,
2851 .invalidatepage = reiserfs_invalidatepage, 2978 .invalidatepage = reiserfs_invalidatepage,
2852 .sync_page = block_sync_page, 2979 .sync_page = block_sync_page,
2853 .prepare_write = reiserfs_prepare_write, 2980 .prepare_write = reiserfs_prepare_write,
2854 .commit_write = reiserfs_commit_write, 2981 .commit_write = reiserfs_commit_write,
2855 .bmap = reiserfs_aop_bmap, 2982 .bmap = reiserfs_aop_bmap,
2856 .direct_IO = reiserfs_direct_IO, 2983 .direct_IO = reiserfs_direct_IO,
2857 .set_page_dirty = reiserfs_set_page_dirty, 2984 .set_page_dirty = reiserfs_set_page_dirty,
2858} ; 2985};
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 76caedf737f..81fc00285f6 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,7 @@
9#include <linux/pagemap.h> 9#include <linux/pagemap.h>
10#include <linux/smp_lock.h> 10#include <linux/smp_lock.h>
11 11
12static int reiserfs_unpack (struct inode * inode, struct file * filp); 12static int reiserfs_unpack(struct inode *inode, struct file *filp);
13 13
14/* 14/*
15** reiserfs_ioctl - handler for ioctl for inode 15** reiserfs_ioctl - handler for ioctl for inode
@@ -19,69 +19,72 @@ static int reiserfs_unpack (struct inode * inode, struct file * filp);
19** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION 19** 2) REISERFS_IOC_[GS]ETFLAGS, REISERFS_IOC_[GS]ETVERSION
20** 3) That's all for a while ... 20** 3) That's all for a while ...
21*/ 21*/
22int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 22int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
23 unsigned long arg) 23 unsigned long arg)
24{ 24{
25 unsigned int flags; 25 unsigned int flags;
26 26
27 switch (cmd) { 27 switch (cmd) {
28 case REISERFS_IOC_UNPACK: 28 case REISERFS_IOC_UNPACK:
29 if( S_ISREG( inode -> i_mode ) ) { 29 if (S_ISREG(inode->i_mode)) {
30 if (arg) 30 if (arg)
31 return reiserfs_unpack (inode, filp); 31 return reiserfs_unpack(inode, filp);
32 else 32 else
33 return 0; 33 return 0;
34 } else 34 } else
35 return -ENOTTY; 35 return -ENOTTY;
36 /* following two cases are taken from fs/ext2/ioctl.c by Remy 36 /* following two cases are taken from fs/ext2/ioctl.c by Remy
37 Card (card@masi.ibp.fr) */ 37 Card (card@masi.ibp.fr) */
38 case REISERFS_IOC_GETFLAGS: 38 case REISERFS_IOC_GETFLAGS:
39 if (!reiserfs_attrs (inode->i_sb)) 39 if (!reiserfs_attrs(inode->i_sb))
40 return -ENOTTY; 40 return -ENOTTY;
41 41
42 flags = REISERFS_I(inode) -> i_attrs; 42 flags = REISERFS_I(inode)->i_attrs;
43 i_attrs_to_sd_attrs( inode, ( __u16 * ) &flags ); 43 i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
44 return put_user(flags, (int __user *) arg); 44 return put_user(flags, (int __user *)arg);
45 case REISERFS_IOC_SETFLAGS: { 45 case REISERFS_IOC_SETFLAGS:{
46 if (!reiserfs_attrs (inode->i_sb)) 46 if (!reiserfs_attrs(inode->i_sb))
47 return -ENOTTY; 47 return -ENOTTY;
48 48
49 if (IS_RDONLY(inode)) 49 if (IS_RDONLY(inode))
50 return -EROFS; 50 return -EROFS;
51 51
52 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) 52 if ((current->fsuid != inode->i_uid)
53 return -EPERM; 53 && !capable(CAP_FOWNER))
54 return -EPERM;
54 55
55 if (get_user(flags, (int __user *) arg)) 56 if (get_user(flags, (int __user *)arg))
56 return -EFAULT; 57 return -EFAULT;
57 58
58 if ( ( ( flags ^ REISERFS_I(inode) -> i_attrs) & ( REISERFS_IMMUTABLE_FL | REISERFS_APPEND_FL)) && 59 if (((flags ^ REISERFS_I(inode)->
59 !capable( CAP_LINUX_IMMUTABLE ) ) 60 i_attrs) & (REISERFS_IMMUTABLE_FL |
60 return -EPERM; 61 REISERFS_APPEND_FL))
61 62 && !capable(CAP_LINUX_IMMUTABLE))
62 if( ( flags & REISERFS_NOTAIL_FL ) && 63 return -EPERM;
63 S_ISREG( inode -> i_mode ) ) { 64
65 if ((flags & REISERFS_NOTAIL_FL) &&
66 S_ISREG(inode->i_mode)) {
64 int result; 67 int result;
65 68
66 result = reiserfs_unpack( inode, filp ); 69 result = reiserfs_unpack(inode, filp);
67 if( result ) 70 if (result)
68 return result; 71 return result;
72 }
73 sd_attrs_to_i_attrs(flags, inode);
74 REISERFS_I(inode)->i_attrs = flags;
75 inode->i_ctime = CURRENT_TIME_SEC;
76 mark_inode_dirty(inode);
77 return 0;
69 } 78 }
70 sd_attrs_to_i_attrs( flags, inode );
71 REISERFS_I(inode) -> i_attrs = flags;
72 inode->i_ctime = CURRENT_TIME_SEC;
73 mark_inode_dirty(inode);
74 return 0;
75 }
76 case REISERFS_IOC_GETVERSION: 79 case REISERFS_IOC_GETVERSION:
77 return put_user(inode->i_generation, (int __user *) arg); 80 return put_user(inode->i_generation, (int __user *)arg);
78 case REISERFS_IOC_SETVERSION: 81 case REISERFS_IOC_SETVERSION:
79 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) 82 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
80 return -EPERM; 83 return -EPERM;
81 if (IS_RDONLY(inode)) 84 if (IS_RDONLY(inode))
82 return -EROFS; 85 return -EROFS;
83 if (get_user(inode->i_generation, (int __user *) arg)) 86 if (get_user(inode->i_generation, (int __user *)arg))
84 return -EFAULT; 87 return -EFAULT;
85 inode->i_ctime = CURRENT_TIME_SEC; 88 inode->i_ctime = CURRENT_TIME_SEC;
86 mark_inode_dirty(inode); 89 mark_inode_dirty(inode);
87 return 0; 90 return 0;
@@ -95,63 +98,65 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
95** Function try to convert tail from direct item into indirect. 98** Function try to convert tail from direct item into indirect.
96** It set up nopack attribute in the REISERFS_I(inode)->nopack 99** It set up nopack attribute in the REISERFS_I(inode)->nopack
97*/ 100*/
98static int reiserfs_unpack (struct inode * inode, struct file * filp) 101static int reiserfs_unpack(struct inode *inode, struct file *filp)
99{ 102{
100 int retval = 0; 103 int retval = 0;
101 int index ; 104 int index;
102 struct page *page ; 105 struct page *page;
103 struct address_space *mapping ; 106 struct address_space *mapping;
104 unsigned long write_from ; 107 unsigned long write_from;
105 unsigned long blocksize = inode->i_sb->s_blocksize ; 108 unsigned long blocksize = inode->i_sb->s_blocksize;
106 109
107 if (inode->i_size == 0) { 110 if (inode->i_size == 0) {
108 REISERFS_I(inode)->i_flags |= i_nopack_mask; 111 REISERFS_I(inode)->i_flags |= i_nopack_mask;
109 return 0 ; 112 return 0;
110 } 113 }
111 /* ioctl already done */ 114 /* ioctl already done */
112 if (REISERFS_I(inode)->i_flags & i_nopack_mask) { 115 if (REISERFS_I(inode)->i_flags & i_nopack_mask) {
113 return 0 ; 116 return 0;
114 } 117 }
115 reiserfs_write_lock(inode->i_sb); 118 reiserfs_write_lock(inode->i_sb);
116 119
117 /* we need to make sure nobody is changing the file size beneath 120 /* we need to make sure nobody is changing the file size beneath
118 ** us 121 ** us
119 */ 122 */
120 down(&inode->i_sem) ; 123 down(&inode->i_sem);
121 124
122 write_from = inode->i_size & (blocksize - 1) ; 125 write_from = inode->i_size & (blocksize - 1);
123 /* if we are on a block boundary, we are already unpacked. */ 126 /* if we are on a block boundary, we are already unpacked. */
124 if ( write_from == 0) { 127 if (write_from == 0) {
128 REISERFS_I(inode)->i_flags |= i_nopack_mask;
129 goto out;
130 }
131
132 /* we unpack by finding the page with the tail, and calling
133 ** reiserfs_prepare_write on that page. This will force a
134 ** reiserfs_get_block to unpack the tail for us.
135 */
136 index = inode->i_size >> PAGE_CACHE_SHIFT;
137 mapping = inode->i_mapping;
138 page = grab_cache_page(mapping, index);
139 retval = -ENOMEM;
140 if (!page) {
141 goto out;
142 }
143 retval =
144 mapping->a_ops->prepare_write(NULL, page, write_from, write_from);
145 if (retval)
146 goto out_unlock;
147
148 /* conversion can change page contents, must flush */
149 flush_dcache_page(page);
150 retval =
151 mapping->a_ops->commit_write(NULL, page, write_from, write_from);
125 REISERFS_I(inode)->i_flags |= i_nopack_mask; 152 REISERFS_I(inode)->i_flags |= i_nopack_mask;
126 goto out ; 153
127 } 154 out_unlock:
128 155 unlock_page(page);
129 /* we unpack by finding the page with the tail, and calling 156 page_cache_release(page);
130 ** reiserfs_prepare_write on that page. This will force a 157
131 ** reiserfs_get_block to unpack the tail for us. 158 out:
132 */ 159 up(&inode->i_sem);
133 index = inode->i_size >> PAGE_CACHE_SHIFT ; 160 reiserfs_write_unlock(inode->i_sb);
134 mapping = inode->i_mapping ; 161 return retval;
135 page = grab_cache_page(mapping, index) ;
136 retval = -ENOMEM;
137 if (!page) {
138 goto out ;
139 }
140 retval = mapping->a_ops->prepare_write(NULL, page, write_from, write_from) ;
141 if (retval)
142 goto out_unlock ;
143
144 /* conversion can change page contents, must flush */
145 flush_dcache_page(page) ;
146 retval = mapping->a_ops->commit_write(NULL, page, write_from, write_from) ;
147 REISERFS_I(inode)->i_flags |= i_nopack_mask;
148
149out_unlock:
150 unlock_page(page) ;
151 page_cache_release(page) ;
152
153out:
154 up(&inode->i_sem) ;
155 reiserfs_write_unlock(inode->i_sb);
156 return retval;
157} 162}
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index e477aeba8c9..e237cd668e5 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -14,760 +14,729 @@
14////////////////////////////////////////////////////////////////////////////// 14//////////////////////////////////////////////////////////////////////////////
15// stat data functions 15// stat data functions
16// 16//
17static int sd_bytes_number (struct item_head * ih, int block_size) 17static int sd_bytes_number(struct item_head *ih, int block_size)
18{ 18{
19 return 0; 19 return 0;
20} 20}
21 21
22static void sd_decrement_key (struct cpu_key * key) 22static void sd_decrement_key(struct cpu_key *key)
23{ 23{
24 key->on_disk_key.k_objectid --; 24 key->on_disk_key.k_objectid--;
25 set_cpu_key_k_type (key, TYPE_ANY); 25 set_cpu_key_k_type(key, TYPE_ANY);
26 set_cpu_key_k_offset(key, (loff_t)(-1)); 26 set_cpu_key_k_offset(key, (loff_t) (-1));
27} 27}
28 28
29static int sd_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 29static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
30{ 30{
31 return 0; 31 return 0;
32} 32}
33 33
34 34static char *print_time(time_t t)
35
36static char * print_time (time_t t)
37{ 35{
38 static char timebuf[256]; 36 static char timebuf[256];
39 37
40 sprintf (timebuf, "%ld", t); 38 sprintf(timebuf, "%ld", t);
41 return timebuf; 39 return timebuf;
42} 40}
43 41
44 42static void sd_print_item(struct item_head *ih, char *item)
45static void sd_print_item (struct item_head * ih, char * item)
46{ 43{
47 printk ("\tmode | size | nlinks | first direct | mtime\n"); 44 printk("\tmode | size | nlinks | first direct | mtime\n");
48 if (stat_data_v1 (ih)) { 45 if (stat_data_v1(ih)) {
49 struct stat_data_v1 * sd = (struct stat_data_v1 *)item; 46 struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
50 47
51 printk ("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd), 48 printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
52 sd_v1_size(sd), sd_v1_nlink(sd), sd_v1_first_direct_byte(sd), 49 sd_v1_size(sd), sd_v1_nlink(sd),
53 print_time( sd_v1_mtime(sd) ) ); 50 sd_v1_first_direct_byte(sd),
54 } else { 51 print_time(sd_v1_mtime(sd)));
55 struct stat_data * sd = (struct stat_data *)item; 52 } else {
53 struct stat_data *sd = (struct stat_data *)item;
56 54
57 printk ("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd), 55 printk("\t0%-6o | %6Lu | %2u | %d | %s\n", sd_v2_mode(sd),
58 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd), 56 (unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
59 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd))); 57 sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
60 } 58 }
61} 59}
62 60
63static void sd_check_item (struct item_head * ih, char * item) 61static void sd_check_item(struct item_head *ih, char *item)
64{ 62{
65 // FIXME: type something here! 63 // FIXME: type something here!
66} 64}
67 65
68 66static int sd_create_vi(struct virtual_node *vn,
69static int sd_create_vi (struct virtual_node * vn, 67 struct virtual_item *vi,
70 struct virtual_item * vi, 68 int is_affected, int insert_size)
71 int is_affected,
72 int insert_size)
73{ 69{
74 vi->vi_index = TYPE_STAT_DATA; 70 vi->vi_index = TYPE_STAT_DATA;
75 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? 71 //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed?
76 return 0; 72 return 0;
77} 73}
78 74
79 75static int sd_check_left(struct virtual_item *vi, int free,
80static int sd_check_left (struct virtual_item * vi, int free, 76 int start_skip, int end_skip)
81 int start_skip, int end_skip)
82{ 77{
83 if (start_skip || end_skip) 78 if (start_skip || end_skip)
84 BUG (); 79 BUG();
85 return -1; 80 return -1;
86} 81}
87 82
88 83static int sd_check_right(struct virtual_item *vi, int free)
89static int sd_check_right (struct virtual_item * vi, int free)
90{ 84{
91 return -1; 85 return -1;
92} 86}
93 87
94static int sd_part_size (struct virtual_item * vi, int first, int count) 88static int sd_part_size(struct virtual_item *vi, int first, int count)
95{ 89{
96 if (count) 90 if (count)
97 BUG (); 91 BUG();
98 return 0; 92 return 0;
99} 93}
100 94
101static int sd_unit_num (struct virtual_item * vi) 95static int sd_unit_num(struct virtual_item *vi)
102{ 96{
103 return vi->vi_item_len - IH_SIZE; 97 return vi->vi_item_len - IH_SIZE;
104} 98}
105 99
106 100static void sd_print_vi(struct virtual_item *vi)
107static void sd_print_vi (struct virtual_item * vi)
108{ 101{
109 reiserfs_warning (NULL, "STATDATA, index %d, type 0x%x, %h", 102 reiserfs_warning(NULL, "STATDATA, index %d, type 0x%x, %h",
110 vi->vi_index, vi->vi_type, vi->vi_ih); 103 vi->vi_index, vi->vi_type, vi->vi_ih);
111} 104}
112 105
113static struct item_operations stat_data_ops = { 106static struct item_operations stat_data_ops = {
114 .bytes_number = sd_bytes_number, 107 .bytes_number = sd_bytes_number,
115 .decrement_key = sd_decrement_key, 108 .decrement_key = sd_decrement_key,
116 .is_left_mergeable = sd_is_left_mergeable, 109 .is_left_mergeable = sd_is_left_mergeable,
117 .print_item = sd_print_item, 110 .print_item = sd_print_item,
118 .check_item = sd_check_item, 111 .check_item = sd_check_item,
119 112
120 .create_vi = sd_create_vi, 113 .create_vi = sd_create_vi,
121 .check_left = sd_check_left, 114 .check_left = sd_check_left,
122 .check_right = sd_check_right, 115 .check_right = sd_check_right,
123 .part_size = sd_part_size, 116 .part_size = sd_part_size,
124 .unit_num = sd_unit_num, 117 .unit_num = sd_unit_num,
125 .print_vi = sd_print_vi 118 .print_vi = sd_print_vi
126}; 119};
127 120
128
129
130////////////////////////////////////////////////////////////////////////////// 121//////////////////////////////////////////////////////////////////////////////
131// direct item functions 122// direct item functions
132// 123//
133static int direct_bytes_number (struct item_head * ih, int block_size) 124static int direct_bytes_number(struct item_head *ih, int block_size)
134{ 125{
135 return ih_item_len(ih); 126 return ih_item_len(ih);
136} 127}
137 128
138
139// FIXME: this should probably switch to indirect as well 129// FIXME: this should probably switch to indirect as well
140static void direct_decrement_key (struct cpu_key * key) 130static void direct_decrement_key(struct cpu_key *key)
141{ 131{
142 cpu_key_k_offset_dec (key); 132 cpu_key_k_offset_dec(key);
143 if (cpu_key_k_offset (key) == 0) 133 if (cpu_key_k_offset(key) == 0)
144 set_cpu_key_k_type (key, TYPE_STAT_DATA); 134 set_cpu_key_k_type(key, TYPE_STAT_DATA);
145} 135}
146 136
147 137static int direct_is_left_mergeable(struct reiserfs_key *key,
148static int direct_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 138 unsigned long bsize)
149{ 139{
150 int version = le_key_version (key); 140 int version = le_key_version(key);
151 return ((le_key_k_offset (version, key) & (bsize - 1)) != 1); 141 return ((le_key_k_offset(version, key) & (bsize - 1)) != 1);
152} 142}
153 143
154 144static void direct_print_item(struct item_head *ih, char *item)
155static void direct_print_item (struct item_head * ih, char * item)
156{ 145{
157 int j = 0; 146 int j = 0;
158 147
159// return; 148// return;
160 printk ("\""); 149 printk("\"");
161 while (j < ih_item_len(ih)) 150 while (j < ih_item_len(ih))
162 printk ("%c", item[j++]); 151 printk("%c", item[j++]);
163 printk ("\"\n"); 152 printk("\"\n");
164} 153}
165 154
166 155static void direct_check_item(struct item_head *ih, char *item)
167static void direct_check_item (struct item_head * ih, char * item)
168{ 156{
169 // FIXME: type something here! 157 // FIXME: type something here!
170} 158}
171 159
172 160static int direct_create_vi(struct virtual_node *vn,
173static int direct_create_vi (struct virtual_node * vn, 161 struct virtual_item *vi,
174 struct virtual_item * vi, 162 int is_affected, int insert_size)
175 int is_affected,
176 int insert_size)
177{ 163{
178 vi->vi_index = TYPE_DIRECT; 164 vi->vi_index = TYPE_DIRECT;
179 //vi->vi_type |= VI_TYPE_DIRECT; 165 //vi->vi_type |= VI_TYPE_DIRECT;
180 return 0; 166 return 0;
181} 167}
182 168
183static int direct_check_left (struct virtual_item * vi, int free, 169static int direct_check_left(struct virtual_item *vi, int free,
184 int start_skip, int end_skip) 170 int start_skip, int end_skip)
185{ 171{
186 int bytes; 172 int bytes;
187 173
188 bytes = free - free % 8; 174 bytes = free - free % 8;
189 return bytes ?: -1; 175 return bytes ? : -1;
190} 176}
191 177
192 178static int direct_check_right(struct virtual_item *vi, int free)
193static int direct_check_right (struct virtual_item * vi, int free)
194{ 179{
195 return direct_check_left (vi, free, 0, 0); 180 return direct_check_left(vi, free, 0, 0);
196} 181}
197 182
198static int direct_part_size (struct virtual_item * vi, int first, int count) 183static int direct_part_size(struct virtual_item *vi, int first, int count)
199{ 184{
200 return count; 185 return count;
201} 186}
202 187
203 188static int direct_unit_num(struct virtual_item *vi)
204static int direct_unit_num (struct virtual_item * vi)
205{ 189{
206 return vi->vi_item_len - IH_SIZE; 190 return vi->vi_item_len - IH_SIZE;
207} 191}
208 192
209 193static void direct_print_vi(struct virtual_item *vi)
210static void direct_print_vi (struct virtual_item * vi)
211{ 194{
212 reiserfs_warning (NULL, "DIRECT, index %d, type 0x%x, %h", 195 reiserfs_warning(NULL, "DIRECT, index %d, type 0x%x, %h",
213 vi->vi_index, vi->vi_type, vi->vi_ih); 196 vi->vi_index, vi->vi_type, vi->vi_ih);
214} 197}
215 198
216static struct item_operations direct_ops = { 199static struct item_operations direct_ops = {
217 .bytes_number = direct_bytes_number, 200 .bytes_number = direct_bytes_number,
218 .decrement_key = direct_decrement_key, 201 .decrement_key = direct_decrement_key,
219 .is_left_mergeable = direct_is_left_mergeable, 202 .is_left_mergeable = direct_is_left_mergeable,
220 .print_item = direct_print_item, 203 .print_item = direct_print_item,
221 .check_item = direct_check_item, 204 .check_item = direct_check_item,
222 205
223 .create_vi = direct_create_vi, 206 .create_vi = direct_create_vi,
224 .check_left = direct_check_left, 207 .check_left = direct_check_left,
225 .check_right = direct_check_right, 208 .check_right = direct_check_right,
226 .part_size = direct_part_size, 209 .part_size = direct_part_size,
227 .unit_num = direct_unit_num, 210 .unit_num = direct_unit_num,
228 .print_vi = direct_print_vi 211 .print_vi = direct_print_vi
229}; 212};
230 213
231
232
233////////////////////////////////////////////////////////////////////////////// 214//////////////////////////////////////////////////////////////////////////////
234// indirect item functions 215// indirect item functions
235// 216//
236 217
237static int indirect_bytes_number (struct item_head * ih, int block_size) 218static int indirect_bytes_number(struct item_head *ih, int block_size)
238{ 219{
239 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); 220 return ih_item_len(ih) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih);
240} 221}
241 222
242
243// decrease offset, if it becomes 0, change type to stat data 223// decrease offset, if it becomes 0, change type to stat data
244static void indirect_decrement_key (struct cpu_key * key) 224static void indirect_decrement_key(struct cpu_key *key)
245{ 225{
246 cpu_key_k_offset_dec (key); 226 cpu_key_k_offset_dec(key);
247 if (cpu_key_k_offset (key) == 0) 227 if (cpu_key_k_offset(key) == 0)
248 set_cpu_key_k_type (key, TYPE_STAT_DATA); 228 set_cpu_key_k_type(key, TYPE_STAT_DATA);
249} 229}
250 230
251
252// if it is not first item of the body, then it is mergeable 231// if it is not first item of the body, then it is mergeable
253static int indirect_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 232static int indirect_is_left_mergeable(struct reiserfs_key *key,
233 unsigned long bsize)
254{ 234{
255 int version = le_key_version (key); 235 int version = le_key_version(key);
256 return (le_key_k_offset (version, key) != 1); 236 return (le_key_k_offset(version, key) != 1);
257} 237}
258 238
259
260// printing of indirect item 239// printing of indirect item
261static void start_new_sequence (__u32 * start, int * len, __u32 new) 240static void start_new_sequence(__u32 * start, int *len, __u32 new)
262{ 241{
263 *start = new; 242 *start = new;
264 *len = 1; 243 *len = 1;
265} 244}
266 245
267 246static int sequence_finished(__u32 start, int *len, __u32 new)
268static int sequence_finished (__u32 start, int * len, __u32 new)
269{ 247{
270 if (start == INT_MAX) 248 if (start == INT_MAX)
271 return 1; 249 return 1;
272 250
273 if (start == 0 && new == 0) { 251 if (start == 0 && new == 0) {
274 (*len) ++; 252 (*len)++;
275 return 0; 253 return 0;
276 } 254 }
277 if (start != 0 && (start + *len) == new) { 255 if (start != 0 && (start + *len) == new) {
278 (*len) ++; 256 (*len)++;
279 return 0; 257 return 0;
280 } 258 }
281 return 1; 259 return 1;
282} 260}
283 261
284static void print_sequence (__u32 start, int len) 262static void print_sequence(__u32 start, int len)
285{ 263{
286 if (start == INT_MAX) 264 if (start == INT_MAX)
287 return; 265 return;
288 266
289 if (len == 1) 267 if (len == 1)
290 printk (" %d", start); 268 printk(" %d", start);
291 else 269 else
292 printk (" %d(%d)", start, len); 270 printk(" %d(%d)", start, len);
293} 271}
294 272
295 273static void indirect_print_item(struct item_head *ih, char *item)
296static void indirect_print_item (struct item_head * ih, char * item)
297{ 274{
298 int j; 275 int j;
299 __le32 * unp; 276 __le32 *unp;
300 __u32 prev = INT_MAX; 277 __u32 prev = INT_MAX;
301 int num; 278 int num;
302 279
303 unp = (__le32 *)item; 280 unp = (__le32 *) item;
304 281
305 if (ih_item_len(ih) % UNFM_P_SIZE) 282 if (ih_item_len(ih) % UNFM_P_SIZE)
306 reiserfs_warning (NULL, "indirect_print_item: invalid item len"); 283 reiserfs_warning(NULL, "indirect_print_item: invalid item len");
307 284
308 printk ("%d pointers\n[ ", (int)I_UNFM_NUM (ih)); 285 printk("%d pointers\n[ ", (int)I_UNFM_NUM(ih));
309 for (j = 0; j < I_UNFM_NUM (ih); j ++) { 286 for (j = 0; j < I_UNFM_NUM(ih); j++) {
310 if (sequence_finished (prev, &num, get_block_num(unp, j))) { 287 if (sequence_finished(prev, &num, get_block_num(unp, j))) {
311 print_sequence (prev, num); 288 print_sequence(prev, num);
312 start_new_sequence (&prev, &num, get_block_num(unp, j)); 289 start_new_sequence(&prev, &num, get_block_num(unp, j));
290 }
313 } 291 }
314 } 292 print_sequence(prev, num);
315 print_sequence (prev, num); 293 printk("]\n");
316 printk ("]\n");
317} 294}
318 295
319static void indirect_check_item (struct item_head * ih, char * item) 296static void indirect_check_item(struct item_head *ih, char *item)
320{ 297{
321 // FIXME: type something here! 298 // FIXME: type something here!
322} 299}
323 300
324 301static int indirect_create_vi(struct virtual_node *vn,
325static int indirect_create_vi (struct virtual_node * vn, 302 struct virtual_item *vi,
326 struct virtual_item * vi, 303 int is_affected, int insert_size)
327 int is_affected,
328 int insert_size)
329{ 304{
330 vi->vi_index = TYPE_INDIRECT; 305 vi->vi_index = TYPE_INDIRECT;
331 //vi->vi_type |= VI_TYPE_INDIRECT; 306 //vi->vi_type |= VI_TYPE_INDIRECT;
332 return 0; 307 return 0;
333} 308}
334 309
335static int indirect_check_left (struct virtual_item * vi, int free, 310static int indirect_check_left(struct virtual_item *vi, int free,
336 int start_skip, int end_skip) 311 int start_skip, int end_skip)
337{ 312{
338 int bytes; 313 int bytes;
339 314
340 bytes = free - free % UNFM_P_SIZE; 315 bytes = free - free % UNFM_P_SIZE;
341 return bytes ?: -1; 316 return bytes ? : -1;
342} 317}
343 318
344 319static int indirect_check_right(struct virtual_item *vi, int free)
345static int indirect_check_right (struct virtual_item * vi, int free)
346{ 320{
347 return indirect_check_left (vi, free, 0, 0); 321 return indirect_check_left(vi, free, 0, 0);
348} 322}
349 323
350
351
352// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) 324// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right)
353static int indirect_part_size (struct virtual_item * vi, int first, int units) 325static int indirect_part_size(struct virtual_item *vi, int first, int units)
354{ 326{
355 // unit of indirect item is byte (yet) 327 // unit of indirect item is byte (yet)
356 return units; 328 return units;
357} 329}
358 330
359static int indirect_unit_num (struct virtual_item * vi) 331static int indirect_unit_num(struct virtual_item *vi)
360{ 332{
361 // unit of indirect item is byte (yet) 333 // unit of indirect item is byte (yet)
362 return vi->vi_item_len - IH_SIZE; 334 return vi->vi_item_len - IH_SIZE;
363} 335}
364 336
365static void indirect_print_vi (struct virtual_item * vi) 337static void indirect_print_vi(struct virtual_item *vi)
366{ 338{
367 reiserfs_warning (NULL, "INDIRECT, index %d, type 0x%x, %h", 339 reiserfs_warning(NULL, "INDIRECT, index %d, type 0x%x, %h",
368 vi->vi_index, vi->vi_type, vi->vi_ih); 340 vi->vi_index, vi->vi_type, vi->vi_ih);
369} 341}
370 342
371static struct item_operations indirect_ops = { 343static struct item_operations indirect_ops = {
372 .bytes_number = indirect_bytes_number, 344 .bytes_number = indirect_bytes_number,
373 .decrement_key = indirect_decrement_key, 345 .decrement_key = indirect_decrement_key,
374 .is_left_mergeable = indirect_is_left_mergeable, 346 .is_left_mergeable = indirect_is_left_mergeable,
375 .print_item = indirect_print_item, 347 .print_item = indirect_print_item,
376 .check_item = indirect_check_item, 348 .check_item = indirect_check_item,
377 349
378 .create_vi = indirect_create_vi, 350 .create_vi = indirect_create_vi,
379 .check_left = indirect_check_left, 351 .check_left = indirect_check_left,
380 .check_right = indirect_check_right, 352 .check_right = indirect_check_right,
381 .part_size = indirect_part_size, 353 .part_size = indirect_part_size,
382 .unit_num = indirect_unit_num, 354 .unit_num = indirect_unit_num,
383 .print_vi = indirect_print_vi 355 .print_vi = indirect_print_vi
384}; 356};
385 357
386
387////////////////////////////////////////////////////////////////////////////// 358//////////////////////////////////////////////////////////////////////////////
388// direntry functions 359// direntry functions
389// 360//
390 361
391 362static int direntry_bytes_number(struct item_head *ih, int block_size)
392static int direntry_bytes_number (struct item_head * ih, int block_size)
393{ 363{
394 reiserfs_warning (NULL, "vs-16090: direntry_bytes_number: " 364 reiserfs_warning(NULL, "vs-16090: direntry_bytes_number: "
395 "bytes number is asked for direntry"); 365 "bytes number is asked for direntry");
396 return 0; 366 return 0;
397}
398
399static void direntry_decrement_key (struct cpu_key * key)
400{
401 cpu_key_k_offset_dec (key);
402 if (cpu_key_k_offset (key) == 0)
403 set_cpu_key_k_type (key, TYPE_STAT_DATA);
404} 367}
405 368
406 369static void direntry_decrement_key(struct cpu_key *key)
407static int direntry_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize)
408{ 370{
409 if (le32_to_cpu (key->u.k_offset_v1.k_offset) == DOT_OFFSET) 371 cpu_key_k_offset_dec(key);
410 return 0; 372 if (cpu_key_k_offset(key) == 0)
411 return 1; 373 set_cpu_key_k_type(key, TYPE_STAT_DATA);
412
413} 374}
414 375
415 376static int direntry_is_left_mergeable(struct reiserfs_key *key,
416static void direntry_print_item (struct item_head * ih, char * item) 377 unsigned long bsize)
417{ 378{
418 int i; 379 if (le32_to_cpu(key->u.k_offset_v1.k_offset) == DOT_OFFSET)
419 int namelen; 380 return 0;
420 struct reiserfs_de_head * deh; 381 return 1;
421 char * name;
422 static char namebuf [80];
423
424
425 printk ("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", "Key of pointed object", "Hash", "Gen number", "Status");
426 382
427 deh = (struct reiserfs_de_head *)item; 383}
428 384
429 for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { 385static void direntry_print_item(struct item_head *ih, char *item)
430 namelen = (i ? (deh_location(deh - 1)) : ih_item_len(ih)) - deh_location(deh); 386{
431 name = item + deh_location(deh); 387 int i;
432 if (name[namelen-1] == 0) 388 int namelen;
433 namelen = strlen (name); 389 struct reiserfs_de_head *deh;
434 namebuf[0] = '"'; 390 char *name;
435 if (namelen > sizeof (namebuf) - 3) { 391 static char namebuf[80];
436 strncpy (namebuf + 1, name, sizeof (namebuf) - 3); 392
437 namebuf[sizeof (namebuf) - 2] = '"'; 393 printk("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name",
438 namebuf[sizeof (namebuf) - 1] = 0; 394 "Key of pointed object", "Hash", "Gen number", "Status");
439 } else { 395
440 memcpy (namebuf + 1, name, namelen); 396 deh = (struct reiserfs_de_head *)item;
441 namebuf[namelen + 1] = '"'; 397
442 namebuf[namelen + 2] = 0; 398 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
399 namelen =
400 (i ? (deh_location(deh - 1)) : ih_item_len(ih)) -
401 deh_location(deh);
402 name = item + deh_location(deh);
403 if (name[namelen - 1] == 0)
404 namelen = strlen(name);
405 namebuf[0] = '"';
406 if (namelen > sizeof(namebuf) - 3) {
407 strncpy(namebuf + 1, name, sizeof(namebuf) - 3);
408 namebuf[sizeof(namebuf) - 2] = '"';
409 namebuf[sizeof(namebuf) - 1] = 0;
410 } else {
411 memcpy(namebuf + 1, name, namelen);
412 namebuf[namelen + 1] = '"';
413 namebuf[namelen + 2] = 0;
414 }
415
416 printk("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
417 i, namebuf,
418 deh_dir_id(deh), deh_objectid(deh),
419 GET_HASH_VALUE(deh_offset(deh)),
420 GET_GENERATION_NUMBER((deh_offset(deh))),
421 (de_hidden(deh)) ? "HIDDEN" : "VISIBLE");
443 } 422 }
444
445 printk ("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n",
446 i, namebuf,
447 deh_dir_id(deh), deh_objectid(deh),
448 GET_HASH_VALUE (deh_offset (deh)), GET_GENERATION_NUMBER ((deh_offset (deh))),
449 (de_hidden (deh)) ? "HIDDEN" : "VISIBLE");
450 }
451} 423}
452 424
453 425static void direntry_check_item(struct item_head *ih, char *item)
454static void direntry_check_item (struct item_head * ih, char * item)
455{ 426{
456 int i; 427 int i;
457 struct reiserfs_de_head * deh; 428 struct reiserfs_de_head *deh;
458 429
459 // FIXME: type something here! 430 // FIXME: type something here!
460 deh = (struct reiserfs_de_head *)item; 431 deh = (struct reiserfs_de_head *)item;
461 for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { 432 for (i = 0; i < I_ENTRY_COUNT(ih); i++, deh++) {
462 ; 433 ;
463 } 434 }
464} 435}
465 436
466
467
468#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 437#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1
469 438
470/* 439/*
471 * function returns old entry number in directory item in real node 440 * function returns old entry number in directory item in real node
472 * using new entry number in virtual item in virtual node */ 441 * using new entry number in virtual item in virtual node */
473static inline int old_entry_num (int is_affected, int virtual_entry_num, int pos_in_item, int mode) 442static inline int old_entry_num(int is_affected, int virtual_entry_num,
443 int pos_in_item, int mode)
474{ 444{
475 if ( mode == M_INSERT || mode == M_DELETE) 445 if (mode == M_INSERT || mode == M_DELETE)
476 return virtual_entry_num; 446 return virtual_entry_num;
477
478 if (!is_affected)
479 /* cut or paste is applied to another item */
480 return virtual_entry_num;
481
482 if (virtual_entry_num < pos_in_item)
483 return virtual_entry_num;
484 447
485 if (mode == M_CUT) 448 if (!is_affected)
486 return virtual_entry_num + 1; 449 /* cut or paste is applied to another item */
450 return virtual_entry_num;
487 451
488 RFALSE( mode != M_PASTE || virtual_entry_num == 0, 452 if (virtual_entry_num < pos_in_item)
489 "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", mode); 453 return virtual_entry_num;
490
491 return virtual_entry_num - 1;
492}
493 454
455 if (mode == M_CUT)
456 return virtual_entry_num + 1;
494 457
458 RFALSE(mode != M_PASTE || virtual_entry_num == 0,
459 "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'",
460 mode);
495 461
462 return virtual_entry_num - 1;
463}
496 464
497/* Create an array of sizes of directory entries for virtual 465/* Create an array of sizes of directory entries for virtual
498 item. Return space used by an item. FIXME: no control over 466 item. Return space used by an item. FIXME: no control over
499 consuming of space used by this item handler */ 467 consuming of space used by this item handler */
500static int direntry_create_vi (struct virtual_node * vn, 468static int direntry_create_vi(struct virtual_node *vn,
501 struct virtual_item * vi, 469 struct virtual_item *vi,
502 int is_affected, 470 int is_affected, int insert_size)
503 int insert_size) 471{
504{ 472 struct direntry_uarea *dir_u = vi->vi_uarea;
505 struct direntry_uarea * dir_u = vi->vi_uarea; 473 int i, j;
506 int i, j; 474 int size = sizeof(struct direntry_uarea);
507 int size = sizeof (struct direntry_uarea); 475 struct reiserfs_de_head *deh;
508 struct reiserfs_de_head * deh;
509
510 vi->vi_index = TYPE_DIRENTRY;
511
512 if (!(vi->vi_ih) || !vi->vi_item)
513 BUG ();
514
515
516 dir_u->flags = 0;
517 if (le_ih_k_offset (vi->vi_ih) == DOT_OFFSET)
518 dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM;
519
520 deh = (struct reiserfs_de_head *)(vi->vi_item);
521
522
523 /* virtual directory item have this amount of entry after */
524 dir_u->entry_count = ih_entry_count (vi->vi_ih) +
525 ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 :
526 (vn->vn_mode == M_PASTE ? 1 : 0)) : 0);
527
528 for (i = 0; i < dir_u->entry_count; i ++) {
529 j = old_entry_num (is_affected, i, vn->vn_pos_in_item, vn->vn_mode);
530 dir_u->entry_sizes[i] = (j ? deh_location( &(deh[j - 1]) ) :
531 ih_item_len (vi->vi_ih)) -
532 deh_location( &(deh[j])) + DEH_SIZE;
533 }
534
535 size += (dir_u->entry_count * sizeof (short));
536
537 /* set size of pasted entry */
538 if (is_affected && vn->vn_mode == M_PASTE)
539 dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size;
540 476
477 vi->vi_index = TYPE_DIRENTRY;
478
479 if (!(vi->vi_ih) || !vi->vi_item)
480 BUG();
481
482 dir_u->flags = 0;
483 if (le_ih_k_offset(vi->vi_ih) == DOT_OFFSET)
484 dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM;
485
486 deh = (struct reiserfs_de_head *)(vi->vi_item);
487
488 /* virtual directory item have this amount of entry after */
489 dir_u->entry_count = ih_entry_count(vi->vi_ih) +
490 ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 :
491 (vn->vn_mode == M_PASTE ? 1 : 0)) : 0);
492
493 for (i = 0; i < dir_u->entry_count; i++) {
494 j = old_entry_num(is_affected, i, vn->vn_pos_in_item,
495 vn->vn_mode);
496 dir_u->entry_sizes[i] =
497 (j ? deh_location(&(deh[j - 1])) : ih_item_len(vi->vi_ih)) -
498 deh_location(&(deh[j])) + DEH_SIZE;
499 }
500
501 size += (dir_u->entry_count * sizeof(short));
502
503 /* set size of pasted entry */
504 if (is_affected && vn->vn_mode == M_PASTE)
505 dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size;
541 506
542#ifdef CONFIG_REISERFS_CHECK 507#ifdef CONFIG_REISERFS_CHECK
543 /* compare total size of entries with item length */ 508 /* compare total size of entries with item length */
544 { 509 {
545 int k, l; 510 int k, l;
546 511
547 l = 0; 512 l = 0;
548 for (k = 0; k < dir_u->entry_count; k ++) 513 for (k = 0; k < dir_u->entry_count; k++)
549 l += dir_u->entry_sizes[k]; 514 l += dir_u->entry_sizes[k];
550 515
551 if (l + IH_SIZE != vi->vi_item_len + 516 if (l + IH_SIZE != vi->vi_item_len +
552 ((is_affected && (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT)) ? insert_size : 0) ) { 517 ((is_affected
553 reiserfs_panic (NULL, "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", 518 && (vn->vn_mode == M_PASTE
554 vn->vn_mode, insert_size); 519 || vn->vn_mode == M_CUT)) ? insert_size : 0)) {
520 reiserfs_panic(NULL,
521 "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item",
522 vn->vn_mode, insert_size);
523 }
555 } 524 }
556 }
557#endif 525#endif
558 526
559 return size; 527 return size;
560
561 528
562} 529}
563 530
564
565// 531//
566// return number of entries which may fit into specified amount of 532// return number of entries which may fit into specified amount of
567// free space, or -1 if free space is not enough even for 1 entry 533// free space, or -1 if free space is not enough even for 1 entry
568// 534//
569static int direntry_check_left (struct virtual_item * vi, int free, 535static int direntry_check_left(struct virtual_item *vi, int free,
570 int start_skip, int end_skip) 536 int start_skip, int end_skip)
571{ 537{
572 int i; 538 int i;
573 int entries = 0; 539 int entries = 0;
574 struct direntry_uarea * dir_u = vi->vi_uarea; 540 struct direntry_uarea *dir_u = vi->vi_uarea;
575 541
576 for (i = start_skip; i < dir_u->entry_count - end_skip; i ++) { 542 for (i = start_skip; i < dir_u->entry_count - end_skip; i++) {
577 if (dir_u->entry_sizes[i] > free) 543 if (dir_u->entry_sizes[i] > free)
578 /* i-th entry doesn't fit into the remaining free space */ 544 /* i-th entry doesn't fit into the remaining free space */
579 break; 545 break;
580
581 free -= dir_u->entry_sizes[i];
582 entries ++;
583 }
584 546
585 if (entries == dir_u->entry_count) { 547 free -= dir_u->entry_sizes[i];
586 reiserfs_panic (NULL, "free space %d, entry_count %d\n", free, dir_u->entry_count); 548 entries++;
587 } 549 }
588 550
589 /* "." and ".." can not be separated from each other */ 551 if (entries == dir_u->entry_count) {
590 if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries < 2) 552 reiserfs_panic(NULL, "free space %d, entry_count %d\n", free,
591 entries = 0; 553 dir_u->entry_count);
592 554 }
593 return entries ?: -1;
594}
595 555
556 /* "." and ".." can not be separated from each other */
557 if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
558 && entries < 2)
559 entries = 0;
596 560
597static int direntry_check_right (struct virtual_item * vi, int free) 561 return entries ? : -1;
562}
563
564static int direntry_check_right(struct virtual_item *vi, int free)
598{ 565{
599 int i; 566 int i;
600 int entries = 0; 567 int entries = 0;
601 struct direntry_uarea * dir_u = vi->vi_uarea; 568 struct direntry_uarea *dir_u = vi->vi_uarea;
602
603 for (i = dir_u->entry_count - 1; i >= 0; i --) {
604 if (dir_u->entry_sizes[i] > free)
605 /* i-th entry doesn't fit into the remaining free space */
606 break;
607
608 free -= dir_u->entry_sizes[i];
609 entries ++;
610 }
611 if (entries == dir_u->entry_count)
612 BUG ();
613 569
614 /* "." and ".." can not be separated from each other */ 570 for (i = dir_u->entry_count - 1; i >= 0; i--) {
615 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries > dir_u->entry_count - 2) 571 if (dir_u->entry_sizes[i] > free)
616 entries = dir_u->entry_count - 2; 572 /* i-th entry doesn't fit into the remaining free space */
573 break;
617 574
618 return entries ?: -1; 575 free -= dir_u->entry_sizes[i];
619} 576 entries++;
577 }
578 if (entries == dir_u->entry_count)
579 BUG();
620 580
581 /* "." and ".." can not be separated from each other */
582 if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM)
583 && entries > dir_u->entry_count - 2)
584 entries = dir_u->entry_count - 2;
585
586 return entries ? : -1;
587}
621 588
622/* sum of entry sizes between from-th and to-th entries including both edges */ 589/* sum of entry sizes between from-th and to-th entries including both edges */
623static int direntry_part_size (struct virtual_item * vi, int first, int count) 590static int direntry_part_size(struct virtual_item *vi, int first, int count)
624{ 591{
625 int i, retval; 592 int i, retval;
626 int from, to; 593 int from, to;
627 struct direntry_uarea * dir_u = vi->vi_uarea; 594 struct direntry_uarea *dir_u = vi->vi_uarea;
628
629 retval = 0;
630 if (first == 0)
631 from = 0;
632 else
633 from = dir_u->entry_count - count;
634 to = from + count - 1;
635 595
636 for (i = from; i <= to; i ++) 596 retval = 0;
637 retval += dir_u->entry_sizes[i]; 597 if (first == 0)
598 from = 0;
599 else
600 from = dir_u->entry_count - count;
601 to = from + count - 1;
638 602
639 return retval; 603 for (i = from; i <= to; i++)
640} 604 retval += dir_u->entry_sizes[i];
641 605
642static int direntry_unit_num (struct virtual_item * vi) 606 return retval;
643{
644 struct direntry_uarea * dir_u = vi->vi_uarea;
645
646 return dir_u->entry_count;
647} 607}
648 608
609static int direntry_unit_num(struct virtual_item *vi)
610{
611 struct direntry_uarea *dir_u = vi->vi_uarea;
649 612
613 return dir_u->entry_count;
614}
650 615
651static void direntry_print_vi (struct virtual_item * vi) 616static void direntry_print_vi(struct virtual_item *vi)
652{ 617{
653 int i; 618 int i;
654 struct direntry_uarea * dir_u = vi->vi_uarea; 619 struct direntry_uarea *dir_u = vi->vi_uarea;
655 620
656 reiserfs_warning (NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x", 621 reiserfs_warning(NULL, "DIRENTRY, index %d, type 0x%x, %h, flags 0x%x",
657 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); 622 vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags);
658 printk ("%d entries: ", dir_u->entry_count); 623 printk("%d entries: ", dir_u->entry_count);
659 for (i = 0; i < dir_u->entry_count; i ++) 624 for (i = 0; i < dir_u->entry_count; i++)
660 printk ("%d ", dir_u->entry_sizes[i]); 625 printk("%d ", dir_u->entry_sizes[i]);
661 printk ("\n"); 626 printk("\n");
662} 627}
663 628
664static struct item_operations direntry_ops = { 629static struct item_operations direntry_ops = {
665 .bytes_number = direntry_bytes_number, 630 .bytes_number = direntry_bytes_number,
666 .decrement_key = direntry_decrement_key, 631 .decrement_key = direntry_decrement_key,
667 .is_left_mergeable = direntry_is_left_mergeable, 632 .is_left_mergeable = direntry_is_left_mergeable,
668 .print_item = direntry_print_item, 633 .print_item = direntry_print_item,
669 .check_item = direntry_check_item, 634 .check_item = direntry_check_item,
670 635
671 .create_vi = direntry_create_vi, 636 .create_vi = direntry_create_vi,
672 .check_left = direntry_check_left, 637 .check_left = direntry_check_left,
673 .check_right = direntry_check_right, 638 .check_right = direntry_check_right,
674 .part_size = direntry_part_size, 639 .part_size = direntry_part_size,
675 .unit_num = direntry_unit_num, 640 .unit_num = direntry_unit_num,
676 .print_vi = direntry_print_vi 641 .print_vi = direntry_print_vi
677}; 642};
678 643
679
680////////////////////////////////////////////////////////////////////////////// 644//////////////////////////////////////////////////////////////////////////////
681// Error catching functions to catch errors caused by incorrect item types. 645// Error catching functions to catch errors caused by incorrect item types.
682// 646//
683static int errcatch_bytes_number (struct item_head * ih, int block_size) 647static int errcatch_bytes_number(struct item_head *ih, int block_size)
684{ 648{
685 reiserfs_warning (NULL, "green-16001: Invalid item type observed, run fsck ASAP"); 649 reiserfs_warning(NULL,
686 return 0; 650 "green-16001: Invalid item type observed, run fsck ASAP");
651 return 0;
687} 652}
688 653
689static void errcatch_decrement_key (struct cpu_key * key) 654static void errcatch_decrement_key(struct cpu_key *key)
690{ 655{
691 reiserfs_warning (NULL, "green-16002: Invalid item type observed, run fsck ASAP"); 656 reiserfs_warning(NULL,
657 "green-16002: Invalid item type observed, run fsck ASAP");
692} 658}
693 659
694 660static int errcatch_is_left_mergeable(struct reiserfs_key *key,
695static int errcatch_is_left_mergeable (struct reiserfs_key * key, unsigned long bsize) 661 unsigned long bsize)
696{ 662{
697 reiserfs_warning (NULL, "green-16003: Invalid item type observed, run fsck ASAP"); 663 reiserfs_warning(NULL,
698 return 0; 664 "green-16003: Invalid item type observed, run fsck ASAP");
665 return 0;
699} 666}
700 667
701 668static void errcatch_print_item(struct item_head *ih, char *item)
702static void errcatch_print_item (struct item_head * ih, char * item)
703{ 669{
704 reiserfs_warning (NULL, "green-16004: Invalid item type observed, run fsck ASAP"); 670 reiserfs_warning(NULL,
671 "green-16004: Invalid item type observed, run fsck ASAP");
705} 672}
706 673
707 674static void errcatch_check_item(struct item_head *ih, char *item)
708static void errcatch_check_item (struct item_head * ih, char * item)
709{ 675{
710 reiserfs_warning (NULL, "green-16005: Invalid item type observed, run fsck ASAP"); 676 reiserfs_warning(NULL,
677 "green-16005: Invalid item type observed, run fsck ASAP");
711} 678}
712 679
713static int errcatch_create_vi (struct virtual_node * vn, 680static int errcatch_create_vi(struct virtual_node *vn,
714 struct virtual_item * vi, 681 struct virtual_item *vi,
715 int is_affected, 682 int is_affected, int insert_size)
716 int insert_size)
717{ 683{
718 reiserfs_warning (NULL, "green-16006: Invalid item type observed, run fsck ASAP"); 684 reiserfs_warning(NULL,
719 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where 685 "green-16006: Invalid item type observed, run fsck ASAP");
720 // this operation is called from is of return type void. 686 return 0; // We might return -1 here as well, but it won't help as create_virtual_node() from where
687 // this operation is called from is of return type void.
721} 688}
722 689
723static int errcatch_check_left (struct virtual_item * vi, int free, 690static int errcatch_check_left(struct virtual_item *vi, int free,
724 int start_skip, int end_skip) 691 int start_skip, int end_skip)
725{ 692{
726 reiserfs_warning (NULL, "green-16007: Invalid item type observed, run fsck ASAP"); 693 reiserfs_warning(NULL,
727 return -1; 694 "green-16007: Invalid item type observed, run fsck ASAP");
695 return -1;
728} 696}
729 697
730 698static int errcatch_check_right(struct virtual_item *vi, int free)
731static int errcatch_check_right (struct virtual_item * vi, int free)
732{ 699{
733 reiserfs_warning (NULL, "green-16008: Invalid item type observed, run fsck ASAP"); 700 reiserfs_warning(NULL,
734 return -1; 701 "green-16008: Invalid item type observed, run fsck ASAP");
702 return -1;
735} 703}
736 704
737static int errcatch_part_size (struct virtual_item * vi, int first, int count) 705static int errcatch_part_size(struct virtual_item *vi, int first, int count)
738{ 706{
739 reiserfs_warning (NULL, "green-16009: Invalid item type observed, run fsck ASAP"); 707 reiserfs_warning(NULL,
740 return 0; 708 "green-16009: Invalid item type observed, run fsck ASAP");
709 return 0;
741} 710}
742 711
743static int errcatch_unit_num (struct virtual_item * vi) 712static int errcatch_unit_num(struct virtual_item *vi)
744{ 713{
745 reiserfs_warning (NULL, "green-16010: Invalid item type observed, run fsck ASAP"); 714 reiserfs_warning(NULL,
746 return 0; 715 "green-16010: Invalid item type observed, run fsck ASAP");
716 return 0;
747} 717}
748 718
749static void errcatch_print_vi (struct virtual_item * vi) 719static void errcatch_print_vi(struct virtual_item *vi)
750{ 720{
751 reiserfs_warning (NULL, "green-16011: Invalid item type observed, run fsck ASAP"); 721 reiserfs_warning(NULL,
722 "green-16011: Invalid item type observed, run fsck ASAP");
752} 723}
753 724
754static struct item_operations errcatch_ops = { 725static struct item_operations errcatch_ops = {
755 errcatch_bytes_number, 726 errcatch_bytes_number,
756 errcatch_decrement_key, 727 errcatch_decrement_key,
757 errcatch_is_left_mergeable, 728 errcatch_is_left_mergeable,
758 errcatch_print_item, 729 errcatch_print_item,
759 errcatch_check_item, 730 errcatch_check_item,
760 731
761 errcatch_create_vi, 732 errcatch_create_vi,
762 errcatch_check_left, 733 errcatch_check_left,
763 errcatch_check_right, 734 errcatch_check_right,
764 errcatch_part_size, 735 errcatch_part_size,
765 errcatch_unit_num, 736 errcatch_unit_num,
766 errcatch_print_vi 737 errcatch_print_vi
767}; 738};
768 739
769
770
771////////////////////////////////////////////////////////////////////////////// 740//////////////////////////////////////////////////////////////////////////////
772// 741//
773// 742//
@@ -775,15 +744,11 @@ static struct item_operations errcatch_ops = {
775#error Item types must use disk-format assigned values. 744#error Item types must use disk-format assigned values.
776#endif 745#endif
777 746
778struct item_operations * item_ops [TYPE_ANY + 1] = { 747struct item_operations *item_ops[TYPE_ANY + 1] = {
779 &stat_data_ops, 748 &stat_data_ops,
780 &indirect_ops, 749 &indirect_ops,
781 &direct_ops, 750 &direct_ops,
782 &direntry_ops, 751 &direntry_ops,
783 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 752 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
784 &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */ 753 &errcatch_ops /* This is to catch errors with invalid type (15th entry for TYPE_ANY) */
785}; 754};
786
787
788
789
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index d1bcf0da672..c66c27ec410 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -55,7 +55,6 @@
55#include <linux/writeback.h> 55#include <linux/writeback.h>
56#include <linux/blkdev.h> 56#include <linux/blkdev.h>
57 57
58
59/* gets a struct reiserfs_journal_list * from a list head */ 58/* gets a struct reiserfs_journal_list * from a list head */
60#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
61 j_list)) 60 j_list))
@@ -69,55 +68,61 @@ static int reiserfs_mounted_fs_count;
69 68
70static struct workqueue_struct *commit_wq; 69static struct workqueue_struct *commit_wq;
71 70
72#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit 71#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
73 structs at 4k */ 72 structs at 4k */
74#define BUFNR 64 /*read ahead */ 73#define BUFNR 64 /*read ahead */
75 74
76/* cnode stat bits. Move these into reiserfs_fs.h */ 75/* cnode stat bits. Move these into reiserfs_fs.h */
77 76
78#define BLOCK_FREED 2 /* this block was freed, and can't be written. */ 77#define BLOCK_FREED 2 /* this block was freed, and can't be written. */
79#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ 78#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */
80 79
81#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ 80#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */
82#define BLOCK_DIRTIED 5 81#define BLOCK_DIRTIED 5
83 82
84
85/* journal list state bits */ 83/* journal list state bits */
86#define LIST_TOUCHED 1 84#define LIST_TOUCHED 1
87#define LIST_DIRTY 2 85#define LIST_DIRTY 2
88#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ 86#define LIST_COMMIT_PENDING 4 /* someone will commit this list */
89 87
90/* flags for do_journal_end */ 88/* flags for do_journal_end */
91#define FLUSH_ALL 1 /* flush commit and real blocks */ 89#define FLUSH_ALL 1 /* flush commit and real blocks */
92#define COMMIT_NOW 2 /* end and commit this transaction */ 90#define COMMIT_NOW 2 /* end and commit this transaction */
93#define WAIT 4 /* wait for the log blocks to hit the disk*/ 91#define WAIT 4 /* wait for the log blocks to hit the disk */
94 92
95static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; 93static int do_journal_end(struct reiserfs_transaction_handle *,
96static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; 94 struct super_block *, unsigned long nblocks,
97static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; 95 int flags);
98static int can_dirty(struct reiserfs_journal_cnode *cn) ; 96static int flush_journal_list(struct super_block *s,
99static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks); 97 struct reiserfs_journal_list *jl, int flushall);
100static int release_journal_dev( struct super_block *super, 98static int flush_commit_list(struct super_block *s,
101 struct reiserfs_journal *journal ); 99 struct reiserfs_journal_list *jl, int flushall);
100static int can_dirty(struct reiserfs_journal_cnode *cn);
101static int journal_join(struct reiserfs_transaction_handle *th,
102 struct super_block *p_s_sb, unsigned long nblocks);
103static int release_journal_dev(struct super_block *super,
104 struct reiserfs_journal *journal);
102static int dirty_one_transaction(struct super_block *s, 105static int dirty_one_transaction(struct super_block *s,
103 struct reiserfs_journal_list *jl); 106 struct reiserfs_journal_list *jl);
104static void flush_async_commits(void *p); 107static void flush_async_commits(void *p);
105static void queue_log_writer(struct super_block *s); 108static void queue_log_writer(struct super_block *s);
106 109
107/* values for join in do_journal_begin_r */ 110/* values for join in do_journal_begin_r */
108enum { 111enum {
109 JBEGIN_REG = 0, /* regular journal begin */ 112 JBEGIN_REG = 0, /* regular journal begin */
110 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */ 113 JBEGIN_JOIN = 1, /* join the running transaction if at all possible */
111 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */ 114 JBEGIN_ABORT = 2, /* called from cleanup code, ignores aborted flag */
112}; 115};
113 116
114static int do_journal_begin_r(struct reiserfs_transaction_handle *th, 117static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
115 struct super_block * p_s_sb, 118 struct super_block *p_s_sb,
116 unsigned long nblocks,int join); 119 unsigned long nblocks, int join);
117 120
118static void init_journal_hash(struct super_block *p_s_sb) { 121static void init_journal_hash(struct super_block *p_s_sb)
119 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 122{
120 memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; 123 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
124 memset(journal->j_hash_table, 0,
125 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
121} 126}
122 127
123/* 128/*
@@ -125,149 +130,159 @@ static void init_journal_hash(struct super_block *p_s_sb) {
125** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for 130** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for
126** more details. 131** more details.
127*/ 132*/
128static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { 133static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
129 if (bh) { 134{
130 clear_buffer_dirty(bh); 135 if (bh) {
131 clear_buffer_journal_test(bh); 136 clear_buffer_dirty(bh);
132 } 137 clear_buffer_journal_test(bh);
133 return 0 ; 138 }
139 return 0;
134} 140}
135 141
136static void disable_barrier(struct super_block *s) 142static void disable_barrier(struct super_block *s)
137{ 143{
138 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); 144 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
139 printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s)); 145 printk("reiserfs: disabling flush barriers on %s\n",
140} 146 reiserfs_bdevname(s));
141 147}
142static struct reiserfs_bitmap_node * 148
143allocate_bitmap_node(struct super_block *p_s_sb) { 149static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
144 struct reiserfs_bitmap_node *bn ; 150 *p_s_sb)
145 static int id; 151{
146 152 struct reiserfs_bitmap_node *bn;
147 bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ; 153 static int id;
148 if (!bn) { 154
149 return NULL ; 155 bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS,
150 } 156 p_s_sb);
151 bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ; 157 if (!bn) {
152 if (!bn->data) { 158 return NULL;
153 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 159 }
154 return NULL ; 160 bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
155 } 161 if (!bn->data) {
156 bn->id = id++ ; 162 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
157 memset(bn->data, 0, p_s_sb->s_blocksize) ; 163 return NULL;
158 INIT_LIST_HEAD(&bn->list) ; 164 }
159 return bn ; 165 bn->id = id++;
160} 166 memset(bn->data, 0, p_s_sb->s_blocksize);
161 167 INIT_LIST_HEAD(&bn->list);
162static struct reiserfs_bitmap_node * 168 return bn;
163get_bitmap_node(struct super_block *p_s_sb) { 169}
164 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 170
165 struct reiserfs_bitmap_node *bn = NULL; 171static struct reiserfs_bitmap_node *get_bitmap_node(struct super_block *p_s_sb)
166 struct list_head *entry = journal->j_bitmap_nodes.next ; 172{
167 173 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
168 journal->j_used_bitmap_nodes++ ; 174 struct reiserfs_bitmap_node *bn = NULL;
169repeat: 175 struct list_head *entry = journal->j_bitmap_nodes.next;
170 176
171 if(entry != &journal->j_bitmap_nodes) { 177 journal->j_used_bitmap_nodes++;
172 bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; 178 repeat:
173 list_del(entry) ; 179
174 memset(bn->data, 0, p_s_sb->s_blocksize) ; 180 if (entry != &journal->j_bitmap_nodes) {
175 journal->j_free_bitmap_nodes-- ; 181 bn = list_entry(entry, struct reiserfs_bitmap_node, list);
176 return bn ; 182 list_del(entry);
177 } 183 memset(bn->data, 0, p_s_sb->s_blocksize);
178 bn = allocate_bitmap_node(p_s_sb) ; 184 journal->j_free_bitmap_nodes--;
179 if (!bn) { 185 return bn;
180 yield(); 186 }
181 goto repeat ; 187 bn = allocate_bitmap_node(p_s_sb);
182 } 188 if (!bn) {
183 return bn ; 189 yield();
190 goto repeat;
191 }
192 return bn;
184} 193}
185static inline void free_bitmap_node(struct super_block *p_s_sb, 194static inline void free_bitmap_node(struct super_block *p_s_sb,
186 struct reiserfs_bitmap_node *bn) { 195 struct reiserfs_bitmap_node *bn)
187 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 196{
188 journal->j_used_bitmap_nodes-- ; 197 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
189 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { 198 journal->j_used_bitmap_nodes--;
190 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; 199 if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
191 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 200 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
192 } else { 201 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
193 list_add(&bn->list, &journal->j_bitmap_nodes) ; 202 } else {
194 journal->j_free_bitmap_nodes++ ; 203 list_add(&bn->list, &journal->j_bitmap_nodes);
195 } 204 journal->j_free_bitmap_nodes++;
196} 205 }
197 206}
198static void allocate_bitmap_nodes(struct super_block *p_s_sb) { 207
199 int i ; 208static void allocate_bitmap_nodes(struct super_block *p_s_sb)
200 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 209{
201 struct reiserfs_bitmap_node *bn = NULL ; 210 int i;
202 for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { 211 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
203 bn = allocate_bitmap_node(p_s_sb) ; 212 struct reiserfs_bitmap_node *bn = NULL;
204 if (bn) { 213 for (i = 0; i < REISERFS_MIN_BITMAP_NODES; i++) {
205 list_add(&bn->list, &journal->j_bitmap_nodes) ; 214 bn = allocate_bitmap_node(p_s_sb);
206 journal->j_free_bitmap_nodes++ ; 215 if (bn) {
207 } else { 216 list_add(&bn->list, &journal->j_bitmap_nodes);
208 break ; // this is ok, we'll try again when more are needed 217 journal->j_free_bitmap_nodes++;
209 } 218 } else {
210 } 219 break; // this is ok, we'll try again when more are needed
220 }
221 }
211} 222}
212 223
213static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, 224static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
214 struct reiserfs_list_bitmap *jb) { 225 struct reiserfs_list_bitmap *jb)
215 int bmap_nr = block / (p_s_sb->s_blocksize << 3) ; 226{
216 int bit_nr = block % (p_s_sb->s_blocksize << 3) ; 227 int bmap_nr = block / (p_s_sb->s_blocksize << 3);
228 int bit_nr = block % (p_s_sb->s_blocksize << 3);
217 229
218 if (!jb->bitmaps[bmap_nr]) { 230 if (!jb->bitmaps[bmap_nr]) {
219 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; 231 jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
220 } 232 }
221 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ; 233 set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data);
222 return 0 ; 234 return 0;
223} 235}
224 236
225static void cleanup_bitmap_list(struct super_block *p_s_sb, 237static void cleanup_bitmap_list(struct super_block *p_s_sb,
226 struct reiserfs_list_bitmap *jb) { 238 struct reiserfs_list_bitmap *jb)
227 int i; 239{
228 if (jb->bitmaps == NULL) 240 int i;
229 return; 241 if (jb->bitmaps == NULL)
230 242 return;
231 for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { 243
232 if (jb->bitmaps[i]) { 244 for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
233 free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; 245 if (jb->bitmaps[i]) {
234 jb->bitmaps[i] = NULL ; 246 free_bitmap_node(p_s_sb, jb->bitmaps[i]);
235 } 247 jb->bitmaps[i] = NULL;
236 } 248 }
249 }
237} 250}
238 251
239/* 252/*
240** only call this on FS unmount. 253** only call this on FS unmount.
241*/ 254*/
242static int free_list_bitmaps(struct super_block *p_s_sb, 255static int free_list_bitmaps(struct super_block *p_s_sb,
243 struct reiserfs_list_bitmap *jb_array) { 256 struct reiserfs_list_bitmap *jb_array)
244 int i ; 257{
245 struct reiserfs_list_bitmap *jb ; 258 int i;
246 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 259 struct reiserfs_list_bitmap *jb;
247 jb = jb_array + i ; 260 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
248 jb->journal_list = NULL ; 261 jb = jb_array + i;
249 cleanup_bitmap_list(p_s_sb, jb) ; 262 jb->journal_list = NULL;
250 vfree(jb->bitmaps) ; 263 cleanup_bitmap_list(p_s_sb, jb);
251 jb->bitmaps = NULL ; 264 vfree(jb->bitmaps);
252 } 265 jb->bitmaps = NULL;
253 return 0; 266 }
254} 267 return 0;
255 268}
256static int free_bitmap_nodes(struct super_block *p_s_sb) { 269
257 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 270static int free_bitmap_nodes(struct super_block *p_s_sb)
258 struct list_head *next = journal->j_bitmap_nodes.next ; 271{
259 struct reiserfs_bitmap_node *bn ; 272 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
260 273 struct list_head *next = journal->j_bitmap_nodes.next;
261 while(next != &journal->j_bitmap_nodes) { 274 struct reiserfs_bitmap_node *bn;
262 bn = list_entry(next, struct reiserfs_bitmap_node, list) ; 275
263 list_del(next) ; 276 while (next != &journal->j_bitmap_nodes) {
264 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ; 277 bn = list_entry(next, struct reiserfs_bitmap_node, list);
265 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ; 278 list_del(next);
266 next = journal->j_bitmap_nodes.next ; 279 reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
267 journal->j_free_bitmap_nodes-- ; 280 reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
268 } 281 next = journal->j_bitmap_nodes.next;
269 282 journal->j_free_bitmap_nodes--;
270 return 0 ; 283 }
284
285 return 0;
271} 286}
272 287
273/* 288/*
@@ -275,59 +290,65 @@ static int free_bitmap_nodes(struct super_block *p_s_sb) {
275** jb_array is the array to be filled in. 290** jb_array is the array to be filled in.
276*/ 291*/
277int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, 292int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
278 struct reiserfs_list_bitmap *jb_array, 293 struct reiserfs_list_bitmap *jb_array,
279 int bmap_nr) { 294 int bmap_nr)
280 int i ; 295{
281 int failed = 0 ; 296 int i;
282 struct reiserfs_list_bitmap *jb ; 297 int failed = 0;
283 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; 298 struct reiserfs_list_bitmap *jb;
284 299 int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *);
285 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 300
286 jb = jb_array + i ; 301 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
287 jb->journal_list = NULL ; 302 jb = jb_array + i;
288 jb->bitmaps = vmalloc( mem ) ; 303 jb->journal_list = NULL;
289 if (!jb->bitmaps) { 304 jb->bitmaps = vmalloc(mem);
290 reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ; 305 if (!jb->bitmaps) {
291 failed = 1; 306 reiserfs_warning(p_s_sb,
292 break ; 307 "clm-2000, unable to allocate bitmaps for journal lists");
293 } 308 failed = 1;
294 memset(jb->bitmaps, 0, mem) ; 309 break;
295 } 310 }
296 if (failed) { 311 memset(jb->bitmaps, 0, mem);
297 free_list_bitmaps(p_s_sb, jb_array) ; 312 }
298 return -1 ; 313 if (failed) {
299 } 314 free_list_bitmaps(p_s_sb, jb_array);
300 return 0 ; 315 return -1;
316 }
317 return 0;
301} 318}
302 319
303/* 320/*
304** find an available list bitmap. If you can't find one, flush a commit list 321** find an available list bitmap. If you can't find one, flush a commit list
305** and try again 322** and try again
306*/ 323*/
307static struct reiserfs_list_bitmap * 324static struct reiserfs_list_bitmap *get_list_bitmap(struct super_block *p_s_sb,
308get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { 325 struct reiserfs_journal_list
309 int i,j ; 326 *jl)
310 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 327{
311 struct reiserfs_list_bitmap *jb = NULL ; 328 int i, j;
312 329 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
313 for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { 330 struct reiserfs_list_bitmap *jb = NULL;
314 i = journal->j_list_bitmap_index ; 331
315 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; 332 for (j = 0; j < (JOURNAL_NUM_BITMAPS * 3); j++) {
316 jb = journal->j_list_bitmap + i ; 333 i = journal->j_list_bitmap_index;
317 if (journal->j_list_bitmap[i].journal_list) { 334 journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS;
318 flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ; 335 jb = journal->j_list_bitmap + i;
319 if (!journal->j_list_bitmap[i].journal_list) { 336 if (journal->j_list_bitmap[i].journal_list) {
320 break ; 337 flush_commit_list(p_s_sb,
321 } 338 journal->j_list_bitmap[i].
322 } else { 339 journal_list, 1);
323 break ; 340 if (!journal->j_list_bitmap[i].journal_list) {
324 } 341 break;
325 } 342 }
326 if (jb->journal_list) { /* double check to make sure if flushed correctly */ 343 } else {
327 return NULL ; 344 break;
328 } 345 }
329 jb->journal_list = jl ; 346 }
330 return jb ; 347 if (jb->journal_list) { /* double check to make sure if flushed correctly */
348 return NULL;
349 }
350 jb->journal_list = jl;
351 return jb;
331} 352}
332 353
333/* 354/*
@@ -335,104 +356,114 @@ get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) {
335** Uses the cnode->next and cnode->prev pointers 356** Uses the cnode->next and cnode->prev pointers
336** returns NULL on failure 357** returns NULL on failure
337*/ 358*/
338static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { 359static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes)
339 struct reiserfs_journal_cnode *head ; 360{
340 int i ; 361 struct reiserfs_journal_cnode *head;
341 if (num_cnodes <= 0) { 362 int i;
342 return NULL ; 363 if (num_cnodes <= 0) {
343 } 364 return NULL;
344 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; 365 }
345 if (!head) { 366 head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode));
346 return NULL ; 367 if (!head) {
347 } 368 return NULL;
348 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; 369 }
349 head[0].prev = NULL ; 370 memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode));
350 head[0].next = head + 1 ; 371 head[0].prev = NULL;
351 for (i = 1 ; i < num_cnodes; i++) { 372 head[0].next = head + 1;
352 head[i].prev = head + (i - 1) ; 373 for (i = 1; i < num_cnodes; i++) {
353 head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ 374 head[i].prev = head + (i - 1);
354 } 375 head[i].next = head + (i + 1); /* if last one, overwrite it after the if */
355 head[num_cnodes -1].next = NULL ; 376 }
356 return head ; 377 head[num_cnodes - 1].next = NULL;
378 return head;
357} 379}
358 380
359/* 381/*
360** pulls a cnode off the free list, or returns NULL on failure 382** pulls a cnode off the free list, or returns NULL on failure
361*/ 383*/
362static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { 384static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb)
363 struct reiserfs_journal_cnode *cn ; 385{
364 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 386 struct reiserfs_journal_cnode *cn;
365 387 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
366 reiserfs_check_lock_depth(p_s_sb, "get_cnode") ; 388
367 389 reiserfs_check_lock_depth(p_s_sb, "get_cnode");
368 if (journal->j_cnode_free <= 0) { 390
369 return NULL ; 391 if (journal->j_cnode_free <= 0) {
370 } 392 return NULL;
371 journal->j_cnode_used++ ; 393 }
372 journal->j_cnode_free-- ; 394 journal->j_cnode_used++;
373 cn = journal->j_cnode_free_list ; 395 journal->j_cnode_free--;
374 if (!cn) { 396 cn = journal->j_cnode_free_list;
375 return cn ; 397 if (!cn) {
376 } 398 return cn;
377 if (cn->next) { 399 }
378 cn->next->prev = NULL ; 400 if (cn->next) {
379 } 401 cn->next->prev = NULL;
380 journal->j_cnode_free_list = cn->next ; 402 }
381 memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; 403 journal->j_cnode_free_list = cn->next;
382 return cn ; 404 memset(cn, 0, sizeof(struct reiserfs_journal_cnode));
405 return cn;
383} 406}
384 407
385/* 408/*
386** returns a cnode to the free list 409** returns a cnode to the free list
387*/ 410*/
388static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { 411static void free_cnode(struct super_block *p_s_sb,
389 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 412 struct reiserfs_journal_cnode *cn)
413{
414 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
390 415
391 reiserfs_check_lock_depth(p_s_sb, "free_cnode") ; 416 reiserfs_check_lock_depth(p_s_sb, "free_cnode");
392 417
393 journal->j_cnode_used-- ; 418 journal->j_cnode_used--;
394 journal->j_cnode_free++ ; 419 journal->j_cnode_free++;
395 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ 420 /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
396 cn->next = journal->j_cnode_free_list ; 421 cn->next = journal->j_cnode_free_list;
397 if (journal->j_cnode_free_list) { 422 if (journal->j_cnode_free_list) {
398 journal->j_cnode_free_list->prev = cn ; 423 journal->j_cnode_free_list->prev = cn;
399 } 424 }
400 cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ 425 cn->prev = NULL; /* not needed with the memset, but I might kill the memset, and forget to do this */
401 journal->j_cnode_free_list = cn ; 426 journal->j_cnode_free_list = cn;
402} 427}
403 428
404static void clear_prepared_bits(struct buffer_head *bh) { 429static void clear_prepared_bits(struct buffer_head *bh)
405 clear_buffer_journal_prepared (bh); 430{
406 clear_buffer_journal_restore_dirty (bh); 431 clear_buffer_journal_prepared(bh);
432 clear_buffer_journal_restore_dirty(bh);
407} 433}
408 434
409/* utility function to force a BUG if it is called without the big 435/* utility function to force a BUG if it is called without the big
410** kernel lock held. caller is the string printed just before calling BUG() 436** kernel lock held. caller is the string printed just before calling BUG()
411*/ 437*/
412void reiserfs_check_lock_depth(struct super_block *sb, char *caller) { 438void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
439{
413#ifdef CONFIG_SMP 440#ifdef CONFIG_SMP
414 if (current->lock_depth < 0) { 441 if (current->lock_depth < 0) {
415 reiserfs_panic (sb, "%s called without kernel lock held", caller) ; 442 reiserfs_panic(sb, "%s called without kernel lock held",
416 } 443 caller);
444 }
417#else 445#else
418 ; 446 ;
419#endif 447#endif
420} 448}
421 449
422/* return a cnode with same dev, block number and size in table, or null if not found */ 450/* return a cnode with same dev, block number and size in table, or null if not found */
423static inline struct reiserfs_journal_cnode * 451static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
424get_journal_hash_dev(struct super_block *sb, 452 super_block
425 struct reiserfs_journal_cnode **table, 453 *sb,
426 long bl) 454 struct
455 reiserfs_journal_cnode
456 **table,
457 long bl)
427{ 458{
428 struct reiserfs_journal_cnode *cn ; 459 struct reiserfs_journal_cnode *cn;
429 cn = journal_hash(table, sb, bl) ; 460 cn = journal_hash(table, sb, bl);
430 while(cn) { 461 while (cn) {
431 if (cn->blocknr == bl && cn->sb == sb) 462 if (cn->blocknr == bl && cn->sb == sb)
432 return cn ; 463 return cn;
433 cn = cn->hnext ; 464 cn = cn->hnext;
434 } 465 }
435 return (struct reiserfs_journal_cnode *)0 ; 466 return (struct reiserfs_journal_cnode *)0;
436} 467}
437 468
438/* 469/*
@@ -454,91 +485,103 @@ get_journal_hash_dev(struct super_block *sb,
454** 485**
455*/ 486*/
456int reiserfs_in_journal(struct super_block *p_s_sb, 487int reiserfs_in_journal(struct super_block *p_s_sb,
457 int bmap_nr, int bit_nr, int search_all, 488 int bmap_nr, int bit_nr, int search_all,
458 b_blocknr_t *next_zero_bit) { 489 b_blocknr_t * next_zero_bit)
459 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 490{
460 struct reiserfs_journal_cnode *cn ; 491 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
461 struct reiserfs_list_bitmap *jb ; 492 struct reiserfs_journal_cnode *cn;
462 int i ; 493 struct reiserfs_list_bitmap *jb;
463 unsigned long bl; 494 int i;
464 495 unsigned long bl;
465 *next_zero_bit = 0 ; /* always start this at zero. */ 496
466 497 *next_zero_bit = 0; /* always start this at zero. */
467 PROC_INFO_INC( p_s_sb, journal.in_journal ); 498
468 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. 499 PROC_INFO_INC(p_s_sb, journal.in_journal);
469 ** if we crash before the transaction that freed it commits, this transaction won't 500 /* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
470 ** have committed either, and the block will never be written 501 ** if we crash before the transaction that freed it commits, this transaction won't
471 */ 502 ** have committed either, and the block will never be written
472 if (search_all) { 503 */
473 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 504 if (search_all) {
474 PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap ); 505 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
475 jb = journal->j_list_bitmap + i ; 506 PROC_INFO_INC(p_s_sb, journal.in_journal_bitmap);
476 if (jb->journal_list && jb->bitmaps[bmap_nr] && 507 jb = journal->j_list_bitmap + i;
477 test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) { 508 if (jb->journal_list && jb->bitmaps[bmap_nr] &&
478 *next_zero_bit = find_next_zero_bit((unsigned long *) 509 test_bit(bit_nr,
479 (jb->bitmaps[bmap_nr]->data), 510 (unsigned long *)jb->bitmaps[bmap_nr]->
480 p_s_sb->s_blocksize << 3, bit_nr+1) ; 511 data)) {
481 return 1 ; 512 *next_zero_bit =
482 } 513 find_next_zero_bit((unsigned long *)
483 } 514 (jb->bitmaps[bmap_nr]->
484 } 515 data),
485 516 p_s_sb->s_blocksize << 3,
486 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr; 517 bit_nr + 1);
487 /* is it in any old transactions? */ 518 return 1;
488 if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) { 519 }
489 return 1; 520 }
490 } 521 }
491 522
492 /* is it in the current transaction. This should never happen */ 523 bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
493 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) { 524 /* is it in any old transactions? */
494 BUG(); 525 if (search_all
495 return 1; 526 && (cn =
496 } 527 get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
497 528 return 1;
498 PROC_INFO_INC( p_s_sb, journal.in_journal_reusable ); 529 }
499 /* safe for reuse */ 530
500 return 0 ; 531 /* is it in the current transaction. This should never happen */
532 if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
533 BUG();
534 return 1;
535 }
536
537 PROC_INFO_INC(p_s_sb, journal.in_journal_reusable);
538 /* safe for reuse */
539 return 0;
501} 540}
502 541
503/* insert cn into table 542/* insert cn into table
504*/ 543*/
505static inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { 544static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
506 struct reiserfs_journal_cnode *cn_orig ; 545 struct reiserfs_journal_cnode *cn)
546{
547 struct reiserfs_journal_cnode *cn_orig;
507 548
508 cn_orig = journal_hash(table, cn->sb, cn->blocknr) ; 549 cn_orig = journal_hash(table, cn->sb, cn->blocknr);
509 cn->hnext = cn_orig ; 550 cn->hnext = cn_orig;
510 cn->hprev = NULL ; 551 cn->hprev = NULL;
511 if (cn_orig) { 552 if (cn_orig) {
512 cn_orig->hprev = cn ; 553 cn_orig->hprev = cn;
513 } 554 }
514 journal_hash(table, cn->sb, cn->blocknr) = cn ; 555 journal_hash(table, cn->sb, cn->blocknr) = cn;
515} 556}
516 557
517/* lock the current transaction */ 558/* lock the current transaction */
518inline static void lock_journal(struct super_block *p_s_sb) { 559inline static void lock_journal(struct super_block *p_s_sb)
519 PROC_INFO_INC( p_s_sb, journal.lock_journal ); 560{
520 down(&SB_JOURNAL(p_s_sb)->j_lock); 561 PROC_INFO_INC(p_s_sb, journal.lock_journal);
562 down(&SB_JOURNAL(p_s_sb)->j_lock);
521} 563}
522 564
523/* unlock the current transaction */ 565/* unlock the current transaction */
524inline static void unlock_journal(struct super_block *p_s_sb) { 566inline static void unlock_journal(struct super_block *p_s_sb)
525 up(&SB_JOURNAL(p_s_sb)->j_lock); 567{
568 up(&SB_JOURNAL(p_s_sb)->j_lock);
526} 569}
527 570
528static inline void get_journal_list(struct reiserfs_journal_list *jl) 571static inline void get_journal_list(struct reiserfs_journal_list *jl)
529{ 572{
530 jl->j_refcount++; 573 jl->j_refcount++;
531} 574}
532 575
533static inline void put_journal_list(struct super_block *s, 576static inline void put_journal_list(struct super_block *s,
534 struct reiserfs_journal_list *jl) 577 struct reiserfs_journal_list *jl)
535{ 578{
536 if (jl->j_refcount < 1) { 579 if (jl->j_refcount < 1) {
537 reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id, 580 reiserfs_panic(s, "trans id %lu, refcount at %d",
538 jl->j_refcount); 581 jl->j_trans_id, jl->j_refcount);
539 } 582 }
540 if (--jl->j_refcount == 0) 583 if (--jl->j_refcount == 0)
541 reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s); 584 reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
542} 585}
543 586
544/* 587/*
@@ -546,358 +589,375 @@ static inline void put_journal_list(struct super_block *s,
546** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a 589** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
547** transaction. 590** transaction.
548*/ 591*/
549static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { 592static void cleanup_freed_for_journal_list(struct super_block *p_s_sb,
593 struct reiserfs_journal_list *jl)
594{
550 595
551 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; 596 struct reiserfs_list_bitmap *jb = jl->j_list_bitmap;
552 if (jb) { 597 if (jb) {
553 cleanup_bitmap_list(p_s_sb, jb) ; 598 cleanup_bitmap_list(p_s_sb, jb);
554 } 599 }
555 jl->j_list_bitmap->journal_list = NULL ; 600 jl->j_list_bitmap->journal_list = NULL;
556 jl->j_list_bitmap = NULL ; 601 jl->j_list_bitmap = NULL;
557} 602}
558 603
559static int journal_list_still_alive(struct super_block *s, 604static int journal_list_still_alive(struct super_block *s,
560 unsigned long trans_id) 605 unsigned long trans_id)
561{ 606{
562 struct reiserfs_journal *journal = SB_JOURNAL (s); 607 struct reiserfs_journal *journal = SB_JOURNAL(s);
563 struct list_head *entry = &journal->j_journal_list; 608 struct list_head *entry = &journal->j_journal_list;
564 struct reiserfs_journal_list *jl; 609 struct reiserfs_journal_list *jl;
565 610
566 if (!list_empty(entry)) { 611 if (!list_empty(entry)) {
567 jl = JOURNAL_LIST_ENTRY(entry->next); 612 jl = JOURNAL_LIST_ENTRY(entry->next);
568 if (jl->j_trans_id <= trans_id) { 613 if (jl->j_trans_id <= trans_id) {
569 return 1; 614 return 1;
570 } 615 }
571 } 616 }
572 return 0; 617 return 0;
573} 618}
574 619
575static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { 620static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
576 char b[BDEVNAME_SIZE]; 621{
577 622 char b[BDEVNAME_SIZE];
578 if (buffer_journaled(bh)) { 623
579 reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk", 624 if (buffer_journaled(bh)) {
580 bh->b_blocknr, bdevname(bh->b_bdev, b)) ; 625 reiserfs_warning(NULL,
581 } 626 "clm-2084: pinned buffer %lu:%s sent to disk",
582 if (uptodate) 627 bh->b_blocknr, bdevname(bh->b_bdev, b));
583 set_buffer_uptodate(bh) ; 628 }
584 else 629 if (uptodate)
585 clear_buffer_uptodate(bh) ; 630 set_buffer_uptodate(bh);
586 unlock_buffer(bh) ; 631 else
587 put_bh(bh) ; 632 clear_buffer_uptodate(bh);
588} 633 unlock_buffer(bh);
589 634 put_bh(bh);
590static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate) { 635}
591 if (uptodate) 636
592 set_buffer_uptodate(bh) ; 637static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
593 else 638{
594 clear_buffer_uptodate(bh) ; 639 if (uptodate)
595 unlock_buffer(bh) ; 640 set_buffer_uptodate(bh);
596 put_bh(bh) ; 641 else
597} 642 clear_buffer_uptodate(bh);
598 643 unlock_buffer(bh);
599static void submit_logged_buffer(struct buffer_head *bh) { 644 put_bh(bh);
600 get_bh(bh) ; 645}
601 bh->b_end_io = reiserfs_end_buffer_io_sync ; 646
602 clear_buffer_journal_new (bh); 647static void submit_logged_buffer(struct buffer_head *bh)
603 clear_buffer_dirty(bh) ; 648{
604 if (!test_clear_buffer_journal_test (bh)) 649 get_bh(bh);
605 BUG(); 650 bh->b_end_io = reiserfs_end_buffer_io_sync;
606 if (!buffer_uptodate(bh)) 651 clear_buffer_journal_new(bh);
607 BUG(); 652 clear_buffer_dirty(bh);
608 submit_bh(WRITE, bh) ; 653 if (!test_clear_buffer_journal_test(bh))
609} 654 BUG();
610 655 if (!buffer_uptodate(bh))
611static void submit_ordered_buffer(struct buffer_head *bh) { 656 BUG();
612 get_bh(bh) ; 657 submit_bh(WRITE, bh);
613 bh->b_end_io = reiserfs_end_ordered_io; 658}
614 clear_buffer_dirty(bh) ; 659
615 if (!buffer_uptodate(bh)) 660static void submit_ordered_buffer(struct buffer_head *bh)
616 BUG(); 661{
617 submit_bh(WRITE, bh) ; 662 get_bh(bh);
618} 663 bh->b_end_io = reiserfs_end_ordered_io;
619 664 clear_buffer_dirty(bh);
620static int submit_barrier_buffer(struct buffer_head *bh) { 665 if (!buffer_uptodate(bh))
621 get_bh(bh) ; 666 BUG();
622 bh->b_end_io = reiserfs_end_ordered_io; 667 submit_bh(WRITE, bh);
623 clear_buffer_dirty(bh) ; 668}
624 if (!buffer_uptodate(bh)) 669
625 BUG(); 670static int submit_barrier_buffer(struct buffer_head *bh)
626 return submit_bh(WRITE_BARRIER, bh) ; 671{
672 get_bh(bh);
673 bh->b_end_io = reiserfs_end_ordered_io;
674 clear_buffer_dirty(bh);
675 if (!buffer_uptodate(bh))
676 BUG();
677 return submit_bh(WRITE_BARRIER, bh);
627} 678}
628 679
629static void check_barrier_completion(struct super_block *s, 680static void check_barrier_completion(struct super_block *s,
630 struct buffer_head *bh) { 681 struct buffer_head *bh)
631 if (buffer_eopnotsupp(bh)) { 682{
632 clear_buffer_eopnotsupp(bh); 683 if (buffer_eopnotsupp(bh)) {
633 disable_barrier(s); 684 clear_buffer_eopnotsupp(bh);
634 set_buffer_uptodate(bh); 685 disable_barrier(s);
635 set_buffer_dirty(bh); 686 set_buffer_uptodate(bh);
636 sync_dirty_buffer(bh); 687 set_buffer_dirty(bh);
637 } 688 sync_dirty_buffer(bh);
689 }
638} 690}
639 691
640#define CHUNK_SIZE 32 692#define CHUNK_SIZE 32
641struct buffer_chunk { 693struct buffer_chunk {
642 struct buffer_head *bh[CHUNK_SIZE]; 694 struct buffer_head *bh[CHUNK_SIZE];
643 int nr; 695 int nr;
644}; 696};
645 697
646static void write_chunk(struct buffer_chunk *chunk) { 698static void write_chunk(struct buffer_chunk *chunk)
647 int i; 699{
648 get_fs_excl(); 700 int i;
649 for (i = 0; i < chunk->nr ; i++) { 701 get_fs_excl();
650 submit_logged_buffer(chunk->bh[i]) ; 702 for (i = 0; i < chunk->nr; i++) {
651 } 703 submit_logged_buffer(chunk->bh[i]);
652 chunk->nr = 0; 704 }
653 put_fs_excl(); 705 chunk->nr = 0;
706 put_fs_excl();
654} 707}
655 708
656static void write_ordered_chunk(struct buffer_chunk *chunk) { 709static void write_ordered_chunk(struct buffer_chunk *chunk)
657 int i; 710{
658 get_fs_excl(); 711 int i;
659 for (i = 0; i < chunk->nr ; i++) { 712 get_fs_excl();
660 submit_ordered_buffer(chunk->bh[i]) ; 713 for (i = 0; i < chunk->nr; i++) {
661 } 714 submit_ordered_buffer(chunk->bh[i]);
662 chunk->nr = 0; 715 }
663 put_fs_excl(); 716 chunk->nr = 0;
717 put_fs_excl();
664} 718}
665 719
666static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, 720static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh,
667 spinlock_t *lock, 721 spinlock_t * lock, void (fn) (struct buffer_chunk *))
668 void (fn)(struct buffer_chunk *))
669{ 722{
670 int ret = 0; 723 int ret = 0;
671 if (chunk->nr >= CHUNK_SIZE) 724 if (chunk->nr >= CHUNK_SIZE)
672 BUG(); 725 BUG();
673 chunk->bh[chunk->nr++] = bh; 726 chunk->bh[chunk->nr++] = bh;
674 if (chunk->nr >= CHUNK_SIZE) { 727 if (chunk->nr >= CHUNK_SIZE) {
675 ret = 1; 728 ret = 1;
676 if (lock) 729 if (lock)
677 spin_unlock(lock); 730 spin_unlock(lock);
678 fn(chunk); 731 fn(chunk);
679 if (lock) 732 if (lock)
680 spin_lock(lock); 733 spin_lock(lock);
681 } 734 }
682 return ret; 735 return ret;
683} 736}
684 737
685
686static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0); 738static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
687static struct reiserfs_jh *alloc_jh(void) { 739static struct reiserfs_jh *alloc_jh(void)
688 struct reiserfs_jh *jh; 740{
689 while(1) { 741 struct reiserfs_jh *jh;
690 jh = kmalloc(sizeof(*jh), GFP_NOFS); 742 while (1) {
691 if (jh) { 743 jh = kmalloc(sizeof(*jh), GFP_NOFS);
692 atomic_inc(&nr_reiserfs_jh); 744 if (jh) {
693 return jh; 745 atomic_inc(&nr_reiserfs_jh);
746 return jh;
747 }
748 yield();
694 } 749 }
695 yield();
696 }
697} 750}
698 751
699/* 752/*
700 * we want to free the jh when the buffer has been written 753 * we want to free the jh when the buffer has been written
701 * and waited on 754 * and waited on
702 */ 755 */
703void reiserfs_free_jh(struct buffer_head *bh) { 756void reiserfs_free_jh(struct buffer_head *bh)
704 struct reiserfs_jh *jh; 757{
705 758 struct reiserfs_jh *jh;
706 jh = bh->b_private; 759
707 if (jh) { 760 jh = bh->b_private;
708 bh->b_private = NULL; 761 if (jh) {
709 jh->bh = NULL; 762 bh->b_private = NULL;
710 list_del_init(&jh->list); 763 jh->bh = NULL;
711 kfree(jh); 764 list_del_init(&jh->list);
712 if (atomic_read(&nr_reiserfs_jh) <= 0) 765 kfree(jh);
713 BUG(); 766 if (atomic_read(&nr_reiserfs_jh) <= 0)
714 atomic_dec(&nr_reiserfs_jh); 767 BUG();
715 put_bh(bh); 768 atomic_dec(&nr_reiserfs_jh);
716 } 769 put_bh(bh);
770 }
717} 771}
718 772
719static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh, 773static inline int __add_jh(struct reiserfs_journal *j, struct buffer_head *bh,
720 int tail) 774 int tail)
721{ 775{
722 struct reiserfs_jh *jh; 776 struct reiserfs_jh *jh;
723 777
724 if (bh->b_private) { 778 if (bh->b_private) {
725 spin_lock(&j->j_dirty_buffers_lock); 779 spin_lock(&j->j_dirty_buffers_lock);
726 if (!bh->b_private) { 780 if (!bh->b_private) {
727 spin_unlock(&j->j_dirty_buffers_lock); 781 spin_unlock(&j->j_dirty_buffers_lock);
728 goto no_jh; 782 goto no_jh;
783 }
784 jh = bh->b_private;
785 list_del_init(&jh->list);
786 } else {
787 no_jh:
788 get_bh(bh);
789 jh = alloc_jh();
790 spin_lock(&j->j_dirty_buffers_lock);
791 /* buffer must be locked for __add_jh, should be able to have
792 * two adds at the same time
793 */
794 if (bh->b_private)
795 BUG();
796 jh->bh = bh;
797 bh->b_private = jh;
729 } 798 }
730 jh = bh->b_private; 799 jh->jl = j->j_current_jl;
731 list_del_init(&jh->list); 800 if (tail)
732 } else { 801 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
733no_jh: 802 else {
734 get_bh(bh); 803 list_add_tail(&jh->list, &jh->jl->j_bh_list);
735 jh = alloc_jh(); 804 }
736 spin_lock(&j->j_dirty_buffers_lock); 805 spin_unlock(&j->j_dirty_buffers_lock);
737 /* buffer must be locked for __add_jh, should be able to have 806 return 0;
738 * two adds at the same time
739 */
740 if (bh->b_private)
741 BUG();
742 jh->bh = bh;
743 bh->b_private = jh;
744 }
745 jh->jl = j->j_current_jl;
746 if (tail)
747 list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
748 else {
749 list_add_tail(&jh->list, &jh->jl->j_bh_list);
750 }
751 spin_unlock(&j->j_dirty_buffers_lock);
752 return 0;
753} 807}
754 808
755int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh) { 809int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh)
756 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1); 810{
811 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
757} 812}
758int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh) { 813int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh)
759 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0); 814{
815 return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
760} 816}
761 817
762#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list) 818#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
763static int write_ordered_buffers(spinlock_t *lock, 819static int write_ordered_buffers(spinlock_t * lock,
764 struct reiserfs_journal *j, 820 struct reiserfs_journal *j,
765 struct reiserfs_journal_list *jl, 821 struct reiserfs_journal_list *jl,
766 struct list_head *list) 822 struct list_head *list)
767{ 823{
768 struct buffer_head *bh; 824 struct buffer_head *bh;
769 struct reiserfs_jh *jh; 825 struct reiserfs_jh *jh;
770 int ret = j->j_errno; 826 int ret = j->j_errno;
771 struct buffer_chunk chunk; 827 struct buffer_chunk chunk;
772 struct list_head tmp; 828 struct list_head tmp;
773 INIT_LIST_HEAD(&tmp); 829 INIT_LIST_HEAD(&tmp);
774 830
775 chunk.nr = 0; 831 chunk.nr = 0;
776 spin_lock(lock); 832 spin_lock(lock);
777 while(!list_empty(list)) { 833 while (!list_empty(list)) {
778 jh = JH_ENTRY(list->next); 834 jh = JH_ENTRY(list->next);
779 bh = jh->bh; 835 bh = jh->bh;
780 get_bh(bh); 836 get_bh(bh);
781 if (test_set_buffer_locked(bh)) { 837 if (test_set_buffer_locked(bh)) {
782 if (!buffer_dirty(bh)) { 838 if (!buffer_dirty(bh)) {
783 list_del_init(&jh->list); 839 list_del_init(&jh->list);
784 list_add(&jh->list, &tmp); 840 list_add(&jh->list, &tmp);
785 goto loop_next; 841 goto loop_next;
786 } 842 }
787 spin_unlock(lock); 843 spin_unlock(lock);
788 if (chunk.nr) 844 if (chunk.nr)
845 write_ordered_chunk(&chunk);
846 wait_on_buffer(bh);
847 cond_resched();
848 spin_lock(lock);
849 goto loop_next;
850 }
851 if (buffer_dirty(bh)) {
852 list_del_init(&jh->list);
853 list_add(&jh->list, &tmp);
854 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
855 } else {
856 reiserfs_free_jh(bh);
857 unlock_buffer(bh);
858 }
859 loop_next:
860 put_bh(bh);
861 cond_resched_lock(lock);
862 }
863 if (chunk.nr) {
864 spin_unlock(lock);
789 write_ordered_chunk(&chunk); 865 write_ordered_chunk(&chunk);
790 wait_on_buffer(bh); 866 spin_lock(lock);
791 cond_resched();
792 spin_lock(lock);
793 goto loop_next;
794 }
795 if (buffer_dirty(bh)) {
796 list_del_init(&jh->list);
797 list_add(&jh->list, &tmp);
798 add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
799 } else {
800 reiserfs_free_jh(bh);
801 unlock_buffer(bh);
802 } 867 }
803loop_next: 868 while (!list_empty(&tmp)) {
804 put_bh(bh); 869 jh = JH_ENTRY(tmp.prev);
805 cond_resched_lock(lock); 870 bh = jh->bh;
806 } 871 get_bh(bh);
807 if (chunk.nr) { 872 reiserfs_free_jh(bh);
808 spin_unlock(lock); 873
809 write_ordered_chunk(&chunk); 874 if (buffer_locked(bh)) {
810 spin_lock(lock); 875 spin_unlock(lock);
811 } 876 wait_on_buffer(bh);
812 while(!list_empty(&tmp)) { 877 spin_lock(lock);
813 jh = JH_ENTRY(tmp.prev); 878 }
814 bh = jh->bh; 879 if (!buffer_uptodate(bh)) {
815 get_bh(bh); 880 ret = -EIO;
816 reiserfs_free_jh(bh); 881 }
817 882 put_bh(bh);
818 if (buffer_locked(bh)) { 883 cond_resched_lock(lock);
819 spin_unlock(lock);
820 wait_on_buffer(bh);
821 spin_lock(lock);
822 } 884 }
823 if (!buffer_uptodate(bh)) { 885 spin_unlock(lock);
824 ret = -EIO; 886 return ret;
825 } 887}
826 put_bh(bh);
827 cond_resched_lock(lock);
828 }
829 spin_unlock(lock);
830 return ret;
831}
832
833static int flush_older_commits(struct super_block *s, struct reiserfs_journal_list *jl) {
834 struct reiserfs_journal *journal = SB_JOURNAL (s);
835 struct reiserfs_journal_list *other_jl;
836 struct reiserfs_journal_list *first_jl;
837 struct list_head *entry;
838 unsigned long trans_id = jl->j_trans_id;
839 unsigned long other_trans_id;
840 unsigned long first_trans_id;
841
842find_first:
843 /*
844 * first we walk backwards to find the oldest uncommitted transation
845 */
846 first_jl = jl;
847 entry = jl->j_list.prev;
848 while(1) {
849 other_jl = JOURNAL_LIST_ENTRY(entry);
850 if (entry == &journal->j_journal_list ||
851 atomic_read(&other_jl->j_older_commits_done))
852 break;
853
854 first_jl = other_jl;
855 entry = other_jl->j_list.prev;
856 }
857
858 /* if we didn't find any older uncommitted transactions, return now */
859 if (first_jl == jl) {
860 return 0;
861 }
862
863 first_trans_id = first_jl->j_trans_id;
864 888
865 entry = &first_jl->j_list; 889static int flush_older_commits(struct super_block *s,
866 while(1) { 890 struct reiserfs_journal_list *jl)
867 other_jl = JOURNAL_LIST_ENTRY(entry); 891{
868 other_trans_id = other_jl->j_trans_id; 892 struct reiserfs_journal *journal = SB_JOURNAL(s);
893 struct reiserfs_journal_list *other_jl;
894 struct reiserfs_journal_list *first_jl;
895 struct list_head *entry;
896 unsigned long trans_id = jl->j_trans_id;
897 unsigned long other_trans_id;
898 unsigned long first_trans_id;
899
900 find_first:
901 /*
902 * first we walk backwards to find the oldest uncommitted transation
903 */
904 first_jl = jl;
905 entry = jl->j_list.prev;
906 while (1) {
907 other_jl = JOURNAL_LIST_ENTRY(entry);
908 if (entry == &journal->j_journal_list ||
909 atomic_read(&other_jl->j_older_commits_done))
910 break;
869 911
870 if (other_trans_id < trans_id) { 912 first_jl = other_jl;
871 if (atomic_read(&other_jl->j_commit_left) != 0) { 913 entry = other_jl->j_list.prev;
872 flush_commit_list(s, other_jl, 0); 914 }
873 915
874 /* list we were called with is gone, return */ 916 /* if we didn't find any older uncommitted transactions, return now */
875 if (!journal_list_still_alive(s, trans_id)) 917 if (first_jl == jl) {
876 return 1; 918 return 0;
919 }
877 920
878 /* the one we just flushed is gone, this means all 921 first_trans_id = first_jl->j_trans_id;
879 * older lists are also gone, so first_jl is no longer 922
880 * valid either. Go back to the beginning. 923 entry = &first_jl->j_list;
881 */ 924 while (1) {
882 if (!journal_list_still_alive(s, other_trans_id)) { 925 other_jl = JOURNAL_LIST_ENTRY(entry);
883 goto find_first; 926 other_trans_id = other_jl->j_trans_id;
927
928 if (other_trans_id < trans_id) {
929 if (atomic_read(&other_jl->j_commit_left) != 0) {
930 flush_commit_list(s, other_jl, 0);
931
932 /* list we were called with is gone, return */
933 if (!journal_list_still_alive(s, trans_id))
934 return 1;
935
936 /* the one we just flushed is gone, this means all
937 * older lists are also gone, so first_jl is no longer
938 * valid either. Go back to the beginning.
939 */
940 if (!journal_list_still_alive
941 (s, other_trans_id)) {
942 goto find_first;
943 }
944 }
945 entry = entry->next;
946 if (entry == &journal->j_journal_list)
947 return 0;
948 } else {
949 return 0;
884 } 950 }
885 }
886 entry = entry->next;
887 if (entry == &journal->j_journal_list)
888 return 0;
889 } else {
890 return 0;
891 } 951 }
892 } 952 return 0;
893 return 0;
894} 953}
895int reiserfs_async_progress_wait(struct super_block *s) { 954int reiserfs_async_progress_wait(struct super_block *s)
896 DEFINE_WAIT(wait); 955{
897 struct reiserfs_journal *j = SB_JOURNAL(s); 956 DEFINE_WAIT(wait);
898 if (atomic_read(&j->j_async_throttle)) 957 struct reiserfs_journal *j = SB_JOURNAL(s);
899 blk_congestion_wait(WRITE, HZ/10); 958 if (atomic_read(&j->j_async_throttle))
900 return 0; 959 blk_congestion_wait(WRITE, HZ / 10);
960 return 0;
901} 961}
902 962
903/* 963/*
@@ -907,212 +967,225 @@ int reiserfs_async_progress_wait(struct super_block *s) {
907** Before the commit block can by written, every other log block must be safely on disk 967** Before the commit block can by written, every other log block must be safely on disk
908** 968**
909*/ 969*/
910static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { 970static int flush_commit_list(struct super_block *s,
911 int i; 971 struct reiserfs_journal_list *jl, int flushall)
912 int bn ; 972{
913 struct buffer_head *tbh = NULL ; 973 int i;
914 unsigned long trans_id = jl->j_trans_id; 974 int bn;
915 struct reiserfs_journal *journal = SB_JOURNAL (s); 975 struct buffer_head *tbh = NULL;
916 int barrier = 0; 976 unsigned long trans_id = jl->j_trans_id;
917 int retval = 0; 977 struct reiserfs_journal *journal = SB_JOURNAL(s);
918 978 int barrier = 0;
919 reiserfs_check_lock_depth(s, "flush_commit_list") ; 979 int retval = 0;
920 980
921 if (atomic_read(&jl->j_older_commits_done)) { 981 reiserfs_check_lock_depth(s, "flush_commit_list");
922 return 0 ; 982
923 } 983 if (atomic_read(&jl->j_older_commits_done)) {
924 984 return 0;
925 get_fs_excl(); 985 }
926 986
927 /* before we can put our commit blocks on disk, we have to make sure everyone older than 987 get_fs_excl();
928 ** us is on disk too 988
929 */ 989 /* before we can put our commit blocks on disk, we have to make sure everyone older than
930 BUG_ON (jl->j_len <= 0); 990 ** us is on disk too
931 BUG_ON (trans_id == journal->j_trans_id); 991 */
932 992 BUG_ON(jl->j_len <= 0);
933 get_journal_list(jl); 993 BUG_ON(trans_id == journal->j_trans_id);
934 if (flushall) { 994
935 if (flush_older_commits(s, jl) == 1) { 995 get_journal_list(jl);
936 /* list disappeared during flush_older_commits. return */ 996 if (flushall) {
937 goto put_jl; 997 if (flush_older_commits(s, jl) == 1) {
938 } 998 /* list disappeared during flush_older_commits. return */
939 } 999 goto put_jl;
940 1000 }
941 /* make sure nobody is trying to flush this one at the same time */ 1001 }
942 down(&jl->j_commit_lock); 1002
943 if (!journal_list_still_alive(s, trans_id)) { 1003 /* make sure nobody is trying to flush this one at the same time */
944 up(&jl->j_commit_lock); 1004 down(&jl->j_commit_lock);
945 goto put_jl; 1005 if (!journal_list_still_alive(s, trans_id)) {
946 } 1006 up(&jl->j_commit_lock);
947 BUG_ON (jl->j_trans_id == 0); 1007 goto put_jl;
948 1008 }
949 /* this commit is done, exit */ 1009 BUG_ON(jl->j_trans_id == 0);
950 if (atomic_read(&(jl->j_commit_left)) <= 0) { 1010
951 if (flushall) { 1011 /* this commit is done, exit */
952 atomic_set(&(jl->j_older_commits_done), 1) ; 1012 if (atomic_read(&(jl->j_commit_left)) <= 0) {
953 } 1013 if (flushall) {
954 up(&jl->j_commit_lock); 1014 atomic_set(&(jl->j_older_commits_done), 1);
955 goto put_jl; 1015 }
956 } 1016 up(&jl->j_commit_lock);
957 1017 goto put_jl;
958 if (!list_empty(&jl->j_bh_list)) { 1018 }
959 unlock_kernel(); 1019
960 write_ordered_buffers(&journal->j_dirty_buffers_lock, 1020 if (!list_empty(&jl->j_bh_list)) {
961 journal, jl, &jl->j_bh_list); 1021 unlock_kernel();
962 lock_kernel(); 1022 write_ordered_buffers(&journal->j_dirty_buffers_lock,
963 } 1023 journal, jl, &jl->j_bh_list);
964 BUG_ON (!list_empty(&jl->j_bh_list)); 1024 lock_kernel();
965 /* 1025 }
966 * for the description block and all the log blocks, submit any buffers 1026 BUG_ON(!list_empty(&jl->j_bh_list));
967 * that haven't already reached the disk 1027 /*
968 */ 1028 * for the description block and all the log blocks, submit any buffers
969 atomic_inc(&journal->j_async_throttle); 1029 * that haven't already reached the disk
970 for (i = 0 ; i < (jl->j_len + 1) ; i++) { 1030 */
971 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) % 1031 atomic_inc(&journal->j_async_throttle);
972 SB_ONDISK_JOURNAL_SIZE(s); 1032 for (i = 0; i < (jl->j_len + 1); i++) {
973 tbh = journal_find_get_block(s, bn) ; 1033 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
974 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */ 1034 SB_ONDISK_JOURNAL_SIZE(s);
975 ll_rw_block(WRITE, 1, &tbh) ; 1035 tbh = journal_find_get_block(s, bn);
976 put_bh(tbh) ; 1036 if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
977 } 1037 ll_rw_block(WRITE, 1, &tbh);
978 atomic_dec(&journal->j_async_throttle); 1038 put_bh(tbh);
979 1039 }
980 /* wait on everything written so far before writing the commit 1040 atomic_dec(&journal->j_async_throttle);
981 * if we are in barrier mode, send the commit down now 1041
982 */ 1042 /* wait on everything written so far before writing the commit
983 barrier = reiserfs_barrier_flush(s); 1043 * if we are in barrier mode, send the commit down now
984 if (barrier) { 1044 */
985 int ret; 1045 barrier = reiserfs_barrier_flush(s);
986 lock_buffer(jl->j_commit_bh); 1046 if (barrier) {
987 ret = submit_barrier_buffer(jl->j_commit_bh); 1047 int ret;
988 if (ret == -EOPNOTSUPP) { 1048 lock_buffer(jl->j_commit_bh);
989 set_buffer_uptodate(jl->j_commit_bh); 1049 ret = submit_barrier_buffer(jl->j_commit_bh);
990 disable_barrier(s); 1050 if (ret == -EOPNOTSUPP) {
991 barrier = 0; 1051 set_buffer_uptodate(jl->j_commit_bh);
992 } 1052 disable_barrier(s);
993 } 1053 barrier = 0;
994 for (i = 0 ; i < (jl->j_len + 1) ; i++) { 1054 }
995 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + 1055 }
996 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ; 1056 for (i = 0; i < (jl->j_len + 1); i++) {
997 tbh = journal_find_get_block(s, bn) ; 1057 bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
998 wait_on_buffer(tbh) ; 1058 (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
999 // since we're using ll_rw_blk above, it might have skipped over 1059 tbh = journal_find_get_block(s, bn);
1000 // a locked buffer. Double check here 1060 wait_on_buffer(tbh);
1001 // 1061 // since we're using ll_rw_blk above, it might have skipped over
1002 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */ 1062 // a locked buffer. Double check here
1003 sync_dirty_buffer(tbh); 1063 //
1004 if (unlikely (!buffer_uptodate(tbh))) { 1064 if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
1065 sync_dirty_buffer(tbh);
1066 if (unlikely(!buffer_uptodate(tbh))) {
1005#ifdef CONFIG_REISERFS_CHECK 1067#ifdef CONFIG_REISERFS_CHECK
1006 reiserfs_warning(s, "journal-601, buffer write failed") ; 1068 reiserfs_warning(s, "journal-601, buffer write failed");
1007#endif 1069#endif
1008 retval = -EIO; 1070 retval = -EIO;
1009 } 1071 }
1010 put_bh(tbh) ; /* once for journal_find_get_block */ 1072 put_bh(tbh); /* once for journal_find_get_block */
1011 put_bh(tbh) ; /* once due to original getblk in do_journal_end */ 1073 put_bh(tbh); /* once due to original getblk in do_journal_end */
1012 atomic_dec(&(jl->j_commit_left)) ; 1074 atomic_dec(&(jl->j_commit_left));
1013 } 1075 }
1014 1076
1015 BUG_ON (atomic_read(&(jl->j_commit_left)) != 1); 1077 BUG_ON(atomic_read(&(jl->j_commit_left)) != 1);
1016 1078
1017 if (!barrier) { 1079 if (!barrier) {
1018 if (buffer_dirty(jl->j_commit_bh)) 1080 if (buffer_dirty(jl->j_commit_bh))
1019 BUG(); 1081 BUG();
1020 mark_buffer_dirty(jl->j_commit_bh) ; 1082 mark_buffer_dirty(jl->j_commit_bh);
1021 sync_dirty_buffer(jl->j_commit_bh) ; 1083 sync_dirty_buffer(jl->j_commit_bh);
1022 } else 1084 } else
1023 wait_on_buffer(jl->j_commit_bh); 1085 wait_on_buffer(jl->j_commit_bh);
1024 1086
1025 check_barrier_completion(s, jl->j_commit_bh); 1087 check_barrier_completion(s, jl->j_commit_bh);
1026 1088
1027 /* If there was a write error in the journal - we can't commit this 1089 /* If there was a write error in the journal - we can't commit this
1028 * transaction - it will be invalid and, if successful, will just end 1090 * transaction - it will be invalid and, if successful, will just end
1029 * up propogating the write error out to the filesystem. */ 1091 * up propogating the write error out to the filesystem. */
1030 if (unlikely (!buffer_uptodate(jl->j_commit_bh))) { 1092 if (unlikely(!buffer_uptodate(jl->j_commit_bh))) {
1031#ifdef CONFIG_REISERFS_CHECK 1093#ifdef CONFIG_REISERFS_CHECK
1032 reiserfs_warning(s, "journal-615: buffer write failed") ; 1094 reiserfs_warning(s, "journal-615: buffer write failed");
1033#endif 1095#endif
1034 retval = -EIO; 1096 retval = -EIO;
1035 } 1097 }
1036 bforget(jl->j_commit_bh) ; 1098 bforget(jl->j_commit_bh);
1037 if (journal->j_last_commit_id != 0 && 1099 if (journal->j_last_commit_id != 0 &&
1038 (jl->j_trans_id - journal->j_last_commit_id) != 1) { 1100 (jl->j_trans_id - journal->j_last_commit_id) != 1) {
1039 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu", 1101 reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
1040 journal->j_last_commit_id, 1102 journal->j_last_commit_id, jl->j_trans_id);
1041 jl->j_trans_id); 1103 }
1042 } 1104 journal->j_last_commit_id = jl->j_trans_id;
1043 journal->j_last_commit_id = jl->j_trans_id; 1105
1044 1106 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
1045 /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ 1107 cleanup_freed_for_journal_list(s, jl);
1046 cleanup_freed_for_journal_list(s, jl) ; 1108
1047 1109 retval = retval ? retval : journal->j_errno;
1048 retval = retval ? retval : journal->j_errno; 1110
1049 1111 /* mark the metadata dirty */
1050 /* mark the metadata dirty */ 1112 if (!retval)
1051 if (!retval) 1113 dirty_one_transaction(s, jl);
1052 dirty_one_transaction(s, jl); 1114 atomic_dec(&(jl->j_commit_left));
1053 atomic_dec(&(jl->j_commit_left)) ; 1115
1054 1116 if (flushall) {
1055 if (flushall) { 1117 atomic_set(&(jl->j_older_commits_done), 1);
1056 atomic_set(&(jl->j_older_commits_done), 1) ; 1118 }
1057 } 1119 up(&jl->j_commit_lock);
1058 up(&jl->j_commit_lock); 1120 put_jl:
1059put_jl: 1121 put_journal_list(s, jl);
1060 put_journal_list(s, jl); 1122
1061 1123 if (retval)
1062 if (retval) 1124 reiserfs_abort(s, retval, "Journal write error in %s",
1063 reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__); 1125 __FUNCTION__);
1064 put_fs_excl(); 1126 put_fs_excl();
1065 return retval; 1127 return retval;
1066} 1128}
1067 1129
1068/* 1130/*
1069** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or 1131** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
1070** returns NULL if it can't find anything 1132** returns NULL if it can't find anything
1071*/ 1133*/
1072static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { 1134static struct reiserfs_journal_list *find_newer_jl_for_cn(struct
1073 struct super_block *sb = cn->sb; 1135 reiserfs_journal_cnode
1074 b_blocknr_t blocknr = cn->blocknr ; 1136 *cn)
1137{
1138 struct super_block *sb = cn->sb;
1139 b_blocknr_t blocknr = cn->blocknr;
1075 1140
1076 cn = cn->hprev ; 1141 cn = cn->hprev;
1077 while(cn) { 1142 while (cn) {
1078 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) { 1143 if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
1079 return cn->jlist ; 1144 return cn->jlist;
1080 } 1145 }
1081 cn = cn->hprev ; 1146 cn = cn->hprev;
1082 } 1147 }
1083 return NULL ; 1148 return NULL;
1084} 1149}
1085 1150
1086static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, 1151static void remove_journal_hash(struct super_block *,
1087struct reiserfs_journal_list *, unsigned long, int); 1152 struct reiserfs_journal_cnode **,
1153 struct reiserfs_journal_list *, unsigned long,
1154 int);
1088 1155
1089/* 1156/*
1090** once all the real blocks have been flushed, it is safe to remove them from the 1157** once all the real blocks have been flushed, it is safe to remove them from the
1091** journal list for this transaction. Aside from freeing the cnode, this also allows the 1158** journal list for this transaction. Aside from freeing the cnode, this also allows the
1092** block to be reallocated for data blocks if it had been deleted. 1159** block to be reallocated for data blocks if it had been deleted.
1093*/ 1160*/
1094static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, int debug) { 1161static void remove_all_from_journal_list(struct super_block *p_s_sb,
1095 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1162 struct reiserfs_journal_list *jl,
1096 struct reiserfs_journal_cnode *cn, *last ; 1163 int debug)
1097 cn = jl->j_realblock ; 1164{
1098 1165 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1099 /* which is better, to lock once around the whole loop, or 1166 struct reiserfs_journal_cnode *cn, *last;
1100 ** to lock for each call to remove_journal_hash? 1167 cn = jl->j_realblock;
1101 */ 1168
1102 while(cn) { 1169 /* which is better, to lock once around the whole loop, or
1103 if (cn->blocknr != 0) { 1170 ** to lock for each call to remove_journal_hash?
1104 if (debug) { 1171 */
1105 reiserfs_warning (p_s_sb, "block %u, bh is %d, state %ld", cn->blocknr, 1172 while (cn) {
1106 cn->bh ? 1: 0, cn->state) ; 1173 if (cn->blocknr != 0) {
1107 } 1174 if (debug) {
1108 cn->state = 0 ; 1175 reiserfs_warning(p_s_sb,
1109 remove_journal_hash(p_s_sb, journal->j_list_hash_table, jl, cn->blocknr, 1) ; 1176 "block %u, bh is %d, state %ld",
1110 } 1177 cn->blocknr, cn->bh ? 1 : 0,
1111 last = cn ; 1178 cn->state);
1112 cn = cn->next ; 1179 }
1113 free_cnode(p_s_sb, last) ; 1180 cn->state = 0;
1114 } 1181 remove_journal_hash(p_s_sb, journal->j_list_hash_table,
1115 jl->j_realblock = NULL ; 1182 jl, cn->blocknr, 1);
1183 }
1184 last = cn;
1185 cn = cn->next;
1186 free_cnode(p_s_sb, last);
1187 }
1188 jl->j_realblock = NULL;
1116} 1189}
1117 1190
1118/* 1191/*
@@ -1122,98 +1195,107 @@ static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reis
1122** called by flush_journal_list, before it calls remove_all_from_journal_list 1195** called by flush_journal_list, before it calls remove_all_from_journal_list
1123** 1196**
1124*/ 1197*/
1125static int _update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { 1198static int _update_journal_header_block(struct super_block *p_s_sb,
1126 struct reiserfs_journal_header *jh ; 1199 unsigned long offset,
1127 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1200 unsigned long trans_id)
1201{
1202 struct reiserfs_journal_header *jh;
1203 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1128 1204
1129 if (reiserfs_is_journal_aborted (journal)) 1205 if (reiserfs_is_journal_aborted(journal))
1130 return -EIO; 1206 return -EIO;
1131 1207
1132 if (trans_id >= journal->j_last_flush_trans_id) { 1208 if (trans_id >= journal->j_last_flush_trans_id) {
1133 if (buffer_locked((journal->j_header_bh))) { 1209 if (buffer_locked((journal->j_header_bh))) {
1134 wait_on_buffer((journal->j_header_bh)) ; 1210 wait_on_buffer((journal->j_header_bh));
1135 if (unlikely (!buffer_uptodate(journal->j_header_bh))) { 1211 if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
1136#ifdef CONFIG_REISERFS_CHECK 1212#ifdef CONFIG_REISERFS_CHECK
1137 reiserfs_warning (p_s_sb, "journal-699: buffer write failed") ; 1213 reiserfs_warning(p_s_sb,
1214 "journal-699: buffer write failed");
1138#endif 1215#endif
1139 return -EIO; 1216 return -EIO;
1140 } 1217 }
1141 } 1218 }
1142 journal->j_last_flush_trans_id = trans_id ; 1219 journal->j_last_flush_trans_id = trans_id;
1143 journal->j_first_unflushed_offset = offset ; 1220 journal->j_first_unflushed_offset = offset;
1144 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; 1221 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->
1145 jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; 1222 b_data);
1146 jh->j_first_unflushed_offset = cpu_to_le32(offset) ; 1223 jh->j_last_flush_trans_id = cpu_to_le32(trans_id);
1147 jh->j_mount_id = cpu_to_le32(journal->j_mount_id) ; 1224 jh->j_first_unflushed_offset = cpu_to_le32(offset);
1148 1225 jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
1149 if (reiserfs_barrier_flush(p_s_sb)) { 1226
1150 int ret; 1227 if (reiserfs_barrier_flush(p_s_sb)) {
1151 lock_buffer(journal->j_header_bh); 1228 int ret;
1152 ret = submit_barrier_buffer(journal->j_header_bh); 1229 lock_buffer(journal->j_header_bh);
1153 if (ret == -EOPNOTSUPP) { 1230 ret = submit_barrier_buffer(journal->j_header_bh);
1154 set_buffer_uptodate(journal->j_header_bh); 1231 if (ret == -EOPNOTSUPP) {
1155 disable_barrier(p_s_sb); 1232 set_buffer_uptodate(journal->j_header_bh);
1156 goto sync; 1233 disable_barrier(p_s_sb);
1157 } 1234 goto sync;
1158 wait_on_buffer(journal->j_header_bh); 1235 }
1159 check_barrier_completion(p_s_sb, journal->j_header_bh); 1236 wait_on_buffer(journal->j_header_bh);
1160 } else { 1237 check_barrier_completion(p_s_sb, journal->j_header_bh);
1161sync: 1238 } else {
1162 set_buffer_dirty(journal->j_header_bh) ; 1239 sync:
1163 sync_dirty_buffer(journal->j_header_bh) ; 1240 set_buffer_dirty(journal->j_header_bh);
1164 } 1241 sync_dirty_buffer(journal->j_header_bh);
1165 if (!buffer_uptodate(journal->j_header_bh)) { 1242 }
1166 reiserfs_warning (p_s_sb, "journal-837: IO error during journal replay"); 1243 if (!buffer_uptodate(journal->j_header_bh)) {
1167 return -EIO ; 1244 reiserfs_warning(p_s_sb,
1168 } 1245 "journal-837: IO error during journal replay");
1169 } 1246 return -EIO;
1170 return 0 ; 1247 }
1171} 1248 }
1172 1249 return 0;
1173static int update_journal_header_block(struct super_block *p_s_sb,
1174 unsigned long offset,
1175 unsigned long trans_id) {
1176 return _update_journal_header_block(p_s_sb, offset, trans_id);
1177} 1250}
1251
1252static int update_journal_header_block(struct super_block *p_s_sb,
1253 unsigned long offset,
1254 unsigned long trans_id)
1255{
1256 return _update_journal_header_block(p_s_sb, offset, trans_id);
1257}
1258
1178/* 1259/*
1179** flush any and all journal lists older than you are 1260** flush any and all journal lists older than you are
1180** can only be called from flush_journal_list 1261** can only be called from flush_journal_list
1181*/ 1262*/
1182static int flush_older_journal_lists(struct super_block *p_s_sb, 1263static int flush_older_journal_lists(struct super_block *p_s_sb,
1183 struct reiserfs_journal_list *jl) 1264 struct reiserfs_journal_list *jl)
1184{ 1265{
1185 struct list_head *entry; 1266 struct list_head *entry;
1186 struct reiserfs_journal_list *other_jl ; 1267 struct reiserfs_journal_list *other_jl;
1187 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 1268 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1188 unsigned long trans_id = jl->j_trans_id; 1269 unsigned long trans_id = jl->j_trans_id;
1189 1270
1190 /* we know we are the only ones flushing things, no extra race 1271 /* we know we are the only ones flushing things, no extra race
1191 * protection is required. 1272 * protection is required.
1192 */ 1273 */
1193restart: 1274 restart:
1194 entry = journal->j_journal_list.next; 1275 entry = journal->j_journal_list.next;
1195 /* Did we wrap? */ 1276 /* Did we wrap? */
1196 if (entry == &journal->j_journal_list) 1277 if (entry == &journal->j_journal_list)
1197 return 0; 1278 return 0;
1198 other_jl = JOURNAL_LIST_ENTRY(entry); 1279 other_jl = JOURNAL_LIST_ENTRY(entry);
1199 if (other_jl->j_trans_id < trans_id) { 1280 if (other_jl->j_trans_id < trans_id) {
1200 BUG_ON (other_jl->j_refcount <= 0); 1281 BUG_ON(other_jl->j_refcount <= 0);
1201 /* do not flush all */ 1282 /* do not flush all */
1202 flush_journal_list(p_s_sb, other_jl, 0) ; 1283 flush_journal_list(p_s_sb, other_jl, 0);
1203 1284
1204 /* other_jl is now deleted from the list */ 1285 /* other_jl is now deleted from the list */
1205 goto restart; 1286 goto restart;
1206 } 1287 }
1207 return 0 ; 1288 return 0;
1208} 1289}
1209 1290
1210static void del_from_work_list(struct super_block *s, 1291static void del_from_work_list(struct super_block *s,
1211 struct reiserfs_journal_list *jl) { 1292 struct reiserfs_journal_list *jl)
1212 struct reiserfs_journal *journal = SB_JOURNAL (s); 1293{
1213 if (!list_empty(&jl->j_working_list)) { 1294 struct reiserfs_journal *journal = SB_JOURNAL(s);
1214 list_del_init(&jl->j_working_list); 1295 if (!list_empty(&jl->j_working_list)) {
1215 journal->j_num_work_lists--; 1296 list_del_init(&jl->j_working_list);
1216 } 1297 journal->j_num_work_lists--;
1298 }
1217} 1299}
1218 1300
1219/* flush a journal list, both commit and real blocks 1301/* flush a journal list, both commit and real blocks
@@ -1225,386 +1307,407 @@ static void del_from_work_list(struct super_block *s,
1225** and the journal is locked. That means it can only be called from 1307** and the journal is locked. That means it can only be called from
1226** do_journal_end, or by journal_release 1308** do_journal_end, or by journal_release
1227*/ 1309*/
1228static int flush_journal_list(struct super_block *s, 1310static int flush_journal_list(struct super_block *s,
1229 struct reiserfs_journal_list *jl, int flushall) { 1311 struct reiserfs_journal_list *jl, int flushall)
1230 struct reiserfs_journal_list *pjl ; 1312{
1231 struct reiserfs_journal_cnode *cn, *last ; 1313 struct reiserfs_journal_list *pjl;
1232 int count ; 1314 struct reiserfs_journal_cnode *cn, *last;
1233 int was_jwait = 0 ; 1315 int count;
1234 int was_dirty = 0 ; 1316 int was_jwait = 0;
1235 struct buffer_head *saved_bh ; 1317 int was_dirty = 0;
1236 unsigned long j_len_saved = jl->j_len ; 1318 struct buffer_head *saved_bh;
1237 struct reiserfs_journal *journal = SB_JOURNAL (s); 1319 unsigned long j_len_saved = jl->j_len;
1238 int err = 0; 1320 struct reiserfs_journal *journal = SB_JOURNAL(s);
1239 1321 int err = 0;
1240 BUG_ON (j_len_saved <= 0); 1322
1241 1323 BUG_ON(j_len_saved <= 0);
1242 if (atomic_read(&journal->j_wcount) != 0) { 1324
1243 reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d", 1325 if (atomic_read(&journal->j_wcount) != 0) {
1244 atomic_read(&journal->j_wcount)) ; 1326 reiserfs_warning(s,
1245 } 1327 "clm-2048: flush_journal_list called with wcount %d",
1246 BUG_ON (jl->j_trans_id == 0); 1328 atomic_read(&journal->j_wcount));
1247 1329 }
1248 /* if flushall == 0, the lock is already held */ 1330 BUG_ON(jl->j_trans_id == 0);
1249 if (flushall) { 1331
1250 down(&journal->j_flush_sem); 1332 /* if flushall == 0, the lock is already held */
1251 } else if (!down_trylock(&journal->j_flush_sem)) { 1333 if (flushall) {
1252 BUG(); 1334 down(&journal->j_flush_sem);
1253 } 1335 } else if (!down_trylock(&journal->j_flush_sem)) {
1254 1336 BUG();
1255 count = 0 ; 1337 }
1256 if (j_len_saved > journal->j_trans_max) { 1338
1257 reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, trans id %lu\n", j_len_saved, jl->j_trans_id); 1339 count = 0;
1258 return 0 ; 1340 if (j_len_saved > journal->j_trans_max) {
1259 } 1341 reiserfs_panic(s,
1260 1342 "journal-715: flush_journal_list, length is %lu, trans id %lu\n",
1261 get_fs_excl(); 1343 j_len_saved, jl->j_trans_id);
1262 1344 return 0;
1263 /* if all the work is already done, get out of here */ 1345 }
1264 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1346
1265 atomic_read(&(jl->j_commit_left)) <= 0) { 1347 get_fs_excl();
1266 goto flush_older_and_return ; 1348
1267 } 1349 /* if all the work is already done, get out of here */
1268 1350 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1269 /* start by putting the commit list on disk. This will also flush 1351 atomic_read(&(jl->j_commit_left)) <= 0) {
1270 ** the commit lists of any olders transactions 1352 goto flush_older_and_return;
1271 */ 1353 }
1272 flush_commit_list(s, jl, 1) ; 1354
1273 1355 /* start by putting the commit list on disk. This will also flush
1274 if (!(jl->j_state & LIST_DIRTY) && !reiserfs_is_journal_aborted (journal)) 1356 ** the commit lists of any olders transactions
1275 BUG(); 1357 */
1276 1358 flush_commit_list(s, jl, 1);
1277 /* are we done now? */ 1359
1278 if (atomic_read(&(jl->j_nonzerolen)) <= 0 && 1360 if (!(jl->j_state & LIST_DIRTY)
1279 atomic_read(&(jl->j_commit_left)) <= 0) { 1361 && !reiserfs_is_journal_aborted(journal))
1280 goto flush_older_and_return ; 1362 BUG();
1281 } 1363
1282 1364 /* are we done now? */
1283 /* loop through each cnode, see if we need to write it, 1365 if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
1284 ** or wait on a more recent transaction, or just ignore it 1366 atomic_read(&(jl->j_commit_left)) <= 0) {
1285 */ 1367 goto flush_older_and_return;
1286 if (atomic_read(&(journal->j_wcount)) != 0) { 1368 }
1287 reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ; 1369
1288 } 1370 /* loop through each cnode, see if we need to write it,
1289 cn = jl->j_realblock ; 1371 ** or wait on a more recent transaction, or just ignore it
1290 while(cn) { 1372 */
1291 was_jwait = 0 ; 1373 if (atomic_read(&(journal->j_wcount)) != 0) {
1292 was_dirty = 0 ; 1374 reiserfs_panic(s,
1293 saved_bh = NULL ; 1375 "journal-844: panic journal list is flushing, wcount is not 0\n");
1294 /* blocknr of 0 is no longer in the hash, ignore it */ 1376 }
1295 if (cn->blocknr == 0) { 1377 cn = jl->j_realblock;
1296 goto free_cnode ; 1378 while (cn) {
1297 } 1379 was_jwait = 0;
1298 1380 was_dirty = 0;
1299 /* This transaction failed commit. Don't write out to the disk */ 1381 saved_bh = NULL;
1300 if (!(jl->j_state & LIST_DIRTY)) 1382 /* blocknr of 0 is no longer in the hash, ignore it */
1301 goto free_cnode; 1383 if (cn->blocknr == 0) {
1302 1384 goto free_cnode;
1303 pjl = find_newer_jl_for_cn(cn) ; 1385 }
1304 /* the order is important here. We check pjl to make sure we 1386
1305 ** don't clear BH_JDirty_wait if we aren't the one writing this 1387 /* This transaction failed commit. Don't write out to the disk */
1306 ** block to disk 1388 if (!(jl->j_state & LIST_DIRTY))
1307 */ 1389 goto free_cnode;
1308 if (!pjl && cn->bh) { 1390
1309 saved_bh = cn->bh ; 1391 pjl = find_newer_jl_for_cn(cn);
1310 1392 /* the order is important here. We check pjl to make sure we
1311 /* we do this to make sure nobody releases the buffer while 1393 ** don't clear BH_JDirty_wait if we aren't the one writing this
1312 ** we are working with it 1394 ** block to disk
1313 */ 1395 */
1314 get_bh(saved_bh) ; 1396 if (!pjl && cn->bh) {
1315 1397 saved_bh = cn->bh;
1316 if (buffer_journal_dirty(saved_bh)) { 1398
1317 BUG_ON (!can_dirty (cn)); 1399 /* we do this to make sure nobody releases the buffer while
1318 was_jwait = 1 ; 1400 ** we are working with it
1319 was_dirty = 1 ; 1401 */
1320 } else if (can_dirty(cn)) { 1402 get_bh(saved_bh);
1321 /* everything with !pjl && jwait should be writable */ 1403
1322 BUG(); 1404 if (buffer_journal_dirty(saved_bh)) {
1323 } 1405 BUG_ON(!can_dirty(cn));
1324 } 1406 was_jwait = 1;
1325 1407 was_dirty = 1;
1326 /* if someone has this block in a newer transaction, just make 1408 } else if (can_dirty(cn)) {
1327 ** sure they are commited, and don't try writing it to disk 1409 /* everything with !pjl && jwait should be writable */
1328 */ 1410 BUG();
1329 if (pjl) { 1411 }
1330 if (atomic_read(&pjl->j_commit_left)) 1412 }
1331 flush_commit_list(s, pjl, 1) ; 1413
1332 goto free_cnode ; 1414 /* if someone has this block in a newer transaction, just make
1333 } 1415 ** sure they are commited, and don't try writing it to disk
1334 1416 */
1335 /* bh == NULL when the block got to disk on its own, OR, 1417 if (pjl) {
1336 ** the block got freed in a future transaction 1418 if (atomic_read(&pjl->j_commit_left))
1337 */ 1419 flush_commit_list(s, pjl, 1);
1338 if (saved_bh == NULL) { 1420 goto free_cnode;
1339 goto free_cnode ; 1421 }
1340 } 1422
1341 1423 /* bh == NULL when the block got to disk on its own, OR,
1342 /* this should never happen. kupdate_one_transaction has this list 1424 ** the block got freed in a future transaction
1343 ** locked while it works, so we should never see a buffer here that 1425 */
1344 ** is not marked JDirty_wait 1426 if (saved_bh == NULL) {
1345 */ 1427 goto free_cnode;
1346 if ((!was_jwait) && !buffer_locked(saved_bh)) { 1428 }
1347 reiserfs_warning (s, "journal-813: BAD! buffer %llu %cdirty %cjwait, " 1429
1348 "not in a newer tranasction", 1430 /* this should never happen. kupdate_one_transaction has this list
1349 (unsigned long long)saved_bh->b_blocknr, 1431 ** locked while it works, so we should never see a buffer here that
1350 was_dirty ? ' ' : '!', was_jwait ? ' ' : '!') ; 1432 ** is not marked JDirty_wait
1351 } 1433 */
1352 if (was_dirty) { 1434 if ((!was_jwait) && !buffer_locked(saved_bh)) {
1353 /* we inc again because saved_bh gets decremented at free_cnode */ 1435 reiserfs_warning(s,
1354 get_bh(saved_bh) ; 1436 "journal-813: BAD! buffer %llu %cdirty %cjwait, "
1355 set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; 1437 "not in a newer tranasction",
1356 lock_buffer(saved_bh); 1438 (unsigned long long)saved_bh->
1357 BUG_ON (cn->blocknr != saved_bh->b_blocknr); 1439 b_blocknr, was_dirty ? ' ' : '!',
1358 if (buffer_dirty(saved_bh)) 1440 was_jwait ? ' ' : '!');
1359 submit_logged_buffer(saved_bh) ; 1441 }
1360 else 1442 if (was_dirty) {
1361 unlock_buffer(saved_bh); 1443 /* we inc again because saved_bh gets decremented at free_cnode */
1362 count++ ; 1444 get_bh(saved_bh);
1363 } else { 1445 set_bit(BLOCK_NEEDS_FLUSH, &cn->state);
1364 reiserfs_warning (s, "clm-2082: Unable to flush buffer %llu in %s", 1446 lock_buffer(saved_bh);
1365 (unsigned long long)saved_bh->b_blocknr, __FUNCTION__); 1447 BUG_ON(cn->blocknr != saved_bh->b_blocknr);
1366 } 1448 if (buffer_dirty(saved_bh))
1367free_cnode: 1449 submit_logged_buffer(saved_bh);
1368 last = cn ; 1450 else
1369 cn = cn->next ; 1451 unlock_buffer(saved_bh);
1370 if (saved_bh) { 1452 count++;
1371 /* we incremented this to keep others from taking the buffer head away */ 1453 } else {
1372 put_bh(saved_bh) ; 1454 reiserfs_warning(s,
1373 if (atomic_read(&(saved_bh->b_count)) < 0) { 1455 "clm-2082: Unable to flush buffer %llu in %s",
1374 reiserfs_warning (s, "journal-945: saved_bh->b_count < 0"); 1456 (unsigned long long)saved_bh->
1375 } 1457 b_blocknr, __FUNCTION__);
1376 } 1458 }
1377 } 1459 free_cnode:
1378 if (count > 0) { 1460 last = cn;
1379 cn = jl->j_realblock ; 1461 cn = cn->next;
1380 while(cn) { 1462 if (saved_bh) {
1381 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { 1463 /* we incremented this to keep others from taking the buffer head away */
1382 if (!cn->bh) { 1464 put_bh(saved_bh);
1383 reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ; 1465 if (atomic_read(&(saved_bh->b_count)) < 0) {
1384 } 1466 reiserfs_warning(s,
1385 wait_on_buffer(cn->bh) ; 1467 "journal-945: saved_bh->b_count < 0");
1386 if (!cn->bh) { 1468 }
1387 reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; 1469 }
1388 } 1470 }
1389 if (unlikely (!buffer_uptodate(cn->bh))) { 1471 if (count > 0) {
1472 cn = jl->j_realblock;
1473 while (cn) {
1474 if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
1475 if (!cn->bh) {
1476 reiserfs_panic(s,
1477 "journal-1011: cn->bh is NULL\n");
1478 }
1479 wait_on_buffer(cn->bh);
1480 if (!cn->bh) {
1481 reiserfs_panic(s,
1482 "journal-1012: cn->bh is NULL\n");
1483 }
1484 if (unlikely(!buffer_uptodate(cn->bh))) {
1390#ifdef CONFIG_REISERFS_CHECK 1485#ifdef CONFIG_REISERFS_CHECK
1391 reiserfs_warning(s, "journal-949: buffer write failed\n") ; 1486 reiserfs_warning(s,
1487 "journal-949: buffer write failed\n");
1392#endif 1488#endif
1393 err = -EIO; 1489 err = -EIO;
1394 } 1490 }
1395 /* note, we must clear the JDirty_wait bit after the up to date 1491 /* note, we must clear the JDirty_wait bit after the up to date
1396 ** check, otherwise we race against our flushpage routine 1492 ** check, otherwise we race against our flushpage routine
1397 */ 1493 */
1398 BUG_ON (!test_clear_buffer_journal_dirty (cn->bh)); 1494 BUG_ON(!test_clear_buffer_journal_dirty
1399 1495 (cn->bh));
1400 /* undo the inc from journal_mark_dirty */ 1496
1401 put_bh(cn->bh) ; 1497 /* undo the inc from journal_mark_dirty */
1402 brelse(cn->bh) ; 1498 put_bh(cn->bh);
1403 } 1499 brelse(cn->bh);
1404 cn = cn->next ; 1500 }
1405 } 1501 cn = cn->next;
1406 } 1502 }
1407 1503 }
1408 if (err) 1504
1409 reiserfs_abort (s, -EIO, "Write error while pushing transaction to disk in %s", __FUNCTION__); 1505 if (err)
1410flush_older_and_return: 1506 reiserfs_abort(s, -EIO,
1411 1507 "Write error while pushing transaction to disk in %s",
1412 1508 __FUNCTION__);
1413 /* before we can update the journal header block, we _must_ flush all 1509 flush_older_and_return:
1414 ** real blocks from all older transactions to disk. This is because 1510
1415 ** once the header block is updated, this transaction will not be 1511 /* before we can update the journal header block, we _must_ flush all
1416 ** replayed after a crash 1512 ** real blocks from all older transactions to disk. This is because
1417 */ 1513 ** once the header block is updated, this transaction will not be
1418 if (flushall) { 1514 ** replayed after a crash
1419 flush_older_journal_lists(s, jl); 1515 */
1420 } 1516 if (flushall) {
1421 1517 flush_older_journal_lists(s, jl);
1422 err = journal->j_errno; 1518 }
1423 /* before we can remove everything from the hash tables for this 1519
1424 ** transaction, we must make sure it can never be replayed 1520 err = journal->j_errno;
1425 ** 1521 /* before we can remove everything from the hash tables for this
1426 ** since we are only called from do_journal_end, we know for sure there 1522 ** transaction, we must make sure it can never be replayed
1427 ** are no allocations going on while we are flushing journal lists. So, 1523 **
1428 ** we only need to update the journal header block for the last list 1524 ** since we are only called from do_journal_end, we know for sure there
1429 ** being flushed 1525 ** are no allocations going on while we are flushing journal lists. So,
1430 */ 1526 ** we only need to update the journal header block for the last list
1431 if (!err && flushall) { 1527 ** being flushed
1432 err = update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ; 1528 */
1433 if (err) 1529 if (!err && flushall) {
1434 reiserfs_abort (s, -EIO, "Write error while updating journal header in %s", __FUNCTION__); 1530 err =
1435 } 1531 update_journal_header_block(s,
1436 remove_all_from_journal_list(s, jl, 0) ; 1532 (jl->j_start + jl->j_len +
1437 list_del_init(&jl->j_list); 1533 2) % SB_ONDISK_JOURNAL_SIZE(s),
1438 journal->j_num_lists--; 1534 jl->j_trans_id);
1439 del_from_work_list(s, jl); 1535 if (err)
1440 1536 reiserfs_abort(s, -EIO,
1441 if (journal->j_last_flush_id != 0 && 1537 "Write error while updating journal header in %s",
1442 (jl->j_trans_id - journal->j_last_flush_id) != 1) { 1538 __FUNCTION__);
1443 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu", 1539 }
1444 journal->j_last_flush_id, 1540 remove_all_from_journal_list(s, jl, 0);
1445 jl->j_trans_id); 1541 list_del_init(&jl->j_list);
1446 } 1542 journal->j_num_lists--;
1447 journal->j_last_flush_id = jl->j_trans_id; 1543 del_from_work_list(s, jl);
1448 1544
1449 /* not strictly required since we are freeing the list, but it should 1545 if (journal->j_last_flush_id != 0 &&
1450 * help find code using dead lists later on 1546 (jl->j_trans_id - journal->j_last_flush_id) != 1) {
1451 */ 1547 reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
1452 jl->j_len = 0 ; 1548 journal->j_last_flush_id, jl->j_trans_id);
1453 atomic_set(&(jl->j_nonzerolen), 0) ; 1549 }
1454 jl->j_start = 0 ; 1550 journal->j_last_flush_id = jl->j_trans_id;
1455 jl->j_realblock = NULL ; 1551
1456 jl->j_commit_bh = NULL ; 1552 /* not strictly required since we are freeing the list, but it should
1457 jl->j_trans_id = 0 ; 1553 * help find code using dead lists later on
1458 jl->j_state = 0; 1554 */
1459 put_journal_list(s, jl); 1555 jl->j_len = 0;
1460 if (flushall) 1556 atomic_set(&(jl->j_nonzerolen), 0);
1461 up(&journal->j_flush_sem); 1557 jl->j_start = 0;
1462 put_fs_excl(); 1558 jl->j_realblock = NULL;
1463 return err ; 1559 jl->j_commit_bh = NULL;
1464} 1560 jl->j_trans_id = 0;
1561 jl->j_state = 0;
1562 put_journal_list(s, jl);
1563 if (flushall)
1564 up(&journal->j_flush_sem);
1565 put_fs_excl();
1566 return err;
1567}
1465 1568
1466static int write_one_transaction(struct super_block *s, 1569static int write_one_transaction(struct super_block *s,
1467 struct reiserfs_journal_list *jl, 1570 struct reiserfs_journal_list *jl,
1468 struct buffer_chunk *chunk) 1571 struct buffer_chunk *chunk)
1469{ 1572{
1470 struct reiserfs_journal_cnode *cn; 1573 struct reiserfs_journal_cnode *cn;
1471 int ret = 0 ; 1574 int ret = 0;
1472 1575
1473 jl->j_state |= LIST_TOUCHED; 1576 jl->j_state |= LIST_TOUCHED;
1474 del_from_work_list(s, jl); 1577 del_from_work_list(s, jl);
1475 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) { 1578 if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) {
1476 return 0; 1579 return 0;
1477 } 1580 }
1478 1581
1479 cn = jl->j_realblock ; 1582 cn = jl->j_realblock;
1480 while(cn) { 1583 while (cn) {
1481 /* if the blocknr == 0, this has been cleared from the hash, 1584 /* if the blocknr == 0, this has been cleared from the hash,
1482 ** skip it 1585 ** skip it
1483 */ 1586 */
1484 if (cn->blocknr == 0) { 1587 if (cn->blocknr == 0) {
1485 goto next ; 1588 goto next;
1486 } 1589 }
1487 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) { 1590 if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
1488 struct buffer_head *tmp_bh; 1591 struct buffer_head *tmp_bh;
1489 /* we can race against journal_mark_freed when we try 1592 /* we can race against journal_mark_freed when we try
1490 * to lock_buffer(cn->bh), so we have to inc the buffer 1593 * to lock_buffer(cn->bh), so we have to inc the buffer
1491 * count, and recheck things after locking 1594 * count, and recheck things after locking
1492 */ 1595 */
1493 tmp_bh = cn->bh; 1596 tmp_bh = cn->bh;
1494 get_bh(tmp_bh); 1597 get_bh(tmp_bh);
1495 lock_buffer(tmp_bh); 1598 lock_buffer(tmp_bh);
1496 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) { 1599 if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
1497 if (!buffer_journal_dirty(tmp_bh) || 1600 if (!buffer_journal_dirty(tmp_bh) ||
1498 buffer_journal_prepared(tmp_bh)) 1601 buffer_journal_prepared(tmp_bh))
1499 BUG(); 1602 BUG();
1500 add_to_chunk(chunk, tmp_bh, NULL, write_chunk); 1603 add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
1501 ret++; 1604 ret++;
1502 } else { 1605 } else {
1503 /* note, cn->bh might be null now */ 1606 /* note, cn->bh might be null now */
1504 unlock_buffer(tmp_bh); 1607 unlock_buffer(tmp_bh);
1505 } 1608 }
1506 put_bh(tmp_bh); 1609 put_bh(tmp_bh);
1507 } 1610 }
1508next: 1611 next:
1509 cn = cn->next ; 1612 cn = cn->next;
1510 cond_resched(); 1613 cond_resched();
1511 } 1614 }
1512 return ret ; 1615 return ret;
1513} 1616}
1514 1617
1515/* used by flush_commit_list */ 1618/* used by flush_commit_list */
1516static int dirty_one_transaction(struct super_block *s, 1619static int dirty_one_transaction(struct super_block *s,
1517 struct reiserfs_journal_list *jl) 1620 struct reiserfs_journal_list *jl)
1518{ 1621{
1519 struct reiserfs_journal_cnode *cn; 1622 struct reiserfs_journal_cnode *cn;
1520 struct reiserfs_journal_list *pjl; 1623 struct reiserfs_journal_list *pjl;
1521 int ret = 0 ; 1624 int ret = 0;
1522 1625
1523 jl->j_state |= LIST_DIRTY; 1626 jl->j_state |= LIST_DIRTY;
1524 cn = jl->j_realblock ; 1627 cn = jl->j_realblock;
1525 while(cn) { 1628 while (cn) {
1526 /* look for a more recent transaction that logged this 1629 /* look for a more recent transaction that logged this
1527 ** buffer. Only the most recent transaction with a buffer in 1630 ** buffer. Only the most recent transaction with a buffer in
1528 ** it is allowed to send that buffer to disk 1631 ** it is allowed to send that buffer to disk
1529 */ 1632 */
1530 pjl = find_newer_jl_for_cn(cn) ; 1633 pjl = find_newer_jl_for_cn(cn);
1531 if (!pjl && cn->blocknr && cn->bh && buffer_journal_dirty(cn->bh)) 1634 if (!pjl && cn->blocknr && cn->bh
1532 { 1635 && buffer_journal_dirty(cn->bh)) {
1533 BUG_ON (!can_dirty(cn)); 1636 BUG_ON(!can_dirty(cn));
1534 /* if the buffer is prepared, it will either be logged 1637 /* if the buffer is prepared, it will either be logged
1535 * or restored. If restored, we need to make sure 1638 * or restored. If restored, we need to make sure
1536 * it actually gets marked dirty 1639 * it actually gets marked dirty
1537 */ 1640 */
1538 clear_buffer_journal_new (cn->bh); 1641 clear_buffer_journal_new(cn->bh);
1539 if (buffer_journal_prepared (cn->bh)) { 1642 if (buffer_journal_prepared(cn->bh)) {
1540 set_buffer_journal_restore_dirty (cn->bh); 1643 set_buffer_journal_restore_dirty(cn->bh);
1541 } else { 1644 } else {
1542 set_buffer_journal_test (cn->bh); 1645 set_buffer_journal_test(cn->bh);
1543 mark_buffer_dirty(cn->bh); 1646 mark_buffer_dirty(cn->bh);
1544 } 1647 }
1545 } 1648 }
1546 cn = cn->next ; 1649 cn = cn->next;
1547 } 1650 }
1548 return ret ; 1651 return ret;
1549} 1652}
1550 1653
1551static int kupdate_transactions(struct super_block *s, 1654static int kupdate_transactions(struct super_block *s,
1552 struct reiserfs_journal_list *jl, 1655 struct reiserfs_journal_list *jl,
1553 struct reiserfs_journal_list **next_jl, 1656 struct reiserfs_journal_list **next_jl,
1554 unsigned long *next_trans_id, 1657 unsigned long *next_trans_id,
1555 int num_blocks, 1658 int num_blocks, int num_trans)
1556 int num_trans) { 1659{
1557 int ret = 0; 1660 int ret = 0;
1558 int written = 0 ; 1661 int written = 0;
1559 int transactions_flushed = 0; 1662 int transactions_flushed = 0;
1560 unsigned long orig_trans_id = jl->j_trans_id; 1663 unsigned long orig_trans_id = jl->j_trans_id;
1561 struct buffer_chunk chunk; 1664 struct buffer_chunk chunk;
1562 struct list_head *entry; 1665 struct list_head *entry;
1563 struct reiserfs_journal *journal = SB_JOURNAL (s); 1666 struct reiserfs_journal *journal = SB_JOURNAL(s);
1564 chunk.nr = 0; 1667 chunk.nr = 0;
1565 1668
1566 down(&journal->j_flush_sem); 1669 down(&journal->j_flush_sem);
1567 if (!journal_list_still_alive(s, orig_trans_id)) { 1670 if (!journal_list_still_alive(s, orig_trans_id)) {
1568 goto done; 1671 goto done;
1569 } 1672 }
1570 1673
1571 /* we've got j_flush_sem held, nobody is going to delete any 1674 /* we've got j_flush_sem held, nobody is going to delete any
1572 * of these lists out from underneath us 1675 * of these lists out from underneath us
1573 */ 1676 */
1574 while((num_trans && transactions_flushed < num_trans) || 1677 while ((num_trans && transactions_flushed < num_trans) ||
1575 (!num_trans && written < num_blocks)) { 1678 (!num_trans && written < num_blocks)) {
1576 1679
1577 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) || 1680 if (jl->j_len == 0 || (jl->j_state & LIST_TOUCHED) ||
1578 atomic_read(&jl->j_commit_left) || !(jl->j_state & LIST_DIRTY)) 1681 atomic_read(&jl->j_commit_left)
1579 { 1682 || !(jl->j_state & LIST_DIRTY)) {
1580 del_from_work_list(s, jl); 1683 del_from_work_list(s, jl);
1581 break; 1684 break;
1582 } 1685 }
1583 ret = write_one_transaction(s, jl, &chunk); 1686 ret = write_one_transaction(s, jl, &chunk);
1584 1687
1585 if (ret < 0) 1688 if (ret < 0)
1586 goto done; 1689 goto done;
1587 transactions_flushed++; 1690 transactions_flushed++;
1588 written += ret; 1691 written += ret;
1589 entry = jl->j_list.next; 1692 entry = jl->j_list.next;
1590 1693
1591 /* did we wrap? */ 1694 /* did we wrap? */
1592 if (entry == &journal->j_journal_list) { 1695 if (entry == &journal->j_journal_list) {
1593 break; 1696 break;
1594 } 1697 }
1595 jl = JOURNAL_LIST_ENTRY(entry); 1698 jl = JOURNAL_LIST_ENTRY(entry);
1596 1699
1597 /* don't bother with older transactions */ 1700 /* don't bother with older transactions */
1598 if (jl->j_trans_id <= orig_trans_id) 1701 if (jl->j_trans_id <= orig_trans_id)
1599 break; 1702 break;
1600 } 1703 }
1601 if (chunk.nr) { 1704 if (chunk.nr) {
1602 write_chunk(&chunk); 1705 write_chunk(&chunk);
1603 } 1706 }
1604 1707
1605done: 1708 done:
1606 up(&journal->j_flush_sem); 1709 up(&journal->j_flush_sem);
1607 return ret; 1710 return ret;
1608} 1711}
1609 1712
1610/* for o_sync and fsync heavy applications, they tend to use 1713/* for o_sync and fsync heavy applications, they tend to use
@@ -1617,47 +1720,48 @@ done:
1617** list updates the header block 1720** list updates the header block
1618*/ 1721*/
1619static int flush_used_journal_lists(struct super_block *s, 1722static int flush_used_journal_lists(struct super_block *s,
1620 struct reiserfs_journal_list *jl) { 1723 struct reiserfs_journal_list *jl)
1621 unsigned long len = 0; 1724{
1622 unsigned long cur_len; 1725 unsigned long len = 0;
1623 int ret; 1726 unsigned long cur_len;
1624 int i; 1727 int ret;
1625 int limit = 256; 1728 int i;
1626 struct reiserfs_journal_list *tjl; 1729 int limit = 256;
1627 struct reiserfs_journal_list *flush_jl; 1730 struct reiserfs_journal_list *tjl;
1628 unsigned long trans_id; 1731 struct reiserfs_journal_list *flush_jl;
1629 struct reiserfs_journal *journal = SB_JOURNAL (s); 1732 unsigned long trans_id;
1630 1733 struct reiserfs_journal *journal = SB_JOURNAL(s);
1631 flush_jl = tjl = jl; 1734
1632 1735 flush_jl = tjl = jl;
1633 /* in data logging mode, try harder to flush a lot of blocks */ 1736
1634 if (reiserfs_data_log(s)) 1737 /* in data logging mode, try harder to flush a lot of blocks */
1635 limit = 1024; 1738 if (reiserfs_data_log(s))
1636 /* flush for 256 transactions or limit blocks, whichever comes first */ 1739 limit = 1024;
1637 for(i = 0 ; i < 256 && len < limit ; i++) { 1740 /* flush for 256 transactions or limit blocks, whichever comes first */
1638 if (atomic_read(&tjl->j_commit_left) || 1741 for (i = 0; i < 256 && len < limit; i++) {
1639 tjl->j_trans_id < jl->j_trans_id) { 1742 if (atomic_read(&tjl->j_commit_left) ||
1640 break; 1743 tjl->j_trans_id < jl->j_trans_id) {
1641 } 1744 break;
1642 cur_len = atomic_read(&tjl->j_nonzerolen); 1745 }
1643 if (cur_len > 0) { 1746 cur_len = atomic_read(&tjl->j_nonzerolen);
1644 tjl->j_state &= ~LIST_TOUCHED; 1747 if (cur_len > 0) {
1645 } 1748 tjl->j_state &= ~LIST_TOUCHED;
1646 len += cur_len; 1749 }
1647 flush_jl = tjl; 1750 len += cur_len;
1648 if (tjl->j_list.next == &journal->j_journal_list) 1751 flush_jl = tjl;
1649 break; 1752 if (tjl->j_list.next == &journal->j_journal_list)
1650 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); 1753 break;
1651 } 1754 tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
1652 /* try to find a group of blocks we can flush across all the 1755 }
1653 ** transactions, but only bother if we've actually spanned 1756 /* try to find a group of blocks we can flush across all the
1654 ** across multiple lists 1757 ** transactions, but only bother if we've actually spanned
1655 */ 1758 ** across multiple lists
1656 if (flush_jl != jl) { 1759 */
1657 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); 1760 if (flush_jl != jl) {
1658 } 1761 ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
1659 flush_journal_list(s, flush_jl, 1); 1762 }
1660 return 0; 1763 flush_journal_list(s, flush_jl, 1);
1764 return 0;
1661} 1765}
1662 1766
1663/* 1767/*
@@ -1665,207 +1769,248 @@ static int flush_used_journal_lists(struct super_block *s,
1665** only touchs the hnext and hprev pointers. 1769** only touchs the hnext and hprev pointers.
1666*/ 1770*/
1667void remove_journal_hash(struct super_block *sb, 1771void remove_journal_hash(struct super_block *sb,
1668 struct reiserfs_journal_cnode **table, 1772 struct reiserfs_journal_cnode **table,
1669 struct reiserfs_journal_list *jl, 1773 struct reiserfs_journal_list *jl,
1670 unsigned long block, int remove_freed) 1774 unsigned long block, int remove_freed)
1671{ 1775{
1672 struct reiserfs_journal_cnode *cur ; 1776 struct reiserfs_journal_cnode *cur;
1673 struct reiserfs_journal_cnode **head ; 1777 struct reiserfs_journal_cnode **head;
1674 1778
1675 head= &(journal_hash(table, sb, block)) ; 1779 head = &(journal_hash(table, sb, block));
1676 if (!head) { 1780 if (!head) {
1677 return ; 1781 return;
1678 } 1782 }
1679 cur = *head ; 1783 cur = *head;
1680 while(cur) { 1784 while (cur) {
1681 if (cur->blocknr == block && cur->sb == sb && (jl == NULL || jl == cur->jlist) && 1785 if (cur->blocknr == block && cur->sb == sb
1682 (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { 1786 && (jl == NULL || jl == cur->jlist)
1683 if (cur->hnext) { 1787 && (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) {
1684 cur->hnext->hprev = cur->hprev ; 1788 if (cur->hnext) {
1685 } 1789 cur->hnext->hprev = cur->hprev;
1686 if (cur->hprev) { 1790 }
1687 cur->hprev->hnext = cur->hnext ; 1791 if (cur->hprev) {
1688 } else { 1792 cur->hprev->hnext = cur->hnext;
1689 *head = cur->hnext ; 1793 } else {
1690 } 1794 *head = cur->hnext;
1691 cur->blocknr = 0 ; 1795 }
1692 cur->sb = NULL ; 1796 cur->blocknr = 0;
1693 cur->state = 0 ; 1797 cur->sb = NULL;
1694 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ 1798 cur->state = 0;
1695 atomic_dec(&(cur->jlist->j_nonzerolen)) ; 1799 if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */
1696 cur->bh = NULL ; 1800 atomic_dec(&(cur->jlist->j_nonzerolen));
1697 cur->jlist = NULL ; 1801 cur->bh = NULL;
1698 } 1802 cur->jlist = NULL;
1699 cur = cur->hnext ; 1803 }
1700 } 1804 cur = cur->hnext;
1701} 1805 }
1702 1806}
1703static void free_journal_ram(struct super_block *p_s_sb) { 1807
1704 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1808static void free_journal_ram(struct super_block *p_s_sb)
1705 reiserfs_kfree(journal->j_current_jl, 1809{
1706 sizeof(struct reiserfs_journal_list), p_s_sb); 1810 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1707 journal->j_num_lists--; 1811 reiserfs_kfree(journal->j_current_jl,
1708 1812 sizeof(struct reiserfs_journal_list), p_s_sb);
1709 vfree(journal->j_cnode_free_orig) ; 1813 journal->j_num_lists--;
1710 free_list_bitmaps(p_s_sb, journal->j_list_bitmap) ; 1814
1711 free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ 1815 vfree(journal->j_cnode_free_orig);
1712 if (journal->j_header_bh) { 1816 free_list_bitmaps(p_s_sb, journal->j_list_bitmap);
1713 brelse(journal->j_header_bh) ; 1817 free_bitmap_nodes(p_s_sb); /* must be after free_list_bitmaps */
1714 } 1818 if (journal->j_header_bh) {
1715 /* j_header_bh is on the journal dev, make sure not to release the journal 1819 brelse(journal->j_header_bh);
1716 * dev until we brelse j_header_bh 1820 }
1717 */ 1821 /* j_header_bh is on the journal dev, make sure not to release the journal
1718 release_journal_dev(p_s_sb, journal); 1822 * dev until we brelse j_header_bh
1719 vfree(journal) ; 1823 */
1824 release_journal_dev(p_s_sb, journal);
1825 vfree(journal);
1720} 1826}
1721 1827
1722/* 1828/*
1723** call on unmount. Only set error to 1 if you haven't made your way out 1829** call on unmount. Only set error to 1 if you haven't made your way out
1724** of read_super() yet. Any other caller must keep error at 0. 1830** of read_super() yet. Any other caller must keep error at 0.
1725*/ 1831*/
1726static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { 1832static int do_journal_release(struct reiserfs_transaction_handle *th,
1727 struct reiserfs_transaction_handle myth ; 1833 struct super_block *p_s_sb, int error)
1728 int flushed = 0; 1834{
1729 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 1835 struct reiserfs_transaction_handle myth;
1730 1836 int flushed = 0;
1731 /* we only want to flush out transactions if we were called with error == 0 1837 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1732 */ 1838
1733 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { 1839 /* we only want to flush out transactions if we were called with error == 0
1734 /* end the current trans */ 1840 */
1735 BUG_ON (!th->t_trans_id); 1841 if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
1736 do_journal_end(th, p_s_sb,10, FLUSH_ALL) ; 1842 /* end the current trans */
1737 1843 BUG_ON(!th->t_trans_id);
1738 /* make sure something gets logged to force our way into the flush code */ 1844 do_journal_end(th, p_s_sb, 10, FLUSH_ALL);
1739 if (!journal_join(&myth, p_s_sb, 1)) { 1845
1740 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 1846 /* make sure something gets logged to force our way into the flush code */
1741 journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 1847 if (!journal_join(&myth, p_s_sb, 1)) {
1742 do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ; 1848 reiserfs_prepare_for_journal(p_s_sb,
1743 flushed = 1; 1849 SB_BUFFER_WITH_SB(p_s_sb),
1744 } 1850 1);
1745 } 1851 journal_mark_dirty(&myth, p_s_sb,
1746 1852 SB_BUFFER_WITH_SB(p_s_sb));
1747 /* this also catches errors during the do_journal_end above */ 1853 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1748 if (!error && reiserfs_is_journal_aborted(journal)) { 1854 flushed = 1;
1749 memset(&myth, 0, sizeof(myth)); 1855 }
1750 if (!journal_join_abort(&myth, p_s_sb, 1)) { 1856 }
1751 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 1857
1752 journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 1858 /* this also catches errors during the do_journal_end above */
1753 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL) ; 1859 if (!error && reiserfs_is_journal_aborted(journal)) {
1754 } 1860 memset(&myth, 0, sizeof(myth));
1755 } 1861 if (!journal_join_abort(&myth, p_s_sb, 1)) {
1756 1862 reiserfs_prepare_for_journal(p_s_sb,
1757 reiserfs_mounted_fs_count-- ; 1863 SB_BUFFER_WITH_SB(p_s_sb),
1758 /* wait for all commits to finish */ 1864 1);
1759 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work); 1865 journal_mark_dirty(&myth, p_s_sb,
1760 flush_workqueue(commit_wq); 1866 SB_BUFFER_WITH_SB(p_s_sb));
1761 if (!reiserfs_mounted_fs_count) { 1867 do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL);
1762 destroy_workqueue(commit_wq); 1868 }
1763 commit_wq = NULL; 1869 }
1764 } 1870
1765 1871 reiserfs_mounted_fs_count--;
1766 free_journal_ram(p_s_sb) ; 1872 /* wait for all commits to finish */
1767 1873 cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
1768 return 0 ; 1874 flush_workqueue(commit_wq);
1875 if (!reiserfs_mounted_fs_count) {
1876 destroy_workqueue(commit_wq);
1877 commit_wq = NULL;
1878 }
1879
1880 free_journal_ram(p_s_sb);
1881
1882 return 0;
1769} 1883}
1770 1884
1771/* 1885/*
1772** call on unmount. flush all journal trans, release all alloc'd ram 1886** call on unmount. flush all journal trans, release all alloc'd ram
1773*/ 1887*/
1774int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { 1888int journal_release(struct reiserfs_transaction_handle *th,
1775 return do_journal_release(th, p_s_sb, 0) ; 1889 struct super_block *p_s_sb)
1890{
1891 return do_journal_release(th, p_s_sb, 0);
1776} 1892}
1893
1777/* 1894/*
1778** only call from an error condition inside reiserfs_read_super! 1895** only call from an error condition inside reiserfs_read_super!
1779*/ 1896*/
1780int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { 1897int journal_release_error(struct reiserfs_transaction_handle *th,
1781 return do_journal_release(th, p_s_sb, 1) ; 1898 struct super_block *p_s_sb)
1899{
1900 return do_journal_release(th, p_s_sb, 1);
1782} 1901}
1783 1902
1784/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ 1903/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
1785static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, 1904static int journal_compare_desc_commit(struct super_block *p_s_sb,
1786 struct reiserfs_journal_commit *commit) { 1905 struct reiserfs_journal_desc *desc,
1787 if (get_commit_trans_id (commit) != get_desc_trans_id (desc) || 1906 struct reiserfs_journal_commit *commit)
1788 get_commit_trans_len (commit) != get_desc_trans_len (desc) || 1907{
1789 get_commit_trans_len (commit) > SB_JOURNAL(p_s_sb)->j_trans_max || 1908 if (get_commit_trans_id(commit) != get_desc_trans_id(desc) ||
1790 get_commit_trans_len (commit) <= 0 1909 get_commit_trans_len(commit) != get_desc_trans_len(desc) ||
1791 ) { 1910 get_commit_trans_len(commit) > SB_JOURNAL(p_s_sb)->j_trans_max ||
1792 return 1 ; 1911 get_commit_trans_len(commit) <= 0) {
1793 } 1912 return 1;
1794 return 0 ; 1913 }
1914 return 0;
1795} 1915}
1916
1796/* returns 0 if it did not find a description block 1917/* returns 0 if it did not find a description block
1797** returns -1 if it found a corrupt commit block 1918** returns -1 if it found a corrupt commit block
1798** returns 1 if both desc and commit were valid 1919** returns 1 if both desc and commit were valid
1799*/ 1920*/
1800static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffer_head *d_bh, unsigned long *oldest_invalid_trans_id, unsigned long *newest_mount_id) { 1921static int journal_transaction_is_valid(struct super_block *p_s_sb,
1801 struct reiserfs_journal_desc *desc ; 1922 struct buffer_head *d_bh,
1802 struct reiserfs_journal_commit *commit ; 1923 unsigned long *oldest_invalid_trans_id,
1803 struct buffer_head *c_bh ; 1924 unsigned long *newest_mount_id)
1804 unsigned long offset ; 1925{
1805 1926 struct reiserfs_journal_desc *desc;
1806 if (!d_bh) 1927 struct reiserfs_journal_commit *commit;
1807 return 0 ; 1928 struct buffer_head *c_bh;
1808 1929 unsigned long offset;
1809 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 1930
1810 if (get_desc_trans_len(desc) > 0 && !memcmp(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8)) { 1931 if (!d_bh)
1811 if (oldest_invalid_trans_id && *oldest_invalid_trans_id && get_desc_trans_id(desc) > *oldest_invalid_trans_id) { 1932 return 0;
1812 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction " 1933
1813 "is valid returning because trans_id %d is greater than " 1934 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1814 "oldest_invalid %lu", get_desc_trans_id(desc), 1935 if (get_desc_trans_len(desc) > 0
1815 *oldest_invalid_trans_id); 1936 && !memcmp(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8)) {
1816 return 0 ; 1937 if (oldest_invalid_trans_id && *oldest_invalid_trans_id
1817 } 1938 && get_desc_trans_id(desc) > *oldest_invalid_trans_id) {
1818 if (newest_mount_id && *newest_mount_id > get_desc_mount_id (desc)) { 1939 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1819 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1087: transaction " 1940 "journal-986: transaction "
1820 "is valid returning because mount_id %d is less than " 1941 "is valid returning because trans_id %d is greater than "
1821 "newest_mount_id %lu", get_desc_mount_id (desc), 1942 "oldest_invalid %lu",
1822 *newest_mount_id) ; 1943 get_desc_trans_id(desc),
1823 return -1 ; 1944 *oldest_invalid_trans_id);
1824 } 1945 return 0;
1825 if ( get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max ) { 1946 }
1826 reiserfs_warning(p_s_sb, "journal-2018: Bad transaction length %d encountered, ignoring transaction", get_desc_trans_len(desc)); 1947 if (newest_mount_id
1827 return -1 ; 1948 && *newest_mount_id > get_desc_mount_id(desc)) {
1828 } 1949 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1829 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 1950 "journal-1087: transaction "
1830 1951 "is valid returning because mount_id %d is less than "
1831 /* ok, we have a journal description block, lets see if the transaction was valid */ 1952 "newest_mount_id %lu",
1832 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 1953 get_desc_mount_id(desc),
1833 ((offset + get_desc_trans_len(desc) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 1954 *newest_mount_id);
1834 if (!c_bh) 1955 return -1;
1835 return 0 ; 1956 }
1836 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 1957 if (get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max) {
1837 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 1958 reiserfs_warning(p_s_sb,
1838 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, 1959 "journal-2018: Bad transaction length %d encountered, ignoring transaction",
1839 "journal_transaction_is_valid, commit offset %ld had bad " 1960 get_desc_trans_len(desc));
1840 "time %d or length %d", 1961 return -1;
1841 c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 1962 }
1842 get_commit_trans_id (commit), 1963 offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
1843 get_commit_trans_len(commit)); 1964
1844 brelse(c_bh) ; 1965 /* ok, we have a journal description block, lets see if the transaction was valid */
1845 if (oldest_invalid_trans_id) { 1966 c_bh =
1846 *oldest_invalid_trans_id = get_desc_trans_id(desc) ; 1967 journal_bread(p_s_sb,
1847 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: " 1968 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1848 "transaction_is_valid setting oldest invalid trans_id " 1969 ((offset + get_desc_trans_len(desc) +
1849 "to %d", get_desc_trans_id(desc)) ; 1970 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
1850 } 1971 if (!c_bh)
1851 return -1; 1972 return 0;
1852 } 1973 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
1853 brelse(c_bh) ; 1974 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
1854 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid " 1975 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1855 "transaction start offset %llu, len %d id %d", 1976 "journal_transaction_is_valid, commit offset %ld had bad "
1856 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 1977 "time %d or length %d",
1857 get_desc_trans_len(desc), get_desc_trans_id(desc)) ; 1978 c_bh->b_blocknr -
1858 return 1 ; 1979 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1859 } else { 1980 get_commit_trans_id(commit),
1860 return 0 ; 1981 get_commit_trans_len(commit));
1861 } 1982 brelse(c_bh);
1862} 1983 if (oldest_invalid_trans_id) {
1863 1984 *oldest_invalid_trans_id =
1864static void brelse_array(struct buffer_head **heads, int num) { 1985 get_desc_trans_id(desc);
1865 int i ; 1986 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1866 for (i = 0 ; i < num ; i++) { 1987 "journal-1004: "
1867 brelse(heads[i]) ; 1988 "transaction_is_valid setting oldest invalid trans_id "
1868 } 1989 "to %d",
1990 get_desc_trans_id(desc));
1991 }
1992 return -1;
1993 }
1994 brelse(c_bh);
1995 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1996 "journal-1006: found valid "
1997 "transaction start offset %llu, len %d id %d",
1998 d_bh->b_blocknr -
1999 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2000 get_desc_trans_len(desc),
2001 get_desc_trans_id(desc));
2002 return 1;
2003 } else {
2004 return 0;
2005 }
2006}
2007
2008static void brelse_array(struct buffer_head **heads, int num)
2009{
2010 int i;
2011 for (i = 0; i < num; i++) {
2012 brelse(heads[i]);
2013 }
1869} 2014}
1870 2015
1871/* 2016/*
@@ -1873,149 +2018,202 @@ static void brelse_array(struct buffer_head **heads, int num) {
1873** this either reads in a replays a transaction, or returns because the transaction 2018** this either reads in a replays a transaction, or returns because the transaction
1874** is invalid, or too old. 2019** is invalid, or too old.
1875*/ 2020*/
1876static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start, 2021static int journal_read_transaction(struct super_block *p_s_sb,
1877 unsigned long oldest_trans_id, unsigned long newest_mount_id) { 2022 unsigned long cur_dblock,
1878 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 2023 unsigned long oldest_start,
1879 struct reiserfs_journal_desc *desc ; 2024 unsigned long oldest_trans_id,
1880 struct reiserfs_journal_commit *commit ; 2025 unsigned long newest_mount_id)
1881 unsigned long trans_id = 0 ; 2026{
1882 struct buffer_head *c_bh ; 2027 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
1883 struct buffer_head *d_bh ; 2028 struct reiserfs_journal_desc *desc;
1884 struct buffer_head **log_blocks = NULL ; 2029 struct reiserfs_journal_commit *commit;
1885 struct buffer_head **real_blocks = NULL ; 2030 unsigned long trans_id = 0;
1886 unsigned long trans_offset ; 2031 struct buffer_head *c_bh;
1887 int i; 2032 struct buffer_head *d_bh;
1888 int trans_half; 2033 struct buffer_head **log_blocks = NULL;
1889 2034 struct buffer_head **real_blocks = NULL;
1890 d_bh = journal_bread(p_s_sb, cur_dblock) ; 2035 unsigned long trans_offset;
1891 if (!d_bh) 2036 int i;
1892 return 1 ; 2037 int trans_half;
1893 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 2038
1894 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2039 d_bh = journal_bread(p_s_sb, cur_dblock);
1895 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " 2040 if (!d_bh)
1896 "journal_read_transaction, offset %llu, len %d mount_id %d", 2041 return 1;
1897 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2042 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
1898 get_desc_trans_len(desc), get_desc_mount_id(desc)) ; 2043 trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
1899 if (get_desc_trans_id(desc) < oldest_trans_id) { 2044 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
1900 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " 2045 "journal_read_transaction, offset %llu, len %d mount_id %d",
1901 "journal_read_trans skipping because %lu is too old", 2046 d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1902 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; 2047 get_desc_trans_len(desc), get_desc_mount_id(desc));
1903 brelse(d_bh) ; 2048 if (get_desc_trans_id(desc) < oldest_trans_id) {
1904 return 1 ; 2049 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
1905 } 2050 "journal_read_trans skipping because %lu is too old",
1906 if (get_desc_mount_id(desc) != newest_mount_id) { 2051 cur_dblock -
1907 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " 2052 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
1908 "journal_read_trans skipping because %d is != " 2053 brelse(d_bh);
1909 "newest_mount_id %lu", get_desc_mount_id(desc), 2054 return 1;
1910 newest_mount_id) ; 2055 }
1911 brelse(d_bh) ; 2056 if (get_desc_mount_id(desc) != newest_mount_id) {
1912 return 1 ; 2057 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
1913 } 2058 "journal_read_trans skipping because %d is != "
1914 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2059 "newest_mount_id %lu", get_desc_mount_id(desc),
1915 ((trans_offset + get_desc_trans_len(desc) + 1) % 2060 newest_mount_id);
1916 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 2061 brelse(d_bh);
1917 if (!c_bh) { 2062 return 1;
1918 brelse(d_bh) ; 2063 }
1919 return 1 ; 2064 c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1920 } 2065 ((trans_offset + get_desc_trans_len(desc) + 1) %
1921 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 2066 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
1922 if (journal_compare_desc_commit(p_s_sb, desc, commit)) { 2067 if (!c_bh) {
1923 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, " 2068 brelse(d_bh);
1924 "commit offset %llu had bad time %d or length %d", 2069 return 1;
1925 c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2070 }
1926 get_commit_trans_id(commit), get_commit_trans_len(commit)); 2071 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
1927 brelse(c_bh) ; 2072 if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
1928 brelse(d_bh) ; 2073 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1929 return 1; 2074 "journal_read_transaction, "
1930 } 2075 "commit offset %llu had bad time %d or length %d",
1931 trans_id = get_desc_trans_id(desc) ; 2076 c_bh->b_blocknr -
1932 /* now we know we've got a good transaction, and it was inside the valid time ranges */ 2077 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
1933 log_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; 2078 get_commit_trans_id(commit),
1934 real_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ; 2079 get_commit_trans_len(commit));
1935 if (!log_blocks || !real_blocks) { 2080 brelse(c_bh);
1936 brelse(c_bh) ; 2081 brelse(d_bh);
1937 brelse(d_bh) ; 2082 return 1;
1938 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2083 }
1939 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2084 trans_id = get_desc_trans_id(desc);
1940 reiserfs_warning(p_s_sb, "journal-1169: kmalloc failed, unable to mount FS") ; 2085 /* now we know we've got a good transaction, and it was inside the valid time ranges */
1941 return -1 ; 2086 log_blocks =
1942 } 2087 reiserfs_kmalloc(get_desc_trans_len(desc) *
1943 /* get all the buffer heads */ 2088 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
1944 trans_half = journal_trans_half (p_s_sb->s_blocksize) ; 2089 real_blocks =
1945 for(i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2090 reiserfs_kmalloc(get_desc_trans_len(desc) *
1946 log_blocks[i] = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + (trans_offset + 1 + i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2091 sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
1947 if (i < trans_half) { 2092 if (!log_blocks || !real_blocks) {
1948 real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(desc->j_realblock[i])) ; 2093 brelse(c_bh);
1949 } else { 2094 brelse(d_bh);
1950 real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(commit->j_realblock[i - trans_half])) ; 2095 reiserfs_kfree(log_blocks,
1951 } 2096 get_desc_trans_len(desc) *
1952 if ( real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb) ) { 2097 sizeof(struct buffer_head *), p_s_sb);
1953 reiserfs_warning(p_s_sb, "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem"); 2098 reiserfs_kfree(real_blocks,
1954 goto abort_replay; 2099 get_desc_trans_len(desc) *
1955 } 2100 sizeof(struct buffer_head *), p_s_sb);
1956 /* make sure we don't try to replay onto log or reserved area */ 2101 reiserfs_warning(p_s_sb,
1957 if (is_block_in_log_or_reserved_area(p_s_sb, real_blocks[i]->b_blocknr)) { 2102 "journal-1169: kmalloc failed, unable to mount FS");
1958 reiserfs_warning(p_s_sb, "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block") ; 2103 return -1;
1959abort_replay: 2104 }
1960 brelse_array(log_blocks, i) ; 2105 /* get all the buffer heads */
1961 brelse_array(real_blocks, i) ; 2106 trans_half = journal_trans_half(p_s_sb->s_blocksize);
1962 brelse(c_bh) ; 2107 for (i = 0; i < get_desc_trans_len(desc); i++) {
1963 brelse(d_bh) ; 2108 log_blocks[i] =
1964 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2109 journal_getblk(p_s_sb,
1965 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2110 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
1966 return -1 ; 2111 (trans_offset + 1 +
1967 } 2112 i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
1968 } 2113 if (i < trans_half) {
1969 /* read in the log blocks, memcpy to the corresponding real block */ 2114 real_blocks[i] =
1970 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks) ; 2115 sb_getblk(p_s_sb,
1971 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2116 le32_to_cpu(desc->j_realblock[i]));
1972 wait_on_buffer(log_blocks[i]) ; 2117 } else {
1973 if (!buffer_uptodate(log_blocks[i])) { 2118 real_blocks[i] =
1974 reiserfs_warning(p_s_sb, "journal-1212: REPLAY FAILURE fsck required! buffer write failed") ; 2119 sb_getblk(p_s_sb,
1975 brelse_array(log_blocks + i, get_desc_trans_len(desc) - i) ; 2120 le32_to_cpu(commit->
1976 brelse_array(real_blocks, get_desc_trans_len(desc)) ; 2121 j_realblock[i - trans_half]));
1977 brelse(c_bh) ; 2122 }
1978 brelse(d_bh) ; 2123 if (real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
1979 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2124 reiserfs_warning(p_s_sb,
1980 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2125 "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
1981 return -1 ; 2126 goto abort_replay;
1982 } 2127 }
1983 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ; 2128 /* make sure we don't try to replay onto log or reserved area */
1984 set_buffer_uptodate(real_blocks[i]) ; 2129 if (is_block_in_log_or_reserved_area
1985 brelse(log_blocks[i]) ; 2130 (p_s_sb, real_blocks[i]->b_blocknr)) {
1986 } 2131 reiserfs_warning(p_s_sb,
1987 /* flush out the real blocks */ 2132 "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block");
1988 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2133 abort_replay:
1989 set_buffer_dirty(real_blocks[i]) ; 2134 brelse_array(log_blocks, i);
1990 ll_rw_block(WRITE, 1, real_blocks + i) ; 2135 brelse_array(real_blocks, i);
1991 } 2136 brelse(c_bh);
1992 for (i = 0 ; i < get_desc_trans_len(desc) ; i++) { 2137 brelse(d_bh);
1993 wait_on_buffer(real_blocks[i]) ; 2138 reiserfs_kfree(log_blocks,
1994 if (!buffer_uptodate(real_blocks[i])) { 2139 get_desc_trans_len(desc) *
1995 reiserfs_warning(p_s_sb, "journal-1226: REPLAY FAILURE, fsck required! buffer write failed") ; 2140 sizeof(struct buffer_head *), p_s_sb);
1996 brelse_array(real_blocks + i, get_desc_trans_len(desc) - i) ; 2141 reiserfs_kfree(real_blocks,
1997 brelse(c_bh) ; 2142 get_desc_trans_len(desc) *
1998 brelse(d_bh) ; 2143 sizeof(struct buffer_head *), p_s_sb);
1999 reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2144 return -1;
2000 reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ; 2145 }
2001 return -1 ; 2146 }
2002 } 2147 /* read in the log blocks, memcpy to the corresponding real block */
2003 brelse(real_blocks[i]) ; 2148 ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
2004 } 2149 for (i = 0; i < get_desc_trans_len(desc); i++) {
2005 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ((trans_offset + get_desc_trans_len(desc) + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; 2150 wait_on_buffer(log_blocks[i]);
2006 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal " 2151 if (!buffer_uptodate(log_blocks[i])) {
2007 "start to offset %ld", 2152 reiserfs_warning(p_s_sb,
2008 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ; 2153 "journal-1212: REPLAY FAILURE fsck required! buffer write failed");
2009 2154 brelse_array(log_blocks + i,
2010 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ 2155 get_desc_trans_len(desc) - i);
2011 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2156 brelse_array(real_blocks, get_desc_trans_len(desc));
2012 journal->j_last_flush_trans_id = trans_id ; 2157 brelse(c_bh);
2013 journal->j_trans_id = trans_id + 1; 2158 brelse(d_bh);
2014 brelse(c_bh) ; 2159 reiserfs_kfree(log_blocks,
2015 brelse(d_bh) ; 2160 get_desc_trans_len(desc) *
2016 reiserfs_kfree(log_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; 2161 sizeof(struct buffer_head *), p_s_sb);
2017 reiserfs_kfree(real_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ; 2162 reiserfs_kfree(real_blocks,
2018 return 0 ; 2163 get_desc_trans_len(desc) *
2164 sizeof(struct buffer_head *), p_s_sb);
2165 return -1;
2166 }
2167 memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
2168 real_blocks[i]->b_size);
2169 set_buffer_uptodate(real_blocks[i]);
2170 brelse(log_blocks[i]);
2171 }
2172 /* flush out the real blocks */
2173 for (i = 0; i < get_desc_trans_len(desc); i++) {
2174 set_buffer_dirty(real_blocks[i]);
2175 ll_rw_block(WRITE, 1, real_blocks + i);
2176 }
2177 for (i = 0; i < get_desc_trans_len(desc); i++) {
2178 wait_on_buffer(real_blocks[i]);
2179 if (!buffer_uptodate(real_blocks[i])) {
2180 reiserfs_warning(p_s_sb,
2181 "journal-1226: REPLAY FAILURE, fsck required! buffer write failed");
2182 brelse_array(real_blocks + i,
2183 get_desc_trans_len(desc) - i);
2184 brelse(c_bh);
2185 brelse(d_bh);
2186 reiserfs_kfree(log_blocks,
2187 get_desc_trans_len(desc) *
2188 sizeof(struct buffer_head *), p_s_sb);
2189 reiserfs_kfree(real_blocks,
2190 get_desc_trans_len(desc) *
2191 sizeof(struct buffer_head *), p_s_sb);
2192 return -1;
2193 }
2194 brelse(real_blocks[i]);
2195 }
2196 cur_dblock =
2197 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2198 ((trans_offset + get_desc_trans_len(desc) +
2199 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2200 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2201 "journal-1095: setting journal " "start to offset %ld",
2202 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb));
2203
2204 /* init starting values for the first transaction, in case this is the last transaction to be replayed. */
2205 journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2206 journal->j_last_flush_trans_id = trans_id;
2207 journal->j_trans_id = trans_id + 1;
2208 brelse(c_bh);
2209 brelse(d_bh);
2210 reiserfs_kfree(log_blocks,
2211 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
2212 p_s_sb);
2213 reiserfs_kfree(real_blocks,
2214 le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
2215 p_s_sb);
2216 return 0;
2019} 2217}
2020 2218
2021/* This function reads blocks starting from block and to max_block of bufsize 2219/* This function reads blocks starting from block and to max_block of bufsize
@@ -2024,39 +2222,39 @@ abort_replay:
2024 Right now it is only used from journal code. But later we might use it 2222 Right now it is only used from journal code. But later we might use it
2025 from other places. 2223 from other places.
2026 Note: Do not use journal_getblk/sb_getblk functions here! */ 2224 Note: Do not use journal_getblk/sb_getblk functions here! */
2027static struct buffer_head * reiserfs_breada (struct block_device *dev, int block, int bufsize, 2225static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
2028 unsigned int max_block) 2226 int bufsize, unsigned int max_block)
2029{ 2227{
2030 struct buffer_head * bhlist[BUFNR]; 2228 struct buffer_head *bhlist[BUFNR];
2031 unsigned int blocks = BUFNR; 2229 unsigned int blocks = BUFNR;
2032 struct buffer_head * bh; 2230 struct buffer_head *bh;
2033 int i, j; 2231 int i, j;
2034 2232
2035 bh = __getblk (dev, block, bufsize ); 2233 bh = __getblk(dev, block, bufsize);
2036 if (buffer_uptodate (bh)) 2234 if (buffer_uptodate(bh))
2037 return (bh); 2235 return (bh);
2038 2236
2039 if (block + BUFNR > max_block) { 2237 if (block + BUFNR > max_block) {
2040 blocks = max_block - block; 2238 blocks = max_block - block;
2041 } 2239 }
2042 bhlist[0] = bh; 2240 bhlist[0] = bh;
2043 j = 1; 2241 j = 1;
2044 for (i = 1; i < blocks; i++) { 2242 for (i = 1; i < blocks; i++) {
2045 bh = __getblk (dev, block + i, bufsize); 2243 bh = __getblk(dev, block + i, bufsize);
2046 if (buffer_uptodate (bh)) { 2244 if (buffer_uptodate(bh)) {
2047 brelse (bh); 2245 brelse(bh);
2048 break; 2246 break;
2049 } 2247 } else
2050 else bhlist[j++] = bh; 2248 bhlist[j++] = bh;
2051 } 2249 }
2052 ll_rw_block (READ, j, bhlist); 2250 ll_rw_block(READ, j, bhlist);
2053 for(i = 1; i < j; i++) 2251 for (i = 1; i < j; i++)
2054 brelse (bhlist[i]); 2252 brelse(bhlist[i]);
2055 bh = bhlist[0]; 2253 bh = bhlist[0];
2056 wait_on_buffer (bh); 2254 wait_on_buffer(bh);
2057 if (buffer_uptodate (bh)) 2255 if (buffer_uptodate(bh))
2058 return bh; 2256 return bh;
2059 brelse (bh); 2257 brelse(bh);
2060 return NULL; 2258 return NULL;
2061} 2259}
2062 2260
@@ -2069,218 +2267,250 @@ static struct buffer_head * reiserfs_breada (struct block_device *dev, int block
2069** 2267**
2070** On exit, it sets things up so the first transaction will work correctly. 2268** On exit, it sets things up so the first transaction will work correctly.
2071*/ 2269*/
2072static int journal_read(struct super_block *p_s_sb) { 2270static int journal_read(struct super_block *p_s_sb)
2073 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 2271{
2074 struct reiserfs_journal_desc *desc ; 2272 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2075 unsigned long oldest_trans_id = 0; 2273 struct reiserfs_journal_desc *desc;
2076 unsigned long oldest_invalid_trans_id = 0 ; 2274 unsigned long oldest_trans_id = 0;
2077 time_t start ; 2275 unsigned long oldest_invalid_trans_id = 0;
2078 unsigned long oldest_start = 0; 2276 time_t start;
2079 unsigned long cur_dblock = 0 ; 2277 unsigned long oldest_start = 0;
2080 unsigned long newest_mount_id = 9 ; 2278 unsigned long cur_dblock = 0;
2081 struct buffer_head *d_bh ; 2279 unsigned long newest_mount_id = 9;
2082 struct reiserfs_journal_header *jh ; 2280 struct buffer_head *d_bh;
2083 int valid_journal_header = 0 ; 2281 struct reiserfs_journal_header *jh;
2084 int replay_count = 0 ; 2282 int valid_journal_header = 0;
2085 int continue_replay = 1 ; 2283 int replay_count = 0;
2086 int ret ; 2284 int continue_replay = 1;
2087 char b[BDEVNAME_SIZE]; 2285 int ret;
2088 2286 char b[BDEVNAME_SIZE];
2089 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ; 2287
2090 reiserfs_info (p_s_sb, "checking transaction log (%s)\n", 2288 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
2091 bdevname(journal->j_dev_bd, b)); 2289 reiserfs_info(p_s_sb, "checking transaction log (%s)\n",
2092 start = get_seconds(); 2290 bdevname(journal->j_dev_bd, b));
2093 2291 start = get_seconds();
2094 /* step 1, read in the journal header block. Check the transaction it says 2292
2095 ** is the first unflushed, and if that transaction is not valid, 2293 /* step 1, read in the journal header block. Check the transaction it says
2096 ** replay is done 2294 ** is the first unflushed, and if that transaction is not valid,
2097 */ 2295 ** replay is done
2098 journal->j_header_bh = journal_bread(p_s_sb, 2296 */
2099 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2297 journal->j_header_bh = journal_bread(p_s_sb,
2100 SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2298 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)
2101 if (!journal->j_header_bh) { 2299 + SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2102 return 1 ; 2300 if (!journal->j_header_bh) {
2103 } 2301 return 1;
2104 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ; 2302 }
2105 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && 2303 jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
2106 le32_to_cpu(jh->j_first_unflushed_offset) < SB_ONDISK_JOURNAL_SIZE(p_s_sb) && 2304 if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
2107 le32_to_cpu(jh->j_last_flush_trans_id) > 0) { 2305 le32_to_cpu(jh->j_first_unflushed_offset) <
2108 oldest_start = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 2306 SB_ONDISK_JOURNAL_SIZE(p_s_sb)
2109 le32_to_cpu(jh->j_first_unflushed_offset) ; 2307 && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
2110 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; 2308 oldest_start =
2111 newest_mount_id = le32_to_cpu(jh->j_mount_id); 2309 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2112 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in " 2310 le32_to_cpu(jh->j_first_unflushed_offset);
2113 "header: first_unflushed_offset %d, last_flushed_trans_id " 2311 oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2114 "%lu", le32_to_cpu(jh->j_first_unflushed_offset), 2312 newest_mount_id = le32_to_cpu(jh->j_mount_id);
2115 le32_to_cpu(jh->j_last_flush_trans_id)) ; 2313 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2116 valid_journal_header = 1 ; 2314 "journal-1153: found in "
2117 2315 "header: first_unflushed_offset %d, last_flushed_trans_id "
2118 /* now, we try to read the first unflushed offset. If it is not valid, 2316 "%lu", le32_to_cpu(jh->j_first_unflushed_offset),
2119 ** there is nothing more we can do, and it makes no sense to read 2317 le32_to_cpu(jh->j_last_flush_trans_id));
2120 ** through the whole log. 2318 valid_journal_header = 1;
2121 */ 2319
2122 d_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset)) ; 2320 /* now, we try to read the first unflushed offset. If it is not valid,
2123 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ; 2321 ** there is nothing more we can do, and it makes no sense to read
2124 if (!ret) { 2322 ** through the whole log.
2125 continue_replay = 0 ; 2323 */
2126 } 2324 d_bh =
2127 brelse(d_bh) ; 2325 journal_bread(p_s_sb,
2128 goto start_log_replay; 2326 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2129 } 2327 le32_to_cpu(jh->j_first_unflushed_offset));
2130 2328 ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL);
2131 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) { 2329 if (!ret) {
2132 reiserfs_warning (p_s_sb, 2330 continue_replay = 0;
2133 "clm-2076: device is readonly, unable to replay log") ; 2331 }
2134 return -1 ; 2332 brelse(d_bh);
2135 } 2333 goto start_log_replay;
2136 2334 }
2137 /* ok, there are transactions that need to be replayed. start with the first log block, find 2335
2138 ** all the valid transactions, and pick out the oldest. 2336 if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) {
2139 */ 2337 reiserfs_warning(p_s_sb,
2140 while(continue_replay && cur_dblock < (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb))) { 2338 "clm-2076: device is readonly, unable to replay log");
2141 /* Note that it is required for blocksize of primary fs device and journal 2339 return -1;
2142 device to be the same */ 2340 }
2143 d_bh = reiserfs_breada(journal->j_dev_bd, cur_dblock, p_s_sb->s_blocksize, 2341
2144 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ; 2342 /* ok, there are transactions that need to be replayed. start with the first log block, find
2145 ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; 2343 ** all the valid transactions, and pick out the oldest.
2146 if (ret == 1) { 2344 */
2147 desc = (struct reiserfs_journal_desc *)d_bh->b_data ; 2345 while (continue_replay
2148 if (oldest_start == 0) { /* init all oldest_ values */ 2346 && cur_dblock <
2149 oldest_trans_id = get_desc_trans_id(desc) ; 2347 (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2150 oldest_start = d_bh->b_blocknr ; 2348 SB_ONDISK_JOURNAL_SIZE(p_s_sb))) {
2151 newest_mount_id = get_desc_mount_id(desc) ; 2349 /* Note that it is required for blocksize of primary fs device and journal
2152 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting " 2350 device to be the same */
2153 "oldest_start to offset %llu, trans_id %lu", 2351 d_bh =
2154 oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2352 reiserfs_breada(journal->j_dev_bd, cur_dblock,
2155 oldest_trans_id) ; 2353 p_s_sb->s_blocksize,
2156 } else if (oldest_trans_id > get_desc_trans_id(desc)) { 2354 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2157 /* one we just read was older */ 2355 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2158 oldest_trans_id = get_desc_trans_id(desc) ; 2356 ret =
2159 oldest_start = d_bh->b_blocknr ; 2357 journal_transaction_is_valid(p_s_sb, d_bh,
2160 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting " 2358 &oldest_invalid_trans_id,
2161 "oldest_start to offset %lu, trans_id %lu", 2359 &newest_mount_id);
2162 oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2360 if (ret == 1) {
2163 oldest_trans_id) ; 2361 desc = (struct reiserfs_journal_desc *)d_bh->b_data;
2164 } 2362 if (oldest_start == 0) { /* init all oldest_ values */
2165 if (newest_mount_id < get_desc_mount_id(desc)) { 2363 oldest_trans_id = get_desc_trans_id(desc);
2166 newest_mount_id = get_desc_mount_id(desc) ; 2364 oldest_start = d_bh->b_blocknr;
2365 newest_mount_id = get_desc_mount_id(desc);
2366 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2367 "journal-1179: Setting "
2368 "oldest_start to offset %llu, trans_id %lu",
2369 oldest_start -
2370 SB_ONDISK_JOURNAL_1st_BLOCK
2371 (p_s_sb), oldest_trans_id);
2372 } else if (oldest_trans_id > get_desc_trans_id(desc)) {
2373 /* one we just read was older */
2374 oldest_trans_id = get_desc_trans_id(desc);
2375 oldest_start = d_bh->b_blocknr;
2376 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2377 "journal-1180: Resetting "
2378 "oldest_start to offset %lu, trans_id %lu",
2379 oldest_start -
2380 SB_ONDISK_JOURNAL_1st_BLOCK
2381 (p_s_sb), oldest_trans_id);
2382 }
2383 if (newest_mount_id < get_desc_mount_id(desc)) {
2384 newest_mount_id = get_desc_mount_id(desc);
2385 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2386 "journal-1299: Setting "
2387 "newest_mount_id to %d",
2388 get_desc_mount_id(desc));
2389 }
2390 cur_dblock += get_desc_trans_len(desc) + 2;
2391 } else {
2392 cur_dblock++;
2393 }
2394 brelse(d_bh);
2395 }
2396
2397 start_log_replay:
2398 cur_dblock = oldest_start;
2399 if (oldest_trans_id) {
2400 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2401 "journal-1206: Starting replay "
2402 "from offset %llu, trans_id %lu",
2403 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2404 oldest_trans_id);
2405
2406 }
2407 replay_count = 0;
2408 while (continue_replay && oldest_trans_id > 0) {
2409 ret =
2410 journal_read_transaction(p_s_sb, cur_dblock, oldest_start,
2411 oldest_trans_id, newest_mount_id);
2412 if (ret < 0) {
2413 return ret;
2414 } else if (ret != 0) {
2415 break;
2416 }
2417 cur_dblock =
2418 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start;
2419 replay_count++;
2420 if (cur_dblock == oldest_start)
2421 break;
2422 }
2423
2424 if (oldest_trans_id == 0) {
2425 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
2426 "journal-1225: No valid " "transactions found");
2427 }
2428 /* j_start does not get set correctly if we don't replay any transactions.
2429 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
2430 ** copy the trans_id from the header
2431 */
2432 if (valid_journal_header && replay_count == 0) {
2433 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
2434 journal->j_trans_id =
2435 le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2436 journal->j_last_flush_trans_id =
2437 le32_to_cpu(jh->j_last_flush_trans_id);
2438 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
2439 } else {
2440 journal->j_mount_id = newest_mount_id + 1;
2441 }
2167 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " 2442 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2168 "newest_mount_id to %d", get_desc_mount_id(desc)); 2443 "newest_mount_id to %lu", journal->j_mount_id);
2169 } 2444 journal->j_first_unflushed_offset = journal->j_start;
2170 cur_dblock += get_desc_trans_len(desc) + 2 ; 2445 if (replay_count > 0) {
2171 } else { 2446 reiserfs_info(p_s_sb,
2172 cur_dblock++ ; 2447 "replayed %d transactions in %lu seconds\n",
2173 } 2448 replay_count, get_seconds() - start);
2174 brelse(d_bh) ; 2449 }
2175 } 2450 if (!bdev_read_only(p_s_sb->s_bdev) &&
2176 2451 _update_journal_header_block(p_s_sb, journal->j_start,
2177start_log_replay: 2452 journal->j_last_flush_trans_id)) {
2178 cur_dblock = oldest_start ; 2453 /* replay failed, caller must call free_journal_ram and abort
2179 if (oldest_trans_id) { 2454 ** the mount
2180 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay " 2455 */
2181 "from offset %llu, trans_id %lu", 2456 return -1;
2182 cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2457 }
2183 oldest_trans_id) ; 2458 return 0;
2184
2185 }
2186 replay_count = 0 ;
2187 while(continue_replay && oldest_trans_id > 0) {
2188 ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ;
2189 if (ret < 0) {
2190 return ret ;
2191 } else if (ret != 0) {
2192 break ;
2193 }
2194 cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start ;
2195 replay_count++ ;
2196 if (cur_dblock == oldest_start)
2197 break;
2198 }
2199
2200 if (oldest_trans_id == 0) {
2201 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid "
2202 "transactions found") ;
2203 }
2204 /* j_start does not get set correctly if we don't replay any transactions.
2205 ** if we had a valid journal_header, set j_start to the first unflushed transaction value,
2206 ** copy the trans_id from the header
2207 */
2208 if (valid_journal_header && replay_count == 0) {
2209 journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ;
2210 journal->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
2211 journal->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ;
2212 journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
2213 } else {
2214 journal->j_mount_id = newest_mount_id + 1 ;
2215 }
2216 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
2217 "newest_mount_id to %lu", journal->j_mount_id) ;
2218 journal->j_first_unflushed_offset = journal->j_start ;
2219 if (replay_count > 0) {
2220 reiserfs_info (p_s_sb, "replayed %d transactions in %lu seconds\n",
2221 replay_count, get_seconds() - start) ;
2222 }
2223 if (!bdev_read_only(p_s_sb->s_bdev) &&
2224 _update_journal_header_block(p_s_sb, journal->j_start,
2225 journal->j_last_flush_trans_id))
2226 {
2227 /* replay failed, caller must call free_journal_ram and abort
2228 ** the mount
2229 */
2230 return -1 ;
2231 }
2232 return 0 ;
2233} 2459}
2234 2460
2235static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s) 2461static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2236{ 2462{
2237 struct reiserfs_journal_list *jl; 2463 struct reiserfs_journal_list *jl;
2238retry: 2464 retry:
2239 jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s); 2465 jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS,
2240 if (!jl) { 2466 s);
2241 yield(); 2467 if (!jl) {
2242 goto retry; 2468 yield();
2243 } 2469 goto retry;
2244 memset(jl, 0, sizeof(*jl)); 2470 }
2245 INIT_LIST_HEAD(&jl->j_list); 2471 memset(jl, 0, sizeof(*jl));
2246 INIT_LIST_HEAD(&jl->j_working_list); 2472 INIT_LIST_HEAD(&jl->j_list);
2247 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2473 INIT_LIST_HEAD(&jl->j_working_list);
2248 INIT_LIST_HEAD(&jl->j_bh_list); 2474 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2249 sema_init(&jl->j_commit_lock, 1); 2475 INIT_LIST_HEAD(&jl->j_bh_list);
2250 SB_JOURNAL(s)->j_num_lists++; 2476 sema_init(&jl->j_commit_lock, 1);
2251 get_journal_list(jl); 2477 SB_JOURNAL(s)->j_num_lists++;
2252 return jl; 2478 get_journal_list(jl);
2253} 2479 return jl;
2254 2480}
2255static void journal_list_init(struct super_block *p_s_sb) { 2481
2256 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb); 2482static void journal_list_init(struct super_block *p_s_sb)
2257} 2483{
2258 2484 SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
2259static int release_journal_dev( struct super_block *super, 2485}
2260 struct reiserfs_journal *journal ) 2486
2261{ 2487static int release_journal_dev(struct super_block *super,
2262 int result; 2488 struct reiserfs_journal *journal)
2263 2489{
2264 result = 0; 2490 int result;
2265 2491
2266 if( journal -> j_dev_file != NULL ) { 2492 result = 0;
2267 result = filp_close( journal -> j_dev_file, NULL ); 2493
2268 journal -> j_dev_file = NULL; 2494 if (journal->j_dev_file != NULL) {
2269 journal -> j_dev_bd = NULL; 2495 result = filp_close(journal->j_dev_file, NULL);
2270 } else if( journal -> j_dev_bd != NULL ) { 2496 journal->j_dev_file = NULL;
2271 result = blkdev_put( journal -> j_dev_bd ); 2497 journal->j_dev_bd = NULL;
2272 journal -> j_dev_bd = NULL; 2498 } else if (journal->j_dev_bd != NULL) {
2273 } 2499 result = blkdev_put(journal->j_dev_bd);
2274 2500 journal->j_dev_bd = NULL;
2275 if( result != 0 ) { 2501 }
2276 reiserfs_warning(super, "sh-457: release_journal_dev: Cannot release journal device: %i", result ); 2502
2277 } 2503 if (result != 0) {
2278 return result; 2504 reiserfs_warning(super,
2279} 2505 "sh-457: release_journal_dev: Cannot release journal device: %i",
2280 2506 result);
2281static int journal_init_dev( struct super_block *super, 2507 }
2282 struct reiserfs_journal *journal, 2508 return result;
2283 const char *jdev_name ) 2509}
2510
2511static int journal_init_dev(struct super_block *super,
2512 struct reiserfs_journal *journal,
2513 const char *jdev_name)
2284{ 2514{
2285 int result; 2515 int result;
2286 dev_t jdev; 2516 dev_t jdev;
@@ -2289,50 +2519,51 @@ static int journal_init_dev( struct super_block *super,
2289 2519
2290 result = 0; 2520 result = 0;
2291 2521
2292 journal -> j_dev_bd = NULL; 2522 journal->j_dev_bd = NULL;
2293 journal -> j_dev_file = NULL; 2523 journal->j_dev_file = NULL;
2294 jdev = SB_ONDISK_JOURNAL_DEVICE( super ) ? 2524 jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
2295 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev; 2525 new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
2296 2526
2297 if (bdev_read_only(super->s_bdev)) 2527 if (bdev_read_only(super->s_bdev))
2298 blkdev_mode = FMODE_READ; 2528 blkdev_mode = FMODE_READ;
2299 2529
2300 /* there is no "jdev" option and journal is on separate device */ 2530 /* there is no "jdev" option and journal is on separate device */
2301 if( ( !jdev_name || !jdev_name[ 0 ] ) ) { 2531 if ((!jdev_name || !jdev_name[0])) {
2302 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2532 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2303 if (IS_ERR(journal->j_dev_bd)) { 2533 if (IS_ERR(journal->j_dev_bd)) {
2304 result = PTR_ERR(journal->j_dev_bd); 2534 result = PTR_ERR(journal->j_dev_bd);
2305 journal->j_dev_bd = NULL; 2535 journal->j_dev_bd = NULL;
2306 reiserfs_warning (super, "sh-458: journal_init_dev: " 2536 reiserfs_warning(super, "sh-458: journal_init_dev: "
2307 "cannot init journal device '%s': %i", 2537 "cannot init journal device '%s': %i",
2308 __bdevname(jdev, b), result ); 2538 __bdevname(jdev, b), result);
2309 return result; 2539 return result;
2310 } else if (jdev != super->s_dev) 2540 } else if (jdev != super->s_dev)
2311 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2541 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2312 return 0; 2542 return 0;
2313 } 2543 }
2314 2544
2315 journal -> j_dev_file = filp_open( jdev_name, 0, 0 ); 2545 journal->j_dev_file = filp_open(jdev_name, 0, 0);
2316 if( !IS_ERR( journal -> j_dev_file ) ) { 2546 if (!IS_ERR(journal->j_dev_file)) {
2317 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host; 2547 struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
2318 if( !S_ISBLK( jdev_inode -> i_mode ) ) { 2548 if (!S_ISBLK(jdev_inode->i_mode)) {
2319 reiserfs_warning(super, "journal_init_dev: '%s' is " 2549 reiserfs_warning(super, "journal_init_dev: '%s' is "
2320 "not a block device", jdev_name ); 2550 "not a block device", jdev_name);
2321 result = -ENOTBLK; 2551 result = -ENOTBLK;
2322 release_journal_dev( super, journal ); 2552 release_journal_dev(super, journal);
2323 } else { 2553 } else {
2324 /* ok */ 2554 /* ok */
2325 journal->j_dev_bd = I_BDEV(jdev_inode); 2555 journal->j_dev_bd = I_BDEV(jdev_inode);
2326 set_blocksize(journal->j_dev_bd, super->s_blocksize); 2556 set_blocksize(journal->j_dev_bd, super->s_blocksize);
2327 reiserfs_info(super, "journal_init_dev: journal device: %s\n", 2557 reiserfs_info(super,
2558 "journal_init_dev: journal device: %s\n",
2328 bdevname(journal->j_dev_bd, b)); 2559 bdevname(journal->j_dev_bd, b));
2329 } 2560 }
2330 } else { 2561 } else {
2331 result = PTR_ERR( journal -> j_dev_file ); 2562 result = PTR_ERR(journal->j_dev_file);
2332 journal -> j_dev_file = NULL; 2563 journal->j_dev_file = NULL;
2333 reiserfs_warning (super, 2564 reiserfs_warning(super,
2334 "journal_init_dev: Cannot open '%s': %i", 2565 "journal_init_dev: Cannot open '%s': %i",
2335 jdev_name, result ); 2566 jdev_name, result);
2336 } 2567 }
2337 return result; 2568 return result;
2338} 2569}
@@ -2340,193 +2571,214 @@ static int journal_init_dev( struct super_block *super,
2340/* 2571/*
2341** must be called once on fs mount. calls journal_read for you 2572** must be called once on fs mount. calls journal_read for you
2342*/ 2573*/
2343int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_format, unsigned int commit_max_age) { 2574int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2344 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2 ; 2575 int old_format, unsigned int commit_max_age)
2345 struct buffer_head *bhjh; 2576{
2346 struct reiserfs_super_block * rs; 2577 int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2;
2347 struct reiserfs_journal_header *jh; 2578 struct buffer_head *bhjh;
2348 struct reiserfs_journal *journal; 2579 struct reiserfs_super_block *rs;
2349 struct reiserfs_journal_list *jl; 2580 struct reiserfs_journal_header *jh;
2350 char b[BDEVNAME_SIZE]; 2581 struct reiserfs_journal *journal;
2351 2582 struct reiserfs_journal_list *jl;
2352 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ; 2583 char b[BDEVNAME_SIZE];
2353 if (!journal) { 2584
2354 reiserfs_warning (p_s_sb, "journal-1256: unable to get memory for journal structure") ; 2585 journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof(struct reiserfs_journal));
2355 return 1 ; 2586 if (!journal) {
2356 } 2587 reiserfs_warning(p_s_sb,
2357 memset(journal, 0, sizeof(struct reiserfs_journal)) ; 2588 "journal-1256: unable to get memory for journal structure");
2358 INIT_LIST_HEAD(&journal->j_bitmap_nodes) ; 2589 return 1;
2359 INIT_LIST_HEAD (&journal->j_prealloc_list); 2590 }
2360 INIT_LIST_HEAD(&journal->j_working_list); 2591 memset(journal, 0, sizeof(struct reiserfs_journal));
2361 INIT_LIST_HEAD(&journal->j_journal_list); 2592 INIT_LIST_HEAD(&journal->j_bitmap_nodes);
2362 journal->j_persistent_trans = 0; 2593 INIT_LIST_HEAD(&journal->j_prealloc_list);
2363 if (reiserfs_allocate_list_bitmaps(p_s_sb, 2594 INIT_LIST_HEAD(&journal->j_working_list);
2364 journal->j_list_bitmap, 2595 INIT_LIST_HEAD(&journal->j_journal_list);
2365 SB_BMAP_NR(p_s_sb))) 2596 journal->j_persistent_trans = 0;
2366 goto free_and_return ; 2597 if (reiserfs_allocate_list_bitmaps(p_s_sb,
2367 allocate_bitmap_nodes(p_s_sb) ; 2598 journal->j_list_bitmap,
2368 2599 SB_BMAP_NR(p_s_sb)))
2369 /* reserved for journal area support */ 2600 goto free_and_return;
2370 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? 2601 allocate_bitmap_nodes(p_s_sb);
2371 REISERFS_OLD_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2602
2372 SB_BMAP_NR(p_s_sb) + 1 : 2603 /* reserved for journal area support */
2373 REISERFS_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2); 2604 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
2374 2605 REISERFS_OLD_DISK_OFFSET_IN_BYTES
2375 /* Sanity check to see is the standard journal fitting withing first bitmap 2606 / p_s_sb->s_blocksize +
2376 (actual for small blocksizes) */ 2607 SB_BMAP_NR(p_s_sb) +
2377 if ( !SB_ONDISK_JOURNAL_DEVICE( p_s_sb ) && 2608 1 :
2378 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8) ) { 2609 REISERFS_DISK_OFFSET_IN_BYTES /
2379 reiserfs_warning (p_s_sb, "journal-1393: journal does not fit for area " 2610 p_s_sb->s_blocksize + 2);
2380 "addressed by first of bitmap blocks. It starts at " 2611
2381 "%u and its size is %u. Block size %ld", 2612 /* Sanity check to see is the standard journal fitting withing first bitmap
2382 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb), 2613 (actual for small blocksizes) */
2383 SB_ONDISK_JOURNAL_SIZE(p_s_sb), p_s_sb->s_blocksize); 2614 if (!SB_ONDISK_JOURNAL_DEVICE(p_s_sb) &&
2384 goto free_and_return; 2615 (SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) +
2385 } 2616 SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8)) {
2386 2617 reiserfs_warning(p_s_sb,
2387 if( journal_init_dev( p_s_sb, journal, j_dev_name ) != 0 ) { 2618 "journal-1393: journal does not fit for area "
2388 reiserfs_warning (p_s_sb, "sh-462: unable to initialize jornal device"); 2619 "addressed by first of bitmap blocks. It starts at "
2389 goto free_and_return; 2620 "%u and its size is %u. Block size %ld",
2390 } 2621 SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb),
2391 2622 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2392 rs = SB_DISK_SUPER_BLOCK(p_s_sb); 2623 p_s_sb->s_blocksize);
2393 2624 goto free_and_return;
2394 /* read journal header */ 2625 }
2395 bhjh = journal_bread(p_s_sb, 2626
2396 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)); 2627 if (journal_init_dev(p_s_sb, journal, j_dev_name) != 0) {
2397 if (!bhjh) { 2628 reiserfs_warning(p_s_sb,
2398 reiserfs_warning (p_s_sb, "sh-459: unable to read journal header"); 2629 "sh-462: unable to initialize jornal device");
2399 goto free_and_return; 2630 goto free_and_return;
2400 } 2631 }
2401 jh = (struct reiserfs_journal_header *)(bhjh->b_data); 2632
2402 2633 rs = SB_DISK_SUPER_BLOCK(p_s_sb);
2403 /* make sure that journal matches to the super block */ 2634
2404 if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) { 2635 /* read journal header */
2405 reiserfs_warning (p_s_sb, "sh-460: journal header magic %x " 2636 bhjh = journal_bread(p_s_sb,
2406 "(device %s) does not match to magic found in super " 2637 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
2407 "block %x", 2638 SB_ONDISK_JOURNAL_SIZE(p_s_sb));
2408 jh->jh_journal.jp_journal_magic, 2639 if (!bhjh) {
2409 bdevname( journal->j_dev_bd, b), 2640 reiserfs_warning(p_s_sb,
2410 sb_jp_journal_magic(rs)); 2641 "sh-459: unable to read journal header");
2411 brelse (bhjh); 2642 goto free_and_return;
2412 goto free_and_return; 2643 }
2413 } 2644 jh = (struct reiserfs_journal_header *)(bhjh->b_data);
2414 2645
2415 journal->j_trans_max = le32_to_cpu (jh->jh_journal.jp_journal_trans_max); 2646 /* make sure that journal matches to the super block */
2416 journal->j_max_batch = le32_to_cpu (jh->jh_journal.jp_journal_max_batch); 2647 if (is_reiserfs_jr(rs)
2417 journal->j_max_commit_age = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age); 2648 && (le32_to_cpu(jh->jh_journal.jp_journal_magic) !=
2418 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 2649 sb_jp_journal_magic(rs))) {
2419 2650 reiserfs_warning(p_s_sb,
2420 if (journal->j_trans_max) { 2651 "sh-460: journal header magic %x "
2421 /* make sure these parameters are available, assign it if they are not */ 2652 "(device %s) does not match to magic found in super "
2422 __u32 initial = journal->j_trans_max; 2653 "block %x", jh->jh_journal.jp_journal_magic,
2423 __u32 ratio = 1; 2654 bdevname(journal->j_dev_bd, b),
2424 2655 sb_jp_journal_magic(rs));
2425 if (p_s_sb->s_blocksize < 4096) 2656 brelse(bhjh);
2426 ratio = 4096 / p_s_sb->s_blocksize; 2657 goto free_and_return;
2427 2658 }
2428 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb)/journal->j_trans_max < JOURNAL_MIN_RATIO) 2659
2429 journal->j_trans_max = SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO; 2660 journal->j_trans_max = le32_to_cpu(jh->jh_journal.jp_journal_trans_max);
2430 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio) 2661 journal->j_max_batch = le32_to_cpu(jh->jh_journal.jp_journal_max_batch);
2431 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT / ratio; 2662 journal->j_max_commit_age =
2432 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio) 2663 le32_to_cpu(jh->jh_journal.jp_journal_max_commit_age);
2433 journal->j_trans_max = JOURNAL_TRANS_MIN_DEFAULT / ratio; 2664 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
2434 2665
2435 if (journal->j_trans_max != initial) 2666 if (journal->j_trans_max) {
2436 reiserfs_warning (p_s_sb, "sh-461: journal_init: wrong transaction max size (%u). Changed to %u", 2667 /* make sure these parameters are available, assign it if they are not */
2437 initial, journal->j_trans_max); 2668 __u32 initial = journal->j_trans_max;
2438 2669 __u32 ratio = 1;
2439 journal->j_max_batch = journal->j_trans_max* 2670
2440 JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT; 2671 if (p_s_sb->s_blocksize < 4096)
2441 } 2672 ratio = 4096 / p_s_sb->s_blocksize;
2442 2673
2443 if (!journal->j_trans_max) { 2674 if (SB_ONDISK_JOURNAL_SIZE(p_s_sb) / journal->j_trans_max <
2444 /*we have the file system was created by old version of mkreiserfs 2675 JOURNAL_MIN_RATIO)
2445 so this field contains zero value */ 2676 journal->j_trans_max =
2446 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT ; 2677 SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO;
2447 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT ; 2678 if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio)
2448 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE ; 2679 journal->j_trans_max =
2449 2680 JOURNAL_TRANS_MAX_DEFAULT / ratio;
2450 /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096 2681 if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
2451 trans max size is decreased proportionally */ 2682 journal->j_trans_max =
2452 if (p_s_sb->s_blocksize < 4096) { 2683 JOURNAL_TRANS_MIN_DEFAULT / ratio;
2453 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize) ; 2684
2454 journal->j_max_batch = (journal->j_trans_max) * 9 / 10 ; 2685 if (journal->j_trans_max != initial)
2455 } 2686 reiserfs_warning(p_s_sb,
2456 } 2687 "sh-461: journal_init: wrong transaction max size (%u). Changed to %u",
2457 2688 initial, journal->j_trans_max);
2458 journal->j_default_max_commit_age = journal->j_max_commit_age; 2689
2459 2690 journal->j_max_batch = journal->j_trans_max *
2460 if (commit_max_age != 0) { 2691 JOURNAL_MAX_BATCH_DEFAULT / JOURNAL_TRANS_MAX_DEFAULT;
2461 journal->j_max_commit_age = commit_max_age; 2692 }
2462 journal->j_max_trans_age = commit_max_age; 2693
2463 } 2694 if (!journal->j_trans_max) {
2464 2695 /*we have the file system was created by old version of mkreiserfs
2465 reiserfs_info (p_s_sb, "journal params: device %s, size %u, " 2696 so this field contains zero value */
2466 "journal first block %u, max trans len %u, max batch %u, " 2697 journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT;
2467 "max commit age %u, max trans age %u\n", 2698 journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT;
2468 bdevname( journal->j_dev_bd, b), 2699 journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE;
2469 SB_ONDISK_JOURNAL_SIZE(p_s_sb), 2700
2470 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb), 2701 /* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096
2471 journal->j_trans_max, 2702 trans max size is decreased proportionally */
2472 journal->j_max_batch, 2703 if (p_s_sb->s_blocksize < 4096) {
2473 journal->j_max_commit_age, 2704 journal->j_trans_max /= (4096 / p_s_sb->s_blocksize);
2474 journal->j_max_trans_age); 2705 journal->j_max_batch = (journal->j_trans_max) * 9 / 10;
2475 2706 }
2476 brelse (bhjh); 2707 }
2477 2708
2478 journal->j_list_bitmap_index = 0 ; 2709 journal->j_default_max_commit_age = journal->j_max_commit_age;
2479 journal_list_init(p_s_sb) ; 2710
2480 2711 if (commit_max_age != 0) {
2481 memset(journal->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; 2712 journal->j_max_commit_age = commit_max_age;
2482 2713 journal->j_max_trans_age = commit_max_age;
2483 INIT_LIST_HEAD(&journal->j_dirty_buffers) ; 2714 }
2484 spin_lock_init(&journal->j_dirty_buffers_lock) ; 2715
2485 2716 reiserfs_info(p_s_sb, "journal params: device %s, size %u, "
2486 journal->j_start = 0 ; 2717 "journal first block %u, max trans len %u, max batch %u, "
2487 journal->j_len = 0 ; 2718 "max commit age %u, max trans age %u\n",
2488 journal->j_len_alloc = 0 ; 2719 bdevname(journal->j_dev_bd, b),
2489 atomic_set(&(journal->j_wcount), 0) ; 2720 SB_ONDISK_JOURNAL_SIZE(p_s_sb),
2490 atomic_set(&(journal->j_async_throttle), 0) ; 2721 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
2491 journal->j_bcount = 0 ; 2722 journal->j_trans_max,
2492 journal->j_trans_start_time = 0 ; 2723 journal->j_max_batch,
2493 journal->j_last = NULL ; 2724 journal->j_max_commit_age, journal->j_max_trans_age);
2494 journal->j_first = NULL ; 2725
2495 init_waitqueue_head(&(journal->j_join_wait)) ; 2726 brelse(bhjh);
2496 sema_init(&journal->j_lock, 1); 2727
2497 sema_init(&journal->j_flush_sem, 1); 2728 journal->j_list_bitmap_index = 0;
2498 2729 journal_list_init(p_s_sb);
2499 journal->j_trans_id = 10 ; 2730
2500 journal->j_mount_id = 10 ; 2731 memset(journal->j_list_hash_table, 0,
2501 journal->j_state = 0 ; 2732 JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *));
2502 atomic_set(&(journal->j_jlock), 0) ; 2733
2503 journal->j_cnode_free_list = allocate_cnodes(num_cnodes) ; 2734 INIT_LIST_HEAD(&journal->j_dirty_buffers);
2504 journal->j_cnode_free_orig = journal->j_cnode_free_list ; 2735 spin_lock_init(&journal->j_dirty_buffers_lock);
2505 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0 ; 2736
2506 journal->j_cnode_used = 0 ; 2737 journal->j_start = 0;
2507 journal->j_must_wait = 0 ; 2738 journal->j_len = 0;
2508 2739 journal->j_len_alloc = 0;
2509 init_journal_hash(p_s_sb) ; 2740 atomic_set(&(journal->j_wcount), 0);
2510 jl = journal->j_current_jl; 2741 atomic_set(&(journal->j_async_throttle), 0);
2511 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl); 2742 journal->j_bcount = 0;
2512 if (!jl->j_list_bitmap) { 2743 journal->j_trans_start_time = 0;
2513 reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0") ; 2744 journal->j_last = NULL;
2514 goto free_and_return; 2745 journal->j_first = NULL;
2515 } 2746 init_waitqueue_head(&(journal->j_join_wait));
2516 if (journal_read(p_s_sb) < 0) { 2747 sema_init(&journal->j_lock, 1);
2517 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount") ; 2748 sema_init(&journal->j_flush_sem, 1);
2518 goto free_and_return; 2749
2519 } 2750 journal->j_trans_id = 10;
2520 2751 journal->j_mount_id = 10;
2521 reiserfs_mounted_fs_count++ ; 2752 journal->j_state = 0;
2522 if (reiserfs_mounted_fs_count <= 1) 2753 atomic_set(&(journal->j_jlock), 0);
2523 commit_wq = create_workqueue("reiserfs"); 2754 journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
2524 2755 journal->j_cnode_free_orig = journal->j_cnode_free_list;
2525 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb); 2756 journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
2526 return 0 ; 2757 journal->j_cnode_used = 0;
2527free_and_return: 2758 journal->j_must_wait = 0;
2528 free_journal_ram(p_s_sb); 2759
2529 return 1; 2760 init_journal_hash(p_s_sb);
2761 jl = journal->j_current_jl;
2762 jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
2763 if (!jl->j_list_bitmap) {
2764 reiserfs_warning(p_s_sb,
2765 "journal-2005, get_list_bitmap failed for journal list 0");
2766 goto free_and_return;
2767 }
2768 if (journal_read(p_s_sb) < 0) {
2769 reiserfs_warning(p_s_sb, "Replay Failure, unable to mount");
2770 goto free_and_return;
2771 }
2772
2773 reiserfs_mounted_fs_count++;
2774 if (reiserfs_mounted_fs_count <= 1)
2775 commit_wq = create_workqueue("reiserfs");
2776
2777 INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
2778 return 0;
2779 free_and_return:
2780 free_journal_ram(p_s_sb);
2781 return 1;
2530} 2782}
2531 2783
2532/* 2784/*
@@ -2534,96 +2786,102 @@ free_and_return:
2534** be used by delete to make sure they don't write more than can fit inside a single 2786** be used by delete to make sure they don't write more than can fit inside a single
2535** transaction 2787** transaction
2536*/ 2788*/
2537int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { 2789int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
2538 struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); 2790 int new_alloc)
2539 time_t now = get_seconds() ; 2791{
2540 /* cannot restart while nested */ 2792 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2541 BUG_ON (!th->t_trans_id); 2793 time_t now = get_seconds();
2542 if (th->t_refcount > 1) 2794 /* cannot restart while nested */
2543 return 0 ; 2795 BUG_ON(!th->t_trans_id);
2544 if ( journal->j_must_wait > 0 || 2796 if (th->t_refcount > 1)
2545 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch || 2797 return 0;
2546 atomic_read(&(journal->j_jlock)) || 2798 if (journal->j_must_wait > 0 ||
2547 (now - journal->j_trans_start_time) > journal->j_max_trans_age || 2799 (journal->j_len_alloc + new_alloc) >= journal->j_max_batch ||
2548 journal->j_cnode_free < (journal->j_trans_max * 3)) { 2800 atomic_read(&(journal->j_jlock)) ||
2549 return 1 ; 2801 (now - journal->j_trans_start_time) > journal->j_max_trans_age ||
2550 } 2802 journal->j_cnode_free < (journal->j_trans_max * 3)) {
2551 return 0 ; 2803 return 1;
2804 }
2805 return 0;
2552} 2806}
2553 2807
2554/* this must be called inside a transaction, and requires the 2808/* this must be called inside a transaction, and requires the
2555** kernel_lock to be held 2809** kernel_lock to be held
2556*/ 2810*/
2557void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { 2811void reiserfs_block_writes(struct reiserfs_transaction_handle *th)
2558 struct reiserfs_journal *journal = SB_JOURNAL (th->t_super); 2812{
2559 BUG_ON (!th->t_trans_id); 2813 struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
2560 journal->j_must_wait = 1 ; 2814 BUG_ON(!th->t_trans_id);
2561 set_bit(J_WRITERS_BLOCKED, &journal->j_state) ; 2815 journal->j_must_wait = 1;
2562 return ; 2816 set_bit(J_WRITERS_BLOCKED, &journal->j_state);
2817 return;
2563} 2818}
2564 2819
2565/* this must be called without a transaction started, and does not 2820/* this must be called without a transaction started, and does not
2566** require BKL 2821** require BKL
2567*/ 2822*/
2568void reiserfs_allow_writes(struct super_block *s) { 2823void reiserfs_allow_writes(struct super_block *s)
2569 struct reiserfs_journal *journal = SB_JOURNAL (s); 2824{
2570 clear_bit(J_WRITERS_BLOCKED, &journal->j_state) ; 2825 struct reiserfs_journal *journal = SB_JOURNAL(s);
2571 wake_up(&journal->j_join_wait) ; 2826 clear_bit(J_WRITERS_BLOCKED, &journal->j_state);
2827 wake_up(&journal->j_join_wait);
2572} 2828}
2573 2829
2574/* this must be called without a transaction started, and does not 2830/* this must be called without a transaction started, and does not
2575** require BKL 2831** require BKL
2576*/ 2832*/
2577void reiserfs_wait_on_write_block(struct super_block *s) { 2833void reiserfs_wait_on_write_block(struct super_block *s)
2578 struct reiserfs_journal *journal = SB_JOURNAL (s); 2834{
2579 wait_event(journal->j_join_wait, 2835 struct reiserfs_journal *journal = SB_JOURNAL(s);
2580 !test_bit(J_WRITERS_BLOCKED, &journal->j_state)) ; 2836 wait_event(journal->j_join_wait,
2581} 2837 !test_bit(J_WRITERS_BLOCKED, &journal->j_state));
2582 2838}
2583static void queue_log_writer(struct super_block *s) { 2839
2584 wait_queue_t wait; 2840static void queue_log_writer(struct super_block *s)
2585 struct reiserfs_journal *journal = SB_JOURNAL (s); 2841{
2586 set_bit(J_WRITERS_QUEUED, &journal->j_state); 2842 wait_queue_t wait;
2587 2843 struct reiserfs_journal *journal = SB_JOURNAL(s);
2588 /* 2844 set_bit(J_WRITERS_QUEUED, &journal->j_state);
2589 * we don't want to use wait_event here because 2845
2590 * we only want to wait once. 2846 /*
2591 */ 2847 * we don't want to use wait_event here because
2592 init_waitqueue_entry(&wait, current); 2848 * we only want to wait once.
2593 add_wait_queue(&journal->j_join_wait, &wait); 2849 */
2594 set_current_state(TASK_UNINTERRUPTIBLE); 2850 init_waitqueue_entry(&wait, current);
2595 if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) 2851 add_wait_queue(&journal->j_join_wait, &wait);
2596 schedule();
2597 current->state = TASK_RUNNING;
2598 remove_wait_queue(&journal->j_join_wait, &wait);
2599}
2600
2601static void wake_queued_writers(struct super_block *s) {
2602 struct reiserfs_journal *journal = SB_JOURNAL (s);
2603 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
2604 wake_up(&journal->j_join_wait);
2605}
2606
2607static void let_transaction_grow(struct super_block *sb,
2608 unsigned long trans_id)
2609{
2610 struct reiserfs_journal *journal = SB_JOURNAL (sb);
2611 unsigned long bcount = journal->j_bcount;
2612 while(1) {
2613 set_current_state(TASK_UNINTERRUPTIBLE); 2852 set_current_state(TASK_UNINTERRUPTIBLE);
2614 schedule_timeout(1); 2853 if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
2615 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; 2854 schedule();
2616 while ((atomic_read(&journal->j_wcount) > 0 || 2855 current->state = TASK_RUNNING;
2617 atomic_read(&journal->j_jlock)) && 2856 remove_wait_queue(&journal->j_join_wait, &wait);
2618 journal->j_trans_id == trans_id) { 2857}
2619 queue_log_writer(sb); 2858
2859static void wake_queued_writers(struct super_block *s)
2860{
2861 struct reiserfs_journal *journal = SB_JOURNAL(s);
2862 if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
2863 wake_up(&journal->j_join_wait);
2864}
2865
2866static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
2867{
2868 struct reiserfs_journal *journal = SB_JOURNAL(sb);
2869 unsigned long bcount = journal->j_bcount;
2870 while (1) {
2871 set_current_state(TASK_UNINTERRUPTIBLE);
2872 schedule_timeout(1);
2873 journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
2874 while ((atomic_read(&journal->j_wcount) > 0 ||
2875 atomic_read(&journal->j_jlock)) &&
2876 journal->j_trans_id == trans_id) {
2877 queue_log_writer(sb);
2878 }
2879 if (journal->j_trans_id != trans_id)
2880 break;
2881 if (bcount == journal->j_bcount)
2882 break;
2883 bcount = journal->j_bcount;
2620 } 2884 }
2621 if (journal->j_trans_id != trans_id)
2622 break;
2623 if (bcount == journal->j_bcount)
2624 break;
2625 bcount = journal->j_bcount;
2626 }
2627} 2885}
2628 2886
2629/* join == true if you must join an existing transaction. 2887/* join == true if you must join an existing transaction.
@@ -2632,224 +2890,244 @@ static void let_transaction_grow(struct super_block *sb,
2632** this will block until the transaction is joinable. send the number of blocks you 2890** this will block until the transaction is joinable. send the number of blocks you
2633** expect to use in nblocks. 2891** expect to use in nblocks.
2634*/ 2892*/
2635static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { 2893static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
2636 time_t now = get_seconds() ; 2894 struct super_block *p_s_sb, unsigned long nblocks,
2637 int old_trans_id ; 2895 int join)
2638 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); 2896{
2639 struct reiserfs_transaction_handle myth; 2897 time_t now = get_seconds();
2640 int sched_count = 0; 2898 int old_trans_id;
2641 int retval; 2899 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2642 2900 struct reiserfs_transaction_handle myth;
2643 reiserfs_check_lock_depth(p_s_sb, "journal_begin") ; 2901 int sched_count = 0;
2644 if (nblocks > journal->j_trans_max) 2902 int retval;
2645 BUG(); 2903
2646 2904 reiserfs_check_lock_depth(p_s_sb, "journal_begin");
2647 PROC_INFO_INC( p_s_sb, journal.journal_being ); 2905 if (nblocks > journal->j_trans_max)
2648 /* set here for journal_join */ 2906 BUG();
2649 th->t_refcount = 1; 2907
2650 th->t_super = p_s_sb ; 2908 PROC_INFO_INC(p_s_sb, journal.journal_being);
2651 2909 /* set here for journal_join */
2652relock: 2910 th->t_refcount = 1;
2653 lock_journal(p_s_sb) ; 2911 th->t_super = p_s_sb;
2654 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted (journal)) { 2912
2655 unlock_journal (p_s_sb); 2913 relock:
2656 retval = journal->j_errno; 2914 lock_journal(p_s_sb);
2657 goto out_fail; 2915 if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted(journal)) {
2658 } 2916 unlock_journal(p_s_sb);
2659 journal->j_bcount++; 2917 retval = journal->j_errno;
2660 2918 goto out_fail;
2661 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { 2919 }
2662 unlock_journal(p_s_sb) ; 2920 journal->j_bcount++;
2663 reiserfs_wait_on_write_block(p_s_sb) ; 2921
2664 PROC_INFO_INC( p_s_sb, journal.journal_relock_writers ); 2922 if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
2665 goto relock ; 2923 unlock_journal(p_s_sb);
2666 } 2924 reiserfs_wait_on_write_block(p_s_sb);
2667 now = get_seconds(); 2925 PROC_INFO_INC(p_s_sb, journal.journal_relock_writers);
2668 2926 goto relock;
2669 /* if there is no room in the journal OR 2927 }
2670 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning 2928 now = get_seconds();
2671 ** we don't sleep if there aren't other writers 2929
2672 */ 2930 /* if there is no room in the journal OR
2673 2931 ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
2674 if ( (!join && journal->j_must_wait > 0) || 2932 ** we don't sleep if there aren't other writers
2675 ( !join && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) || 2933 */
2676 (!join && atomic_read(&journal->j_wcount) > 0 && journal->j_trans_start_time > 0 && 2934
2677 (now - journal->j_trans_start_time) > journal->j_max_trans_age) || 2935 if ((!join && journal->j_must_wait > 0) ||
2678 (!join && atomic_read(&journal->j_jlock)) || 2936 (!join
2679 (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) { 2937 && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch)
2680 2938 || (!join && atomic_read(&journal->j_wcount) > 0
2681 old_trans_id = journal->j_trans_id; 2939 && journal->j_trans_start_time > 0
2682 unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ 2940 && (now - journal->j_trans_start_time) >
2683 2941 journal->j_max_trans_age) || (!join
2684 if (!join && (journal->j_len_alloc + nblocks + 2) >= 2942 && atomic_read(&journal->j_jlock))
2685 journal->j_max_batch && 2943 || (!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
2686 ((journal->j_len + nblocks + 2) * 100) < (journal->j_len_alloc * 75)) 2944
2687 { 2945 old_trans_id = journal->j_trans_id;
2688 if (atomic_read(&journal->j_wcount) > 10) { 2946 unlock_journal(p_s_sb); /* allow others to finish this transaction */
2689 sched_count++; 2947
2690 queue_log_writer(p_s_sb); 2948 if (!join && (journal->j_len_alloc + nblocks + 2) >=
2691 goto relock; 2949 journal->j_max_batch &&
2692 } 2950 ((journal->j_len + nblocks + 2) * 100) <
2693 } 2951 (journal->j_len_alloc * 75)) {
2694 /* don't mess with joining the transaction if all we have to do is 2952 if (atomic_read(&journal->j_wcount) > 10) {
2695 * wait for someone else to do a commit 2953 sched_count++;
2696 */ 2954 queue_log_writer(p_s_sb);
2697 if (atomic_read(&journal->j_jlock)) { 2955 goto relock;
2698 while (journal->j_trans_id == old_trans_id && 2956 }
2699 atomic_read(&journal->j_jlock)) { 2957 }
2700 queue_log_writer(p_s_sb); 2958 /* don't mess with joining the transaction if all we have to do is
2701 } 2959 * wait for someone else to do a commit
2702 goto relock; 2960 */
2703 } 2961 if (atomic_read(&journal->j_jlock)) {
2704 retval = journal_join(&myth, p_s_sb, 1) ; 2962 while (journal->j_trans_id == old_trans_id &&
2705 if (retval) 2963 atomic_read(&journal->j_jlock)) {
2706 goto out_fail; 2964 queue_log_writer(p_s_sb);
2707 2965 }
2708 /* someone might have ended the transaction while we joined */ 2966 goto relock;
2709 if (old_trans_id != journal->j_trans_id) { 2967 }
2710 retval = do_journal_end(&myth, p_s_sb, 1, 0) ; 2968 retval = journal_join(&myth, p_s_sb, 1);
2711 } else { 2969 if (retval)
2712 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ; 2970 goto out_fail;
2713 } 2971
2714 2972 /* someone might have ended the transaction while we joined */
2715 if (retval) 2973 if (old_trans_id != journal->j_trans_id) {
2716 goto out_fail; 2974 retval = do_journal_end(&myth, p_s_sb, 1, 0);
2717 2975 } else {
2718 PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount ); 2976 retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW);
2719 goto relock ; 2977 }
2720 } 2978
2721 /* we are the first writer, set trans_id */ 2979 if (retval)
2722 if (journal->j_trans_start_time == 0) { 2980 goto out_fail;
2723 journal->j_trans_start_time = get_seconds(); 2981
2724 } 2982 PROC_INFO_INC(p_s_sb, journal.journal_relock_wcount);
2725 atomic_inc(&(journal->j_wcount)) ; 2983 goto relock;
2726 journal->j_len_alloc += nblocks ; 2984 }
2727 th->t_blocks_logged = 0 ; 2985 /* we are the first writer, set trans_id */
2728 th->t_blocks_allocated = nblocks ; 2986 if (journal->j_trans_start_time == 0) {
2729 th->t_trans_id = journal->j_trans_id ; 2987 journal->j_trans_start_time = get_seconds();
2730 unlock_journal(p_s_sb) ; 2988 }
2731 INIT_LIST_HEAD (&th->t_list); 2989 atomic_inc(&(journal->j_wcount));
2732 get_fs_excl(); 2990 journal->j_len_alloc += nblocks;
2733 return 0 ; 2991 th->t_blocks_logged = 0;
2734 2992 th->t_blocks_allocated = nblocks;
2735out_fail: 2993 th->t_trans_id = journal->j_trans_id;
2736 memset (th, 0, sizeof (*th)); 2994 unlock_journal(p_s_sb);
2737 /* Re-set th->t_super, so we can properly keep track of how many 2995 INIT_LIST_HEAD(&th->t_list);
2738 * persistent transactions there are. We need to do this so if this 2996 get_fs_excl();
2739 * call is part of a failed restart_transaction, we can free it later */ 2997 return 0;
2740 th->t_super = p_s_sb; 2998
2741 return retval; 2999 out_fail:
2742} 3000 memset(th, 0, sizeof(*th));
2743 3001 /* Re-set th->t_super, so we can properly keep track of how many
2744struct reiserfs_transaction_handle * 3002 * persistent transactions there are. We need to do this so if this
2745reiserfs_persistent_transaction(struct super_block *s, int nblocks) { 3003 * call is part of a failed restart_transaction, we can free it later */
2746 int ret ; 3004 th->t_super = p_s_sb;
2747 struct reiserfs_transaction_handle *th ; 3005 return retval;
2748 3006}
2749 /* if we're nesting into an existing transaction. It will be 3007
2750 ** persistent on its own 3008struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
2751 */ 3009 super_block
2752 if (reiserfs_transaction_running(s)) { 3010 *s,
2753 th = current->journal_info ; 3011 int nblocks)
2754 th->t_refcount++ ; 3012{
2755 if (th->t_refcount < 2) { 3013 int ret;
2756 BUG() ; 3014 struct reiserfs_transaction_handle *th;
2757 } 3015
2758 return th ; 3016 /* if we're nesting into an existing transaction. It will be
2759 } 3017 ** persistent on its own
2760 th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ; 3018 */
2761 if (!th) 3019 if (reiserfs_transaction_running(s)) {
2762 return NULL; 3020 th = current->journal_info;
2763 ret = journal_begin(th, s, nblocks) ; 3021 th->t_refcount++;
2764 if (ret) { 3022 if (th->t_refcount < 2) {
2765 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; 3023 BUG();
2766 return NULL; 3024 }
2767 } 3025 return th;
2768 3026 }
2769 SB_JOURNAL(s)->j_persistent_trans++; 3027 th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle),
2770 return th ; 3028 GFP_NOFS, s);
2771} 3029 if (!th)
2772 3030 return NULL;
2773int 3031 ret = journal_begin(th, s, nblocks);
2774reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) { 3032 if (ret) {
2775 struct super_block *s = th->t_super; 3033 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
2776 int ret = 0; 3034 s);
2777 if (th->t_trans_id) 3035 return NULL;
2778 ret = journal_end(th, th->t_super, th->t_blocks_allocated); 3036 }
2779 else 3037
2780 ret = -EIO; 3038 SB_JOURNAL(s)->j_persistent_trans++;
2781 if (th->t_refcount == 0) { 3039 return th;
2782 SB_JOURNAL(s)->j_persistent_trans--; 3040}
2783 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ; 3041
2784 } 3042int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
2785 return ret; 3043{
2786} 3044 struct super_block *s = th->t_super;
2787 3045 int ret = 0;
2788static int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3046 if (th->t_trans_id)
2789 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3047 ret = journal_end(th, th->t_super, th->t_blocks_allocated);
2790 3048 else
2791 /* this keeps do_journal_end from NULLing out the current->journal_info 3049 ret = -EIO;
2792 ** pointer 3050 if (th->t_refcount == 0) {
2793 */ 3051 SB_JOURNAL(s)->j_persistent_trans--;
2794 th->t_handle_save = cur_th ; 3052 reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
2795 if (cur_th && cur_th->t_refcount > 1) { 3053 s);
2796 BUG() ; 3054 }
2797 } 3055 return ret;
2798 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN) ; 3056}
2799} 3057
2800 3058static int journal_join(struct reiserfs_transaction_handle *th,
2801int journal_join_abort(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3059 struct super_block *p_s_sb, unsigned long nblocks)
2802 struct reiserfs_transaction_handle *cur_th = current->journal_info; 3060{
2803 3061 struct reiserfs_transaction_handle *cur_th = current->journal_info;
2804 /* this keeps do_journal_end from NULLing out the current->journal_info 3062
2805 ** pointer 3063 /* this keeps do_journal_end from NULLing out the current->journal_info
2806 */ 3064 ** pointer
2807 th->t_handle_save = cur_th ; 3065 */
2808 if (cur_th && cur_th->t_refcount > 1) { 3066 th->t_handle_save = cur_th;
2809 BUG() ; 3067 if (cur_th && cur_th->t_refcount > 1) {
2810 } 3068 BUG();
2811 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT) ; 3069 }
2812} 3070 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN);
2813 3071}
2814int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { 3072
2815 struct reiserfs_transaction_handle *cur_th = current->journal_info ; 3073int journal_join_abort(struct reiserfs_transaction_handle *th,
2816 int ret ; 3074 struct super_block *p_s_sb, unsigned long nblocks)
2817 3075{
2818 th->t_handle_save = NULL ; 3076 struct reiserfs_transaction_handle *cur_th = current->journal_info;
2819 if (cur_th) { 3077
2820 /* we are nesting into the current transaction */ 3078 /* this keeps do_journal_end from NULLing out the current->journal_info
2821 if (cur_th->t_super == p_s_sb) { 3079 ** pointer
2822 BUG_ON (!cur_th->t_refcount); 3080 */
2823 cur_th->t_refcount++ ; 3081 th->t_handle_save = cur_th;
2824 memcpy(th, cur_th, sizeof(*th)); 3082 if (cur_th && cur_th->t_refcount > 1) {
2825 if (th->t_refcount <= 1) 3083 BUG();
2826 reiserfs_warning (p_s_sb, "BAD: refcount <= 1, but journal_info != 0"); 3084 }
2827 return 0; 3085 return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT);
3086}
3087
3088int journal_begin(struct reiserfs_transaction_handle *th,
3089 struct super_block *p_s_sb, unsigned long nblocks)
3090{
3091 struct reiserfs_transaction_handle *cur_th = current->journal_info;
3092 int ret;
3093
3094 th->t_handle_save = NULL;
3095 if (cur_th) {
3096 /* we are nesting into the current transaction */
3097 if (cur_th->t_super == p_s_sb) {
3098 BUG_ON(!cur_th->t_refcount);
3099 cur_th->t_refcount++;
3100 memcpy(th, cur_th, sizeof(*th));
3101 if (th->t_refcount <= 1)
3102 reiserfs_warning(p_s_sb,
3103 "BAD: refcount <= 1, but journal_info != 0");
3104 return 0;
3105 } else {
3106 /* we've ended up with a handle from a different filesystem.
3107 ** save it and restore on journal_end. This should never
3108 ** really happen...
3109 */
3110 reiserfs_warning(p_s_sb,
3111 "clm-2100: nesting info a different FS");
3112 th->t_handle_save = current->journal_info;
3113 current->journal_info = th;
3114 }
2828 } else { 3115 } else {
2829 /* we've ended up with a handle from a different filesystem. 3116 current->journal_info = th;
2830 ** save it and restore on journal_end. This should never 3117 }
2831 ** really happen... 3118 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG);
2832 */ 3119 if (current->journal_info != th)
2833 reiserfs_warning(p_s_sb, "clm-2100: nesting info a different FS") ; 3120 BUG();
2834 th->t_handle_save = current->journal_info ;
2835 current->journal_info = th;
2836 }
2837 } else {
2838 current->journal_info = th;
2839 }
2840 ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG) ;
2841 if (current->journal_info != th)
2842 BUG() ;
2843 3121
2844 /* I guess this boils down to being the reciprocal of clm-2100 above. 3122 /* I guess this boils down to being the reciprocal of clm-2100 above.
2845 * If do_journal_begin_r fails, we need to put it back, since journal_end 3123 * If do_journal_begin_r fails, we need to put it back, since journal_end
2846 * won't be called to do it. */ 3124 * won't be called to do it. */
2847 if (ret) 3125 if (ret)
2848 current->journal_info = th->t_handle_save; 3126 current->journal_info = th->t_handle_save;
2849 else 3127 else
2850 BUG_ON (!th->t_refcount); 3128 BUG_ON(!th->t_refcount);
2851 3129
2852 return ret ; 3130 return ret;
2853} 3131}
2854 3132
2855/* 3133/*
@@ -2861,129 +3139,140 @@ int journal_begin(struct reiserfs_transaction_handle *th, struct super_block *
2861** 3139**
2862** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. 3140** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
2863*/ 3141*/
2864int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { 3142int journal_mark_dirty(struct reiserfs_transaction_handle *th,
2865 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3143 struct super_block *p_s_sb, struct buffer_head *bh)
2866 struct reiserfs_journal_cnode *cn = NULL; 3144{
2867 int count_already_incd = 0 ; 3145 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
2868 int prepared = 0 ; 3146 struct reiserfs_journal_cnode *cn = NULL;
2869 BUG_ON (!th->t_trans_id); 3147 int count_already_incd = 0;
2870 3148 int prepared = 0;
2871 PROC_INFO_INC( p_s_sb, journal.mark_dirty ); 3149 BUG_ON(!th->t_trans_id);
2872 if (th->t_trans_id != journal->j_trans_id) { 3150
2873 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", 3151 PROC_INFO_INC(p_s_sb, journal.mark_dirty);
2874 th->t_trans_id, journal->j_trans_id); 3152 if (th->t_trans_id != journal->j_trans_id) {
2875 } 3153 reiserfs_panic(th->t_super,
2876 3154 "journal-1577: handle trans id %ld != current trans id %ld\n",
2877 p_s_sb->s_dirt = 1; 3155 th->t_trans_id, journal->j_trans_id);
2878 3156 }
2879 prepared = test_clear_buffer_journal_prepared (bh); 3157
2880 clear_buffer_journal_restore_dirty (bh); 3158 p_s_sb->s_dirt = 1;
2881 /* already in this transaction, we are done */ 3159
2882 if (buffer_journaled(bh)) { 3160 prepared = test_clear_buffer_journal_prepared(bh);
2883 PROC_INFO_INC( p_s_sb, journal.mark_dirty_already ); 3161 clear_buffer_journal_restore_dirty(bh);
2884 return 0 ; 3162 /* already in this transaction, we are done */
2885 } 3163 if (buffer_journaled(bh)) {
2886 3164 PROC_INFO_INC(p_s_sb, journal.mark_dirty_already);
2887 /* this must be turned into a panic instead of a warning. We can't allow 3165 return 0;
2888 ** a dirty or journal_dirty or locked buffer to be logged, as some changes 3166 }
2889 ** could get to disk too early. NOT GOOD. 3167
2890 */ 3168 /* this must be turned into a panic instead of a warning. We can't allow
2891 if (!prepared || buffer_dirty(bh)) { 3169 ** a dirty or journal_dirty or locked buffer to be logged, as some changes
2892 reiserfs_warning (p_s_sb, "journal-1777: buffer %llu bad state " 3170 ** could get to disk too early. NOT GOOD.
2893 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT", 3171 */
2894 (unsigned long long)bh->b_blocknr, prepared ? ' ' : '!', 3172 if (!prepared || buffer_dirty(bh)) {
2895 buffer_locked(bh) ? ' ' : '!', 3173 reiserfs_warning(p_s_sb, "journal-1777: buffer %llu bad state "
2896 buffer_dirty(bh) ? ' ' : '!', 3174 "%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
2897 buffer_journal_dirty(bh) ? ' ' : '!') ; 3175 (unsigned long long)bh->b_blocknr,
2898 } 3176 prepared ? ' ' : '!',
2899 3177 buffer_locked(bh) ? ' ' : '!',
2900 if (atomic_read(&(journal->j_wcount)) <= 0) { 3178 buffer_dirty(bh) ? ' ' : '!',
2901 reiserfs_warning (p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d", atomic_read(&(journal->j_wcount))) ; 3179 buffer_journal_dirty(bh) ? ' ' : '!');
2902 return 1 ; 3180 }
2903 } 3181
2904 /* this error means I've screwed up, and we've overflowed the transaction. 3182 if (atomic_read(&(journal->j_wcount)) <= 0) {
2905 ** Nothing can be done here, except make the FS readonly or panic. 3183 reiserfs_warning(p_s_sb,
2906 */ 3184 "journal-1409: journal_mark_dirty returning because j_wcount was %d",
2907 if (journal->j_len >= journal->j_trans_max) { 3185 atomic_read(&(journal->j_wcount)));
2908 reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", journal->j_len) ; 3186 return 1;
2909 } 3187 }
2910 3188 /* this error means I've screwed up, and we've overflowed the transaction.
2911 if (buffer_journal_dirty(bh)) { 3189 ** Nothing can be done here, except make the FS readonly or panic.
2912 count_already_incd = 1 ; 3190 */
2913 PROC_INFO_INC( p_s_sb, journal.mark_dirty_notjournal ); 3191 if (journal->j_len >= journal->j_trans_max) {
2914 clear_buffer_journal_dirty (bh); 3192 reiserfs_panic(th->t_super,
2915 } 3193 "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n",
2916 3194 journal->j_len);
2917 if (journal->j_len > journal->j_len_alloc) { 3195 }
2918 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT ; 3196
2919 } 3197 if (buffer_journal_dirty(bh)) {
2920 3198 count_already_incd = 1;
2921 set_buffer_journaled (bh); 3199 PROC_INFO_INC(p_s_sb, journal.mark_dirty_notjournal);
2922 3200 clear_buffer_journal_dirty(bh);
2923 /* now put this guy on the end */ 3201 }
2924 if (!cn) { 3202
2925 cn = get_cnode(p_s_sb) ; 3203 if (journal->j_len > journal->j_len_alloc) {
2926 if (!cn) { 3204 journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT;
2927 reiserfs_panic(p_s_sb, "get_cnode failed!\n"); 3205 }
2928 } 3206
2929 3207 set_buffer_journaled(bh);
2930 if (th->t_blocks_logged == th->t_blocks_allocated) { 3208
2931 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ; 3209 /* now put this guy on the end */
2932 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT ; 3210 if (!cn) {
2933 } 3211 cn = get_cnode(p_s_sb);
2934 th->t_blocks_logged++ ; 3212 if (!cn) {
2935 journal->j_len++ ; 3213 reiserfs_panic(p_s_sb, "get_cnode failed!\n");
2936 3214 }
2937 cn->bh = bh ; 3215
2938 cn->blocknr = bh->b_blocknr ; 3216 if (th->t_blocks_logged == th->t_blocks_allocated) {
2939 cn->sb = p_s_sb; 3217 th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT;
2940 cn->jlist = NULL ; 3218 journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT;
2941 insert_journal_hash(journal->j_hash_table, cn) ; 3219 }
2942 if (!count_already_incd) { 3220 th->t_blocks_logged++;
2943 get_bh(bh) ; 3221 journal->j_len++;
2944 } 3222
2945 } 3223 cn->bh = bh;
2946 cn->next = NULL ; 3224 cn->blocknr = bh->b_blocknr;
2947 cn->prev = journal->j_last ; 3225 cn->sb = p_s_sb;
2948 cn->bh = bh ; 3226 cn->jlist = NULL;
2949 if (journal->j_last) { 3227 insert_journal_hash(journal->j_hash_table, cn);
2950 journal->j_last->next = cn ; 3228 if (!count_already_incd) {
2951 journal->j_last = cn ; 3229 get_bh(bh);
2952 } else { 3230 }
2953 journal->j_first = cn ; 3231 }
2954 journal->j_last = cn ; 3232 cn->next = NULL;
2955 } 3233 cn->prev = journal->j_last;
2956 return 0 ; 3234 cn->bh = bh;
2957} 3235 if (journal->j_last) {
2958 3236 journal->j_last->next = cn;
2959int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3237 journal->j_last = cn;
2960 if (!current->journal_info && th->t_refcount > 1) 3238 } else {
2961 reiserfs_warning (p_s_sb, "REISER-NESTING: th NULL, refcount %d", 3239 journal->j_first = cn;
2962 th->t_refcount); 3240 journal->j_last = cn;
2963 3241 }
2964 if (!th->t_trans_id) { 3242 return 0;
2965 WARN_ON (1); 3243}
2966 return -EIO; 3244
2967 } 3245int journal_end(struct reiserfs_transaction_handle *th,
2968 3246 struct super_block *p_s_sb, unsigned long nblocks)
2969 th->t_refcount--; 3247{
2970 if (th->t_refcount > 0) { 3248 if (!current->journal_info && th->t_refcount > 1)
2971 struct reiserfs_transaction_handle *cur_th = current->journal_info ; 3249 reiserfs_warning(p_s_sb, "REISER-NESTING: th NULL, refcount %d",
2972 3250 th->t_refcount);
2973 /* we aren't allowed to close a nested transaction on a different 3251
2974 ** filesystem from the one in the task struct 3252 if (!th->t_trans_id) {
2975 */ 3253 WARN_ON(1);
2976 if (cur_th->t_super != th->t_super) 3254 return -EIO;
2977 BUG() ; 3255 }
2978 3256
2979 if (th != cur_th) { 3257 th->t_refcount--;
2980 memcpy(current->journal_info, th, sizeof(*th)); 3258 if (th->t_refcount > 0) {
2981 th->t_trans_id = 0; 3259 struct reiserfs_transaction_handle *cur_th =
2982 } 3260 current->journal_info;
2983 return 0; 3261
2984 } else { 3262 /* we aren't allowed to close a nested transaction on a different
2985 return do_journal_end(th, p_s_sb, nblocks, 0) ; 3263 ** filesystem from the one in the task struct
2986 } 3264 */
3265 if (cur_th->t_super != th->t_super)
3266 BUG();
3267
3268 if (th != cur_th) {
3269 memcpy(current->journal_info, th, sizeof(*th));
3270 th->t_trans_id = 0;
3271 }
3272 return 0;
3273 } else {
3274 return do_journal_end(th, p_s_sb, nblocks, 0);
3275 }
2987} 3276}
2988 3277
2989/* removes from the current transaction, relsing and descrementing any counters. 3278/* removes from the current transaction, relsing and descrementing any counters.
@@ -2993,47 +3282,51 @@ int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_
2993** 3282**
2994** returns 1 if it cleaned and relsed the buffer. 0 otherwise 3283** returns 1 if it cleaned and relsed the buffer. 0 otherwise
2995*/ 3284*/
2996static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t blocknr, int already_cleaned) { 3285static int remove_from_transaction(struct super_block *p_s_sb,
2997 struct buffer_head *bh ; 3286 b_blocknr_t blocknr, int already_cleaned)
2998 struct reiserfs_journal_cnode *cn ; 3287{
2999 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3288 struct buffer_head *bh;
3000 int ret = 0; 3289 struct reiserfs_journal_cnode *cn;
3001 3290 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3002 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr) ; 3291 int ret = 0;
3003 if (!cn || !cn->bh) { 3292
3004 return ret ; 3293 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
3005 } 3294 if (!cn || !cn->bh) {
3006 bh = cn->bh ; 3295 return ret;
3007 if (cn->prev) { 3296 }
3008 cn->prev->next = cn->next ; 3297 bh = cn->bh;
3009 } 3298 if (cn->prev) {
3010 if (cn->next) { 3299 cn->prev->next = cn->next;
3011 cn->next->prev = cn->prev ; 3300 }
3012 } 3301 if (cn->next) {
3013 if (cn == journal->j_first) { 3302 cn->next->prev = cn->prev;
3014 journal->j_first = cn->next ; 3303 }
3015 } 3304 if (cn == journal->j_first) {
3016 if (cn == journal->j_last) { 3305 journal->j_first = cn->next;
3017 journal->j_last = cn->prev ; 3306 }
3018 } 3307 if (cn == journal->j_last) {
3019 if (bh) 3308 journal->j_last = cn->prev;
3020 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, bh->b_blocknr, 0) ; 3309 }
3021 clear_buffer_journaled (bh); /* don't log this one */ 3310 if (bh)
3022 3311 remove_journal_hash(p_s_sb, journal->j_hash_table, NULL,
3023 if (!already_cleaned) { 3312 bh->b_blocknr, 0);
3024 clear_buffer_journal_dirty (bh); 3313 clear_buffer_journaled(bh); /* don't log this one */
3025 clear_buffer_dirty(bh); 3314
3026 clear_buffer_journal_test (bh); 3315 if (!already_cleaned) {
3027 put_bh(bh) ; 3316 clear_buffer_journal_dirty(bh);
3028 if (atomic_read(&(bh->b_count)) < 0) { 3317 clear_buffer_dirty(bh);
3029 reiserfs_warning (p_s_sb, "journal-1752: remove from trans, b_count < 0"); 3318 clear_buffer_journal_test(bh);
3030 } 3319 put_bh(bh);
3031 ret = 1 ; 3320 if (atomic_read(&(bh->b_count)) < 0) {
3032 } 3321 reiserfs_warning(p_s_sb,
3033 journal->j_len-- ; 3322 "journal-1752: remove from trans, b_count < 0");
3034 journal->j_len_alloc-- ; 3323 }
3035 free_cnode(p_s_sb, cn) ; 3324 ret = 1;
3036 return ret ; 3325 }
3326 journal->j_len--;
3327 journal->j_len_alloc--;
3328 free_cnode(p_s_sb, cn);
3329 return ret;
3037} 3330}
3038 3331
3039/* 3332/*
@@ -3046,120 +3339,129 @@ static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t block
3046** blocks for a given transaction on disk 3339** blocks for a given transaction on disk
3047** 3340**
3048*/ 3341*/
3049static int can_dirty(struct reiserfs_journal_cnode *cn) { 3342static int can_dirty(struct reiserfs_journal_cnode *cn)
3050 struct super_block *sb = cn->sb; 3343{
3051 b_blocknr_t blocknr = cn->blocknr ; 3344 struct super_block *sb = cn->sb;
3052 struct reiserfs_journal_cnode *cur = cn->hprev ; 3345 b_blocknr_t blocknr = cn->blocknr;
3053 int can_dirty = 1 ; 3346 struct reiserfs_journal_cnode *cur = cn->hprev;
3054 3347 int can_dirty = 1;
3055 /* first test hprev. These are all newer than cn, so any node here 3348
3056 ** with the same block number and dev means this node can't be sent 3349 /* first test hprev. These are all newer than cn, so any node here
3057 ** to disk right now. 3350 ** with the same block number and dev means this node can't be sent
3058 */ 3351 ** to disk right now.
3059 while(cur && can_dirty) { 3352 */
3060 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb && 3353 while (cur && can_dirty) {
3061 cur->blocknr == blocknr) { 3354 if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
3062 can_dirty = 0 ; 3355 cur->blocknr == blocknr) {
3063 } 3356 can_dirty = 0;
3064 cur = cur->hprev ; 3357 }
3065 } 3358 cur = cur->hprev;
3066 /* then test hnext. These are all older than cn. As long as they 3359 }
3067 ** are committed to the log, it is safe to write cn to disk 3360 /* then test hnext. These are all older than cn. As long as they
3068 */ 3361 ** are committed to the log, it is safe to write cn to disk
3069 cur = cn->hnext ; 3362 */
3070 while(cur && can_dirty) { 3363 cur = cn->hnext;
3071 if (cur->jlist && cur->jlist->j_len > 0 && 3364 while (cur && can_dirty) {
3072 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && 3365 if (cur->jlist && cur->jlist->j_len > 0 &&
3073 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) { 3366 atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
3074 can_dirty = 0 ; 3367 cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
3075 } 3368 can_dirty = 0;
3076 cur = cur->hnext ; 3369 }
3077 } 3370 cur = cur->hnext;
3078 return can_dirty ; 3371 }
3372 return can_dirty;
3079} 3373}
3080 3374
3081/* syncs the commit blocks, but does not force the real buffers to disk 3375/* syncs the commit blocks, but does not force the real buffers to disk
3082** will wait until the current transaction is done/commited before returning 3376** will wait until the current transaction is done/commited before returning
3083*/ 3377*/
3084int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { 3378int journal_end_sync(struct reiserfs_transaction_handle *th,
3085 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3379 struct super_block *p_s_sb, unsigned long nblocks)
3380{
3381 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3086 3382
3087 BUG_ON (!th->t_trans_id); 3383 BUG_ON(!th->t_trans_id);
3088 /* you can sync while nested, very, very bad */ 3384 /* you can sync while nested, very, very bad */
3089 if (th->t_refcount > 1) { 3385 if (th->t_refcount > 1) {
3090 BUG() ; 3386 BUG();
3091 } 3387 }
3092 if (journal->j_len == 0) { 3388 if (journal->j_len == 0) {
3093 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3389 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3094 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3390 1);
3095 } 3391 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3096 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT) ; 3392 }
3393 return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT);
3097} 3394}
3098 3395
3099/* 3396/*
3100** writeback the pending async commits to disk 3397** writeback the pending async commits to disk
3101*/ 3398*/
3102static void flush_async_commits(void *p) { 3399static void flush_async_commits(void *p)
3103 struct super_block *p_s_sb = p; 3400{
3104 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3401 struct super_block *p_s_sb = p;
3105 struct reiserfs_journal_list *jl; 3402 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3106 struct list_head *entry; 3403 struct reiserfs_journal_list *jl;
3107 3404 struct list_head *entry;
3108 lock_kernel(); 3405
3109 if (!list_empty(&journal->j_journal_list)) { 3406 lock_kernel();
3110 /* last entry is the youngest, commit it and you get everything */ 3407 if (!list_empty(&journal->j_journal_list)) {
3111 entry = journal->j_journal_list.prev; 3408 /* last entry is the youngest, commit it and you get everything */
3112 jl = JOURNAL_LIST_ENTRY(entry); 3409 entry = journal->j_journal_list.prev;
3113 flush_commit_list(p_s_sb, jl, 1); 3410 jl = JOURNAL_LIST_ENTRY(entry);
3114 } 3411 flush_commit_list(p_s_sb, jl, 1);
3115 unlock_kernel(); 3412 }
3116 /* 3413 unlock_kernel();
3117 * this is a little racey, but there's no harm in missing 3414 /*
3118 * the filemap_fdata_write 3415 * this is a little racey, but there's no harm in missing
3119 */ 3416 * the filemap_fdata_write
3120 if (!atomic_read(&journal->j_async_throttle) && !reiserfs_is_journal_aborted (journal)) { 3417 */
3121 atomic_inc(&journal->j_async_throttle); 3418 if (!atomic_read(&journal->j_async_throttle)
3122 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); 3419 && !reiserfs_is_journal_aborted(journal)) {
3123 atomic_dec(&journal->j_async_throttle); 3420 atomic_inc(&journal->j_async_throttle);
3124 } 3421 filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
3422 atomic_dec(&journal->j_async_throttle);
3423 }
3125} 3424}
3126 3425
3127/* 3426/*
3128** flushes any old transactions to disk 3427** flushes any old transactions to disk
3129** ends the current transaction if it is too old 3428** ends the current transaction if it is too old
3130*/ 3429*/
3131int reiserfs_flush_old_commits(struct super_block *p_s_sb) { 3430int reiserfs_flush_old_commits(struct super_block *p_s_sb)
3132 time_t now ; 3431{
3133 struct reiserfs_transaction_handle th ; 3432 time_t now;
3134 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3433 struct reiserfs_transaction_handle th;
3135 3434 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3136 now = get_seconds(); 3435
3137 /* safety check so we don't flush while we are replaying the log during 3436 now = get_seconds();
3138 * mount 3437 /* safety check so we don't flush while we are replaying the log during
3139 */ 3438 * mount
3140 if (list_empty(&journal->j_journal_list)) { 3439 */
3141 return 0 ; 3440 if (list_empty(&journal->j_journal_list)) {
3142 } 3441 return 0;
3143 3442 }
3144 /* check the current transaction. If there are no writers, and it is 3443
3145 * too old, finish it, and force the commit blocks to disk 3444 /* check the current transaction. If there are no writers, and it is
3146 */ 3445 * too old, finish it, and force the commit blocks to disk
3147 if (atomic_read(&journal->j_wcount) <= 0 && 3446 */
3148 journal->j_trans_start_time > 0 && 3447 if (atomic_read(&journal->j_wcount) <= 0 &&
3149 journal->j_len > 0 && 3448 journal->j_trans_start_time > 0 &&
3150 (now - journal->j_trans_start_time) > journal->j_max_trans_age) 3449 journal->j_len > 0 &&
3151 { 3450 (now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3152 if (!journal_join(&th, p_s_sb, 1)) { 3451 if (!journal_join(&th, p_s_sb, 1)) {
3153 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3452 reiserfs_prepare_for_journal(p_s_sb,
3154 journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3453 SB_BUFFER_WITH_SB(p_s_sb),
3155 3454 1);
3156 /* we're only being called from kreiserfsd, it makes no sense to do 3455 journal_mark_dirty(&th, p_s_sb,
3157 ** an async commit so that kreiserfsd can do it later 3456 SB_BUFFER_WITH_SB(p_s_sb));
3158 */ 3457
3159 do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; 3458 /* we're only being called from kreiserfsd, it makes no sense to do
3160 } 3459 ** an async commit so that kreiserfsd can do it later
3161 } 3460 */
3162 return p_s_sb->s_dirt; 3461 do_journal_end(&th, p_s_sb, 1, COMMIT_NOW | WAIT);
3462 }
3463 }
3464 return p_s_sb->s_dirt;
3163} 3465}
3164 3466
3165/* 3467/*
@@ -3173,101 +3475,108 @@ int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
3173** 3475**
3174** Note, we can't allow the journal_end to proceed while there are still writers in the log. 3476** Note, we can't allow the journal_end to proceed while there are still writers in the log.
3175*/ 3477*/
3176static int check_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, 3478static int check_journal_end(struct reiserfs_transaction_handle *th,
3177 unsigned long nblocks, int flags) { 3479 struct super_block *p_s_sb, unsigned long nblocks,
3178 3480 int flags)
3179 time_t now ; 3481{
3180 int flush = flags & FLUSH_ALL ; 3482
3181 int commit_now = flags & COMMIT_NOW ; 3483 time_t now;
3182 int wait_on_commit = flags & WAIT ; 3484 int flush = flags & FLUSH_ALL;
3183 struct reiserfs_journal_list *jl; 3485 int commit_now = flags & COMMIT_NOW;
3184 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3486 int wait_on_commit = flags & WAIT;
3185 3487 struct reiserfs_journal_list *jl;
3186 BUG_ON (!th->t_trans_id); 3488 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3187 3489
3188 if (th->t_trans_id != journal->j_trans_id) { 3490 BUG_ON(!th->t_trans_id);
3189 reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", 3491
3190 th->t_trans_id, journal->j_trans_id); 3492 if (th->t_trans_id != journal->j_trans_id) {
3191 } 3493 reiserfs_panic(th->t_super,
3192 3494 "journal-1577: handle trans id %ld != current trans id %ld\n",
3193 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ; 3495 th->t_trans_id, journal->j_trans_id);
3194 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ 3496 }
3195 atomic_dec(&(journal->j_wcount)) ; 3497
3196 } 3498 journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged);
3197 3499 if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */
3198 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released 3500 atomic_dec(&(journal->j_wcount));
3199 ** will be dealt with by next transaction that actually writes something, but should be taken 3501 }
3200 ** care of in this trans 3502
3201 */ 3503 /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
3202 if (journal->j_len == 0) { 3504 ** will be dealt with by next transaction that actually writes something, but should be taken
3203 BUG(); 3505 ** care of in this trans
3204 } 3506 */
3205 /* if wcount > 0, and we are called to with flush or commit_now, 3507 if (journal->j_len == 0) {
3206 ** we wait on j_join_wait. We will wake up when the last writer has 3508 BUG();
3207 ** finished the transaction, and started it on its way to the disk. 3509 }
3208 ** Then, we flush the commit or journal list, and just return 0 3510 /* if wcount > 0, and we are called to with flush or commit_now,
3209 ** because the rest of journal end was already done for this transaction. 3511 ** we wait on j_join_wait. We will wake up when the last writer has
3210 */ 3512 ** finished the transaction, and started it on its way to the disk.
3211 if (atomic_read(&(journal->j_wcount)) > 0) { 3513 ** Then, we flush the commit or journal list, and just return 0
3212 if (flush || commit_now) { 3514 ** because the rest of journal end was already done for this transaction.
3213 unsigned trans_id ; 3515 */
3214 3516 if (atomic_read(&(journal->j_wcount)) > 0) {
3215 jl = journal->j_current_jl; 3517 if (flush || commit_now) {
3216 trans_id = jl->j_trans_id; 3518 unsigned trans_id;
3217 if (wait_on_commit) 3519
3218 jl->j_state |= LIST_COMMIT_PENDING; 3520 jl = journal->j_current_jl;
3219 atomic_set(&(journal->j_jlock), 1) ; 3521 trans_id = jl->j_trans_id;
3220 if (flush) { 3522 if (wait_on_commit)
3221 journal->j_next_full_flush = 1 ; 3523 jl->j_state |= LIST_COMMIT_PENDING;
3222 } 3524 atomic_set(&(journal->j_jlock), 1);
3223 unlock_journal(p_s_sb) ; 3525 if (flush) {
3224 3526 journal->j_next_full_flush = 1;
3225 /* sleep while the current transaction is still j_jlocked */ 3527 }
3226 while(journal->j_trans_id == trans_id) { 3528 unlock_journal(p_s_sb);
3227 if (atomic_read(&journal->j_jlock)) { 3529
3228 queue_log_writer(p_s_sb); 3530 /* sleep while the current transaction is still j_jlocked */
3229 } else { 3531 while (journal->j_trans_id == trans_id) {
3230 lock_journal(p_s_sb); 3532 if (atomic_read(&journal->j_jlock)) {
3231 if (journal->j_trans_id == trans_id) { 3533 queue_log_writer(p_s_sb);
3232 atomic_set(&(journal->j_jlock), 1) ; 3534 } else {
3233 } 3535 lock_journal(p_s_sb);
3234 unlock_journal(p_s_sb); 3536 if (journal->j_trans_id == trans_id) {
3235 } 3537 atomic_set(&(journal->j_jlock),
3236 } 3538 1);
3237 if (journal->j_trans_id == trans_id) { 3539 }
3238 BUG(); 3540 unlock_journal(p_s_sb);
3239 } 3541 }
3240 if (commit_now && journal_list_still_alive(p_s_sb, trans_id) && 3542 }
3241 wait_on_commit) 3543 if (journal->j_trans_id == trans_id) {
3242 { 3544 BUG();
3243 flush_commit_list(p_s_sb, jl, 1) ; 3545 }
3244 } 3546 if (commit_now
3245 return 0 ; 3547 && journal_list_still_alive(p_s_sb, trans_id)
3246 } 3548 && wait_on_commit) {
3247 unlock_journal(p_s_sb) ; 3549 flush_commit_list(p_s_sb, jl, 1);
3248 return 0 ; 3550 }
3249 } 3551 return 0;
3250 3552 }
3251 /* deal with old transactions where we are the last writers */ 3553 unlock_journal(p_s_sb);
3252 now = get_seconds(); 3554 return 0;
3253 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) { 3555 }
3254 commit_now = 1 ; 3556
3255 journal->j_next_async_flush = 1 ; 3557 /* deal with old transactions where we are the last writers */
3256 } 3558 now = get_seconds();
3257 /* don't batch when someone is waiting on j_join_wait */ 3559 if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
3258 /* don't batch when syncing the commit or flushing the whole trans */ 3560 commit_now = 1;
3259 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) && !flush && !commit_now && 3561 journal->j_next_async_flush = 1;
3260 (journal->j_len < journal->j_max_batch) && 3562 }
3261 journal->j_len_alloc < journal->j_max_batch && journal->j_cnode_free > (journal->j_trans_max * 3)) { 3563 /* don't batch when someone is waiting on j_join_wait */
3262 journal->j_bcount++ ; 3564 /* don't batch when syncing the commit or flushing the whole trans */
3263 unlock_journal(p_s_sb) ; 3565 if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock)))
3264 return 0 ; 3566 && !flush && !commit_now && (journal->j_len < journal->j_max_batch)
3265 } 3567 && journal->j_len_alloc < journal->j_max_batch
3266 3568 && journal->j_cnode_free > (journal->j_trans_max * 3)) {
3267 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) { 3569 journal->j_bcount++;
3268 reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%ld) is too high\n", journal->j_start) ; 3570 unlock_journal(p_s_sb);
3269 } 3571 return 0;
3270 return 1 ; 3572 }
3573
3574 if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3575 reiserfs_panic(p_s_sb,
3576 "journal-003: journal_end: j_start (%ld) is too high\n",
3577 journal->j_start);
3578 }
3579 return 1;
3271} 3580}
3272 3581
3273/* 3582/*
@@ -3284,83 +3593,95 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, struct supe
3284** 3593**
3285** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 3594** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
3286*/ 3595*/
3287int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, b_blocknr_t blocknr) { 3596int journal_mark_freed(struct reiserfs_transaction_handle *th,
3288 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3597 struct super_block *p_s_sb, b_blocknr_t blocknr)
3289 struct reiserfs_journal_cnode *cn = NULL ; 3598{
3290 struct buffer_head *bh = NULL ; 3599 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3291 struct reiserfs_list_bitmap *jb = NULL ; 3600 struct reiserfs_journal_cnode *cn = NULL;
3292 int cleaned = 0 ; 3601 struct buffer_head *bh = NULL;
3293 BUG_ON (!th->t_trans_id); 3602 struct reiserfs_list_bitmap *jb = NULL;
3294 3603 int cleaned = 0;
3295 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr); 3604 BUG_ON(!th->t_trans_id);
3296 if (cn && cn->bh) { 3605
3297 bh = cn->bh ; 3606 cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
3298 get_bh(bh) ; 3607 if (cn && cn->bh) {
3299 } 3608 bh = cn->bh;
3300 /* if it is journal new, we just remove it from this transaction */ 3609 get_bh(bh);
3301 if (bh && buffer_journal_new(bh)) { 3610 }
3302 clear_buffer_journal_new (bh); 3611 /* if it is journal new, we just remove it from this transaction */
3303 clear_prepared_bits(bh) ; 3612 if (bh && buffer_journal_new(bh)) {
3304 reiserfs_clean_and_file_buffer(bh) ; 3613 clear_buffer_journal_new(bh);
3305 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; 3614 clear_prepared_bits(bh);
3306 } else { 3615 reiserfs_clean_and_file_buffer(bh);
3307 /* set the bit for this block in the journal bitmap for this transaction */ 3616 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3308 jb = journal->j_current_jl->j_list_bitmap; 3617 } else {
3309 if (!jb) { 3618 /* set the bit for this block in the journal bitmap for this transaction */
3310 reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; 3619 jb = journal->j_current_jl->j_list_bitmap;
3311 } 3620 if (!jb) {
3312 set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; 3621 reiserfs_panic(p_s_sb,
3313 3622 "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n");
3314 /* Note, the entire while loop is not allowed to schedule. */ 3623 }
3315 3624 set_bit_in_list_bitmap(p_s_sb, blocknr, jb);
3316 if (bh) { 3625
3317 clear_prepared_bits(bh) ; 3626 /* Note, the entire while loop is not allowed to schedule. */
3318 reiserfs_clean_and_file_buffer(bh) ; 3627
3319 } 3628 if (bh) {
3320 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; 3629 clear_prepared_bits(bh);
3321 3630 reiserfs_clean_and_file_buffer(bh);
3322 /* find all older transactions with this block, make sure they don't try to write it out */ 3631 }
3323 cn = get_journal_hash_dev(p_s_sb,journal->j_list_hash_table, blocknr) ; 3632 cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned);
3324 while (cn) { 3633
3325 if (p_s_sb == cn->sb && blocknr == cn->blocknr) { 3634 /* find all older transactions with this block, make sure they don't try to write it out */
3326 set_bit(BLOCK_FREED, &cn->state) ; 3635 cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table,
3327 if (cn->bh) { 3636 blocknr);
3328 if (!cleaned) { 3637 while (cn) {
3329 /* remove_from_transaction will brelse the buffer if it was 3638 if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
3330 ** in the current trans 3639 set_bit(BLOCK_FREED, &cn->state);
3331 */ 3640 if (cn->bh) {
3332 clear_buffer_journal_dirty (cn->bh); 3641 if (!cleaned) {
3333 clear_buffer_dirty(cn->bh); 3642 /* remove_from_transaction will brelse the buffer if it was
3334 clear_buffer_journal_test(cn->bh); 3643 ** in the current trans
3335 cleaned = 1 ; 3644 */
3336 put_bh(cn->bh) ; 3645 clear_buffer_journal_dirty(cn->
3337 if (atomic_read(&(cn->bh->b_count)) < 0) { 3646 bh);
3338 reiserfs_warning (p_s_sb, "journal-2138: cn->bh->b_count < 0"); 3647 clear_buffer_dirty(cn->bh);
3339 } 3648 clear_buffer_journal_test(cn->
3340 } 3649 bh);
3341 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ 3650 cleaned = 1;
3342 atomic_dec(&(cn->jlist->j_nonzerolen)) ; 3651 put_bh(cn->bh);
3343 } 3652 if (atomic_read
3344 cn->bh = NULL ; 3653 (&(cn->bh->b_count)) < 0) {
3345 } 3654 reiserfs_warning(p_s_sb,
3346 } 3655 "journal-2138: cn->bh->b_count < 0");
3347 cn = cn->hnext ; 3656 }
3348 } 3657 }
3349 } 3658 if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */
3350 3659 atomic_dec(&
3351 if (bh) { 3660 (cn->jlist->
3352 put_bh(bh) ; /* get_hash grabs the buffer */ 3661 j_nonzerolen));
3353 if (atomic_read(&(bh->b_count)) < 0) { 3662 }
3354 reiserfs_warning (p_s_sb, "journal-2165: bh->b_count < 0"); 3663 cn->bh = NULL;
3355 } 3664 }
3356 } 3665 }
3357 return 0 ; 3666 cn = cn->hnext;
3358} 3667 }
3359 3668 }
3360void reiserfs_update_inode_transaction(struct inode *inode) { 3669
3361 struct reiserfs_journal *journal = SB_JOURNAL (inode->i_sb); 3670 if (bh) {
3362 REISERFS_I(inode)->i_jl = journal->j_current_jl; 3671 put_bh(bh); /* get_hash grabs the buffer */
3363 REISERFS_I(inode)->i_trans_id = journal->j_trans_id ; 3672 if (atomic_read(&(bh->b_count)) < 0) {
3673 reiserfs_warning(p_s_sb,
3674 "journal-2165: bh->b_count < 0");
3675 }
3676 }
3677 return 0;
3678}
3679
3680void reiserfs_update_inode_transaction(struct inode *inode)
3681{
3682 struct reiserfs_journal *journal = SB_JOURNAL(inode->i_sb);
3683 REISERFS_I(inode)->i_jl = journal->j_current_jl;
3684 REISERFS_I(inode)->i_trans_id = journal->j_trans_id;
3364} 3685}
3365 3686
3366/* 3687/*
@@ -3368,99 +3689,102 @@ void reiserfs_update_inode_transaction(struct inode *inode) {
3368 * if a transaction was actually committed and the barrier was done 3689 * if a transaction was actually committed and the barrier was done
3369 */ 3690 */
3370static int __commit_trans_jl(struct inode *inode, unsigned long id, 3691static int __commit_trans_jl(struct inode *inode, unsigned long id,
3371 struct reiserfs_journal_list *jl) 3692 struct reiserfs_journal_list *jl)
3372{ 3693{
3373 struct reiserfs_transaction_handle th ; 3694 struct reiserfs_transaction_handle th;
3374 struct super_block *sb = inode->i_sb ; 3695 struct super_block *sb = inode->i_sb;
3375 struct reiserfs_journal *journal = SB_JOURNAL (sb); 3696 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3376 int ret = 0; 3697 int ret = 0;
3698
3699 /* is it from the current transaction, or from an unknown transaction? */
3700 if (id == journal->j_trans_id) {
3701 jl = journal->j_current_jl;
3702 /* try to let other writers come in and grow this transaction */
3703 let_transaction_grow(sb, id);
3704 if (journal->j_trans_id != id) {
3705 goto flush_commit_only;
3706 }
3377 3707
3378 /* is it from the current transaction, or from an unknown transaction? */ 3708 ret = journal_begin(&th, sb, 1);
3379 if (id == journal->j_trans_id) { 3709 if (ret)
3380 jl = journal->j_current_jl; 3710 return ret;
3381 /* try to let other writers come in and grow this transaction */ 3711
3382 let_transaction_grow(sb, id); 3712 /* someone might have ended this transaction while we joined */
3383 if (journal->j_trans_id != id) { 3713 if (journal->j_trans_id != id) {
3384 goto flush_commit_only; 3714 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb),
3385 } 3715 1);
3716 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb));
3717 ret = journal_end(&th, sb, 1);
3718 goto flush_commit_only;
3719 }
3386 3720
3387 ret = journal_begin(&th, sb, 1) ; 3721 ret = journal_end_sync(&th, sb, 1);
3388 if (ret) 3722 if (!ret)
3389 return ret; 3723 ret = 1;
3390 3724
3391 /* someone might have ended this transaction while we joined */ 3725 } else {
3392 if (journal->j_trans_id != id) { 3726 /* this gets tricky, we have to make sure the journal list in
3393 reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ; 3727 * the inode still exists. We know the list is still around
3394 journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ; 3728 * if we've got a larger transaction id than the oldest list
3395 ret = journal_end(&th, sb, 1) ; 3729 */
3396 goto flush_commit_only; 3730 flush_commit_only:
3731 if (journal_list_still_alive(inode->i_sb, id)) {
3732 /*
3733 * we only set ret to 1 when we know for sure
3734 * the barrier hasn't been started yet on the commit
3735 * block.
3736 */
3737 if (atomic_read(&jl->j_commit_left) > 1)
3738 ret = 1;
3739 flush_commit_list(sb, jl, 1);
3740 if (journal->j_errno)
3741 ret = journal->j_errno;
3742 }
3397 } 3743 }
3744 /* otherwise the list is gone, and long since committed */
3745 return ret;
3746}
3398 3747
3399 ret = journal_end_sync(&th, sb, 1) ; 3748int reiserfs_commit_for_inode(struct inode *inode)
3400 if (!ret) 3749{
3401 ret = 1; 3750 unsigned long id = REISERFS_I(inode)->i_trans_id;
3751 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
3402 3752
3403 } else { 3753 /* for the whole inode, assume unset id means it was
3404 /* this gets tricky, we have to make sure the journal list in 3754 * changed in the current transaction. More conservative
3405 * the inode still exists. We know the list is still around
3406 * if we've got a larger transaction id than the oldest list
3407 */ 3755 */
3408flush_commit_only: 3756 if (!id || !jl) {
3409 if (journal_list_still_alive(inode->i_sb, id)) { 3757 reiserfs_update_inode_transaction(inode);
3410 /* 3758 id = REISERFS_I(inode)->i_trans_id;
3411 * we only set ret to 1 when we know for sure 3759 /* jl will be updated in __commit_trans_jl */
3412 * the barrier hasn't been started yet on the commit 3760 }
3413 * block. 3761
3414 */ 3762 return __commit_trans_jl(inode, id, jl);
3415 if (atomic_read(&jl->j_commit_left) > 1) 3763}
3416 ret = 1; 3764
3417 flush_commit_list(sb, jl, 1) ; 3765void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3418 if (journal->j_errno) 3766 struct buffer_head *bh)
3419 ret = journal->j_errno; 3767{
3420 } 3768 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3421 } 3769 PROC_INFO_INC(p_s_sb, journal.restore_prepared);
3422 /* otherwise the list is gone, and long since committed */ 3770 if (!bh) {
3423 return ret; 3771 return;
3424} 3772 }
3425 3773 if (test_clear_buffer_journal_restore_dirty(bh) &&
3426int reiserfs_commit_for_inode(struct inode *inode) { 3774 buffer_journal_dirty(bh)) {
3427 unsigned long id = REISERFS_I(inode)->i_trans_id; 3775 struct reiserfs_journal_cnode *cn;
3428 struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl; 3776 cn = get_journal_hash_dev(p_s_sb,
3429 3777 journal->j_list_hash_table,
3430 /* for the whole inode, assume unset id means it was 3778 bh->b_blocknr);
3431 * changed in the current transaction. More conservative 3779 if (cn && can_dirty(cn)) {
3432 */ 3780 set_buffer_journal_test(bh);
3433 if (!id || !jl) { 3781 mark_buffer_dirty(bh);
3434 reiserfs_update_inode_transaction(inode) ; 3782 }
3435 id = REISERFS_I(inode)->i_trans_id; 3783 }
3436 /* jl will be updated in __commit_trans_jl */ 3784 clear_buffer_journal_prepared(bh);
3437 } 3785}
3438 3786
3439 return __commit_trans_jl(inode, id, jl); 3787extern struct tree_balance *cur_tb;
3440}
3441
3442void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
3443 struct buffer_head *bh) {
3444 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
3445 PROC_INFO_INC( p_s_sb, journal.restore_prepared );
3446 if (!bh) {
3447 return ;
3448 }
3449 if (test_clear_buffer_journal_restore_dirty (bh) &&
3450 buffer_journal_dirty(bh)) {
3451 struct reiserfs_journal_cnode *cn;
3452 cn = get_journal_hash_dev(p_s_sb,
3453 journal->j_list_hash_table,
3454 bh->b_blocknr);
3455 if (cn && can_dirty(cn)) {
3456 set_buffer_journal_test (bh);
3457 mark_buffer_dirty(bh);
3458 }
3459 }
3460 clear_buffer_journal_prepared (bh);
3461}
3462
3463extern struct tree_balance *cur_tb ;
3464/* 3788/*
3465** before we can change a metadata block, we have to make sure it won't 3789** before we can change a metadata block, we have to make sure it won't
3466** be written to disk while we are altering it. So, we must: 3790** be written to disk while we are altering it. So, we must:
@@ -3469,39 +3793,41 @@ extern struct tree_balance *cur_tb ;
3469** 3793**
3470*/ 3794*/
3471int reiserfs_prepare_for_journal(struct super_block *p_s_sb, 3795int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3472 struct buffer_head *bh, int wait) { 3796 struct buffer_head *bh, int wait)
3473 PROC_INFO_INC( p_s_sb, journal.prepare ); 3797{
3474 3798 PROC_INFO_INC(p_s_sb, journal.prepare);
3475 if (test_set_buffer_locked(bh)) { 3799
3476 if (!wait) 3800 if (test_set_buffer_locked(bh)) {
3477 return 0; 3801 if (!wait)
3478 lock_buffer(bh); 3802 return 0;
3479 } 3803 lock_buffer(bh);
3480 set_buffer_journal_prepared (bh); 3804 }
3481 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) { 3805 set_buffer_journal_prepared(bh);
3482 clear_buffer_journal_test (bh); 3806 if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
3483 set_buffer_journal_restore_dirty (bh); 3807 clear_buffer_journal_test(bh);
3484 } 3808 set_buffer_journal_restore_dirty(bh);
3485 unlock_buffer(bh); 3809 }
3486 return 1; 3810 unlock_buffer(bh);
3487} 3811 return 1;
3488 3812}
3489static void flush_old_journal_lists(struct super_block *s) { 3813
3490 struct reiserfs_journal *journal = SB_JOURNAL (s); 3814static void flush_old_journal_lists(struct super_block *s)
3491 struct reiserfs_journal_list *jl; 3815{
3492 struct list_head *entry; 3816 struct reiserfs_journal *journal = SB_JOURNAL(s);
3493 time_t now = get_seconds(); 3817 struct reiserfs_journal_list *jl;
3494 3818 struct list_head *entry;
3495 while(!list_empty(&journal->j_journal_list)) { 3819 time_t now = get_seconds();
3496 entry = journal->j_journal_list.next; 3820
3497 jl = JOURNAL_LIST_ENTRY(entry); 3821 while (!list_empty(&journal->j_journal_list)) {
3498 /* this check should always be run, to send old lists to disk */ 3822 entry = journal->j_journal_list.next;
3499 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { 3823 jl = JOURNAL_LIST_ENTRY(entry);
3500 flush_used_journal_lists(s, jl); 3824 /* this check should always be run, to send old lists to disk */
3501 } else { 3825 if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
3502 break; 3826 flush_used_journal_lists(s, jl);
3827 } else {
3828 break;
3829 }
3503 } 3830 }
3504 }
3505} 3831}
3506 3832
3507/* 3833/*
@@ -3514,375 +3840,390 @@ static void flush_old_journal_lists(struct super_block *s) {
3514** If the journal is aborted, we just clean up. Things like flushing 3840** If the journal is aborted, we just clean up. Things like flushing
3515** journal lists, etc just won't happen. 3841** journal lists, etc just won't happen.
3516*/ 3842*/
3517static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks, 3843static int do_journal_end(struct reiserfs_transaction_handle *th,
3518 int flags) { 3844 struct super_block *p_s_sb, unsigned long nblocks,
3519 struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb); 3845 int flags)
3520 struct reiserfs_journal_cnode *cn, *next, *jl_cn; 3846{
3521 struct reiserfs_journal_cnode *last_cn = NULL; 3847 struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
3522 struct reiserfs_journal_desc *desc ; 3848 struct reiserfs_journal_cnode *cn, *next, *jl_cn;
3523 struct reiserfs_journal_commit *commit ; 3849 struct reiserfs_journal_cnode *last_cn = NULL;
3524 struct buffer_head *c_bh ; /* commit bh */ 3850 struct reiserfs_journal_desc *desc;
3525 struct buffer_head *d_bh ; /* desc bh */ 3851 struct reiserfs_journal_commit *commit;
3526 int cur_write_start = 0 ; /* start index of current log write */ 3852 struct buffer_head *c_bh; /* commit bh */
3527 int old_start ; 3853 struct buffer_head *d_bh; /* desc bh */
3528 int i ; 3854 int cur_write_start = 0; /* start index of current log write */
3529 int flush = flags & FLUSH_ALL ; 3855 int old_start;
3530 int wait_on_commit = flags & WAIT ; 3856 int i;
3531 struct reiserfs_journal_list *jl, *temp_jl; 3857 int flush = flags & FLUSH_ALL;
3532 struct list_head *entry, *safe; 3858 int wait_on_commit = flags & WAIT;
3533 unsigned long jindex; 3859 struct reiserfs_journal_list *jl, *temp_jl;
3534 unsigned long commit_trans_id; 3860 struct list_head *entry, *safe;
3535 int trans_half; 3861 unsigned long jindex;
3536 3862 unsigned long commit_trans_id;
3537 BUG_ON (th->t_refcount > 1); 3863 int trans_half;
3538 BUG_ON (!th->t_trans_id); 3864
3539 3865 BUG_ON(th->t_refcount > 1);
3540 put_fs_excl(); 3866 BUG_ON(!th->t_trans_id);
3541 current->journal_info = th->t_handle_save; 3867
3542 reiserfs_check_lock_depth(p_s_sb, "journal end"); 3868 put_fs_excl();
3543 if (journal->j_len == 0) { 3869 current->journal_info = th->t_handle_save;
3544 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; 3870 reiserfs_check_lock_depth(p_s_sb, "journal end");
3545 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; 3871 if (journal->j_len == 0) {
3546 } 3872 reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb),
3547 3873 1);
3548 lock_journal(p_s_sb) ; 3874 journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb));
3549 if (journal->j_next_full_flush) { 3875 }
3550 flags |= FLUSH_ALL ;
3551 flush = 1 ;
3552 }
3553 if (journal->j_next_async_flush) {
3554 flags |= COMMIT_NOW | WAIT;
3555 wait_on_commit = 1;
3556 }
3557
3558 /* check_journal_end locks the journal, and unlocks if it does not return 1
3559 ** it tells us if we should continue with the journal_end, or just return
3560 */
3561 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
3562 p_s_sb->s_dirt = 1;
3563 wake_queued_writers(p_s_sb);
3564 reiserfs_async_progress_wait(p_s_sb);
3565 goto out ;
3566 }
3567
3568 /* check_journal_end might set these, check again */
3569 if (journal->j_next_full_flush) {
3570 flush = 1 ;
3571 }
3572
3573 /*
3574 ** j must wait means we have to flush the log blocks, and the real blocks for
3575 ** this transaction
3576 */
3577 if (journal->j_must_wait > 0) {
3578 flush = 1 ;
3579 }
3580 3876
3877 lock_journal(p_s_sb);
3878 if (journal->j_next_full_flush) {
3879 flags |= FLUSH_ALL;
3880 flush = 1;
3881 }
3882 if (journal->j_next_async_flush) {
3883 flags |= COMMIT_NOW | WAIT;
3884 wait_on_commit = 1;
3885 }
3886
3887 /* check_journal_end locks the journal, and unlocks if it does not return 1
3888 ** it tells us if we should continue with the journal_end, or just return
3889 */
3890 if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
3891 p_s_sb->s_dirt = 1;
3892 wake_queued_writers(p_s_sb);
3893 reiserfs_async_progress_wait(p_s_sb);
3894 goto out;
3895 }
3896
3897 /* check_journal_end might set these, check again */
3898 if (journal->j_next_full_flush) {
3899 flush = 1;
3900 }
3901
3902 /*
3903 ** j must wait means we have to flush the log blocks, and the real blocks for
3904 ** this transaction
3905 */
3906 if (journal->j_must_wait > 0) {
3907 flush = 1;
3908 }
3581#ifdef REISERFS_PREALLOCATE 3909#ifdef REISERFS_PREALLOCATE
3582 /* quota ops might need to nest, setup the journal_info pointer for them */ 3910 /* quota ops might need to nest, setup the journal_info pointer for them */
3583 current->journal_info = th ; 3911 current->journal_info = th;
3584 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into 3912 reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
3585 * the transaction */ 3913 * the transaction */
3586 current->journal_info = th->t_handle_save ; 3914 current->journal_info = th->t_handle_save;
3587#endif 3915#endif
3588 3916
3589 /* setup description block */ 3917 /* setup description block */
3590 d_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start) ; 3918 d_bh =
3591 set_buffer_uptodate(d_bh); 3919 journal_getblk(p_s_sb,
3592 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data ; 3920 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3593 memset(d_bh->b_data, 0, d_bh->b_size) ; 3921 journal->j_start);
3594 memcpy(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8) ; 3922 set_buffer_uptodate(d_bh);
3595 set_desc_trans_id(desc, journal->j_trans_id) ; 3923 desc = (struct reiserfs_journal_desc *)(d_bh)->b_data;
3596 3924 memset(d_bh->b_data, 0, d_bh->b_size);
3597 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ 3925 memcpy(get_journal_desc_magic(d_bh), JOURNAL_DESC_MAGIC, 8);
3598 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 3926 set_desc_trans_id(desc, journal->j_trans_id);
3599 ((journal->j_start + journal->j_len + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 3927
3600 commit = (struct reiserfs_journal_commit *)c_bh->b_data ; 3928 /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
3601 memset(c_bh->b_data, 0, c_bh->b_size) ; 3929 c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3602 set_commit_trans_id(commit, journal->j_trans_id) ; 3930 ((journal->j_start + journal->j_len +
3603 set_buffer_uptodate(c_bh) ; 3931 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
3604 3932 commit = (struct reiserfs_journal_commit *)c_bh->b_data;
3605 /* init this journal list */ 3933 memset(c_bh->b_data, 0, c_bh->b_size);
3606 jl = journal->j_current_jl; 3934 set_commit_trans_id(commit, journal->j_trans_id);
3607 3935 set_buffer_uptodate(c_bh);
3608 /* we lock the commit before doing anything because 3936
3609 * we want to make sure nobody tries to run flush_commit_list until 3937 /* init this journal list */
3610 * the new transaction is fully setup, and we've already flushed the 3938 jl = journal->j_current_jl;
3611 * ordered bh list 3939
3612 */ 3940 /* we lock the commit before doing anything because
3613 down(&jl->j_commit_lock); 3941 * we want to make sure nobody tries to run flush_commit_list until
3614 3942 * the new transaction is fully setup, and we've already flushed the
3615 /* save the transaction id in case we need to commit it later */ 3943 * ordered bh list
3616 commit_trans_id = jl->j_trans_id; 3944 */
3617 3945 down(&jl->j_commit_lock);
3618 atomic_set(&jl->j_older_commits_done, 0) ; 3946
3619 jl->j_trans_id = journal->j_trans_id ; 3947 /* save the transaction id in case we need to commit it later */
3620 jl->j_timestamp = journal->j_trans_start_time ; 3948 commit_trans_id = jl->j_trans_id;
3621 jl->j_commit_bh = c_bh ; 3949
3622 jl->j_start = journal->j_start ; 3950 atomic_set(&jl->j_older_commits_done, 0);
3623 jl->j_len = journal->j_len ; 3951 jl->j_trans_id = journal->j_trans_id;
3624 atomic_set(&jl->j_nonzerolen, journal->j_len) ; 3952 jl->j_timestamp = journal->j_trans_start_time;
3625 atomic_set(&jl->j_commit_left, journal->j_len + 2); 3953 jl->j_commit_bh = c_bh;
3626 jl->j_realblock = NULL ; 3954 jl->j_start = journal->j_start;
3627 3955 jl->j_len = journal->j_len;
3628 /* The ENTIRE FOR LOOP MUST not cause schedule to occur. 3956 atomic_set(&jl->j_nonzerolen, journal->j_len);
3629 ** for each real block, add it to the journal list hash, 3957 atomic_set(&jl->j_commit_left, journal->j_len + 2);
3630 ** copy into real block index array in the commit or desc block 3958 jl->j_realblock = NULL;
3631 */ 3959
3632 trans_half = journal_trans_half(p_s_sb->s_blocksize); 3960 /* The ENTIRE FOR LOOP MUST not cause schedule to occur.
3633 for (i = 0, cn = journal->j_first ; cn ; cn = cn->next, i++) { 3961 ** for each real block, add it to the journal list hash,
3634 if (buffer_journaled (cn->bh)) { 3962 ** copy into real block index array in the commit or desc block
3635 jl_cn = get_cnode(p_s_sb) ; 3963 */
3636 if (!jl_cn) { 3964 trans_half = journal_trans_half(p_s_sb->s_blocksize);
3637 reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; 3965 for (i = 0, cn = journal->j_first; cn; cn = cn->next, i++) {
3638 } 3966 if (buffer_journaled(cn->bh)) {
3639 if (i == 0) { 3967 jl_cn = get_cnode(p_s_sb);
3640 jl->j_realblock = jl_cn ; 3968 if (!jl_cn) {
3641 } 3969 reiserfs_panic(p_s_sb,
3642 jl_cn->prev = last_cn ; 3970 "journal-1676, get_cnode returned NULL\n");
3643 jl_cn->next = NULL ; 3971 }
3644 if (last_cn) { 3972 if (i == 0) {
3645 last_cn->next = jl_cn ; 3973 jl->j_realblock = jl_cn;
3646 } 3974 }
3647 last_cn = jl_cn ; 3975 jl_cn->prev = last_cn;
3648 /* make sure the block we are trying to log is not a block 3976 jl_cn->next = NULL;
3649 of journal or reserved area */ 3977 if (last_cn) {
3650 3978 last_cn->next = jl_cn;
3651 if (is_block_in_log_or_reserved_area(p_s_sb, cn->bh->b_blocknr)) { 3979 }
3652 reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ; 3980 last_cn = jl_cn;
3653 } 3981 /* make sure the block we are trying to log is not a block
3654 jl_cn->blocknr = cn->bh->b_blocknr ; 3982 of journal or reserved area */
3655 jl_cn->state = 0 ; 3983
3656 jl_cn->sb = p_s_sb; 3984 if (is_block_in_log_or_reserved_area
3657 jl_cn->bh = cn->bh ; 3985 (p_s_sb, cn->bh->b_blocknr)) {
3658 jl_cn->jlist = jl; 3986 reiserfs_panic(p_s_sb,
3659 insert_journal_hash(journal->j_list_hash_table, jl_cn) ; 3987 "journal-2332: Trying to log block %lu, which is a log block\n",
3660 if (i < trans_half) { 3988 cn->bh->b_blocknr);
3661 desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; 3989 }
3662 } else { 3990 jl_cn->blocknr = cn->bh->b_blocknr;
3663 commit->j_realblock[i - trans_half] = cpu_to_le32(cn->bh->b_blocknr) ; 3991 jl_cn->state = 0;
3664 } 3992 jl_cn->sb = p_s_sb;
3665 } else { 3993 jl_cn->bh = cn->bh;
3666 i-- ; 3994 jl_cn->jlist = jl;
3667 } 3995 insert_journal_hash(journal->j_list_hash_table, jl_cn);
3668 } 3996 if (i < trans_half) {
3669 set_desc_trans_len(desc, journal->j_len) ; 3997 desc->j_realblock[i] =
3670 set_desc_mount_id(desc, journal->j_mount_id) ; 3998 cpu_to_le32(cn->bh->b_blocknr);
3671 set_desc_trans_id(desc, journal->j_trans_id) ; 3999 } else {
3672 set_commit_trans_len(commit, journal->j_len); 4000 commit->j_realblock[i - trans_half] =
3673 4001 cpu_to_le32(cn->bh->b_blocknr);
3674 /* special check in case all buffers in the journal were marked for not logging */ 4002 }
3675 if (journal->j_len == 0) { 4003 } else {
3676 BUG(); 4004 i--;
3677 } 4005 }
3678 4006 }
3679 /* we're about to dirty all the log blocks, mark the description block 4007 set_desc_trans_len(desc, journal->j_len);
3680 * dirty now too. Don't mark the commit block dirty until all the 4008 set_desc_mount_id(desc, journal->j_mount_id);
3681 * others are on disk 4009 set_desc_trans_id(desc, journal->j_trans_id);
3682 */ 4010 set_commit_trans_len(commit, journal->j_len);
3683 mark_buffer_dirty(d_bh); 4011
3684 4012 /* special check in case all buffers in the journal were marked for not logging */
3685 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ 4013 if (journal->j_len == 0) {
3686 cur_write_start = journal->j_start ; 4014 BUG();
3687 cn = journal->j_first ; 4015 }
3688 jindex = 1 ; /* start at one so we don't get the desc again */ 4016
3689 while(cn) { 4017 /* we're about to dirty all the log blocks, mark the description block
3690 clear_buffer_journal_new (cn->bh); 4018 * dirty now too. Don't mark the commit block dirty until all the
3691 /* copy all the real blocks into log area. dirty log blocks */ 4019 * others are on disk
3692 if (buffer_journaled (cn->bh)) { 4020 */
3693 struct buffer_head *tmp_bh ; 4021 mark_buffer_dirty(d_bh);
3694 char *addr; 4022
3695 struct page *page; 4023 /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
3696 tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + 4024 cur_write_start = journal->j_start;
3697 ((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ; 4025 cn = journal->j_first;
3698 set_buffer_uptodate(tmp_bh); 4026 jindex = 1; /* start at one so we don't get the desc again */
3699 page = cn->bh->b_page; 4027 while (cn) {
3700 addr = kmap(page); 4028 clear_buffer_journal_new(cn->bh);
3701 memcpy(tmp_bh->b_data, addr + offset_in_page(cn->bh->b_data), 4029 /* copy all the real blocks into log area. dirty log blocks */
3702 cn->bh->b_size); 4030 if (buffer_journaled(cn->bh)) {
3703 kunmap(page); 4031 struct buffer_head *tmp_bh;
3704 mark_buffer_dirty(tmp_bh); 4032 char *addr;
3705 jindex++ ; 4033 struct page *page;
3706 set_buffer_journal_dirty (cn->bh); 4034 tmp_bh =
3707 clear_buffer_journaled (cn->bh); 4035 journal_getblk(p_s_sb,
3708 } else { 4036 SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
3709 /* JDirty cleared sometime during transaction. don't log this one */ 4037 ((cur_write_start +
3710 reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!") ; 4038 jindex) %
3711 brelse(cn->bh) ; 4039 SB_ONDISK_JOURNAL_SIZE(p_s_sb)));
3712 } 4040 set_buffer_uptodate(tmp_bh);
3713 next = cn->next ; 4041 page = cn->bh->b_page;
3714 free_cnode(p_s_sb, cn) ; 4042 addr = kmap(page);
3715 cn = next ; 4043 memcpy(tmp_bh->b_data,
3716 cond_resched(); 4044 addr + offset_in_page(cn->bh->b_data),
3717 } 4045 cn->bh->b_size);
3718 4046 kunmap(page);
3719 /* we are done with both the c_bh and d_bh, but 4047 mark_buffer_dirty(tmp_bh);
3720 ** c_bh must be written after all other commit blocks, 4048 jindex++;
3721 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. 4049 set_buffer_journal_dirty(cn->bh);
3722 */ 4050 clear_buffer_journaled(cn->bh);
3723 4051 } else {
3724 journal->j_current_jl = alloc_journal_list(p_s_sb); 4052 /* JDirty cleared sometime during transaction. don't log this one */
3725 4053 reiserfs_warning(p_s_sb,
3726 /* now it is safe to insert this transaction on the main list */ 4054 "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!");
3727 list_add_tail(&jl->j_list, &journal->j_journal_list); 4055 brelse(cn->bh);
3728 list_add_tail(&jl->j_working_list, &journal->j_working_list); 4056 }
3729 journal->j_num_work_lists++; 4057 next = cn->next;
3730 4058 free_cnode(p_s_sb, cn);
3731 /* reset journal values for the next transaction */ 4059 cn = next;
3732 old_start = journal->j_start ; 4060 cond_resched();
3733 journal->j_start = (journal->j_start + journal->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb); 4061 }
3734 atomic_set(&(journal->j_wcount), 0) ; 4062
3735 journal->j_bcount = 0 ; 4063 /* we are done with both the c_bh and d_bh, but
3736 journal->j_last = NULL ; 4064 ** c_bh must be written after all other commit blocks,
3737 journal->j_first = NULL ; 4065 ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
3738 journal->j_len = 0 ; 4066 */
3739 journal->j_trans_start_time = 0 ; 4067
3740 journal->j_trans_id++ ; 4068 journal->j_current_jl = alloc_journal_list(p_s_sb);
3741 journal->j_current_jl->j_trans_id = journal->j_trans_id; 4069
3742 journal->j_must_wait = 0 ; 4070 /* now it is safe to insert this transaction on the main list */
3743 journal->j_len_alloc = 0 ; 4071 list_add_tail(&jl->j_list, &journal->j_journal_list);
3744 journal->j_next_full_flush = 0 ; 4072 list_add_tail(&jl->j_working_list, &journal->j_working_list);
3745 journal->j_next_async_flush = 0 ; 4073 journal->j_num_work_lists++;
3746 init_journal_hash(p_s_sb) ; 4074
3747 4075 /* reset journal values for the next transaction */
3748 // make sure reiserfs_add_jh sees the new current_jl before we 4076 old_start = journal->j_start;
3749 // write out the tails 4077 journal->j_start =
3750 smp_mb(); 4078 (journal->j_start + journal->j_len +
3751 4079 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
3752 /* tail conversion targets have to hit the disk before we end the 4080 atomic_set(&(journal->j_wcount), 0);
3753 * transaction. Otherwise a later transaction might repack the tail 4081 journal->j_bcount = 0;
3754 * before this transaction commits, leaving the data block unflushed and 4082 journal->j_last = NULL;
3755 * clean, if we crash before the later transaction commits, the data block 4083 journal->j_first = NULL;
3756 * is lost. 4084 journal->j_len = 0;
3757 */ 4085 journal->j_trans_start_time = 0;
3758 if (!list_empty(&jl->j_tail_bh_list)) { 4086 journal->j_trans_id++;
3759 unlock_kernel(); 4087 journal->j_current_jl->j_trans_id = journal->j_trans_id;
3760 write_ordered_buffers(&journal->j_dirty_buffers_lock, 4088 journal->j_must_wait = 0;
3761 journal, jl, &jl->j_tail_bh_list); 4089 journal->j_len_alloc = 0;
3762 lock_kernel(); 4090 journal->j_next_full_flush = 0;
3763 } 4091 journal->j_next_async_flush = 0;
3764 if (!list_empty(&jl->j_tail_bh_list)) 4092 init_journal_hash(p_s_sb);
3765 BUG(); 4093
3766 up(&jl->j_commit_lock); 4094 // make sure reiserfs_add_jh sees the new current_jl before we
3767 4095 // write out the tails
3768 /* honor the flush wishes from the caller, simple commits can 4096 smp_mb();
3769 ** be done outside the journal lock, they are done below 4097
3770 ** 4098 /* tail conversion targets have to hit the disk before we end the
3771 ** if we don't flush the commit list right now, we put it into 4099 * transaction. Otherwise a later transaction might repack the tail
3772 ** the work queue so the people waiting on the async progress work 4100 * before this transaction commits, leaving the data block unflushed and
3773 ** queue don't wait for this proc to flush journal lists and such. 4101 * clean, if we crash before the later transaction commits, the data block
3774 */ 4102 * is lost.
3775 if (flush) { 4103 */
3776 flush_commit_list(p_s_sb, jl, 1) ; 4104 if (!list_empty(&jl->j_tail_bh_list)) {
3777 flush_journal_list(p_s_sb, jl, 1) ; 4105 unlock_kernel();
3778 } else if (!(jl->j_state & LIST_COMMIT_PENDING)) 4106 write_ordered_buffers(&journal->j_dirty_buffers_lock,
3779 queue_delayed_work(commit_wq, &journal->j_work, HZ/10); 4107 journal, jl, &jl->j_tail_bh_list);
3780 4108 lock_kernel();
3781 4109 }
3782 /* if the next transaction has any chance of wrapping, flush 4110 if (!list_empty(&jl->j_tail_bh_list))
3783 ** transactions that might get overwritten. If any journal lists are very 4111 BUG();
3784 ** old flush them as well. 4112 up(&jl->j_commit_lock);
3785 */ 4113
3786first_jl: 4114 /* honor the flush wishes from the caller, simple commits can
3787 list_for_each_safe(entry, safe, &journal->j_journal_list) { 4115 ** be done outside the journal lock, they are done below
3788 temp_jl = JOURNAL_LIST_ENTRY(entry); 4116 **
3789 if (journal->j_start <= temp_jl->j_start) { 4117 ** if we don't flush the commit list right now, we put it into
3790 if ((journal->j_start + journal->j_trans_max + 1) >= 4118 ** the work queue so the people waiting on the async progress work
3791 temp_jl->j_start) 4119 ** queue don't wait for this proc to flush journal lists and such.
3792 { 4120 */
3793 flush_used_journal_lists(p_s_sb, temp_jl); 4121 if (flush) {
3794 goto first_jl; 4122 flush_commit_list(p_s_sb, jl, 1);
3795 } else if ((journal->j_start + 4123 flush_journal_list(p_s_sb, jl, 1);
3796 journal->j_trans_max + 1) < 4124 } else if (!(jl->j_state & LIST_COMMIT_PENDING))
3797 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 4125 queue_delayed_work(commit_wq, &journal->j_work, HZ / 10);
3798 { 4126
3799 /* if we don't cross into the next transaction and we don't 4127 /* if the next transaction has any chance of wrapping, flush
3800 * wrap, there is no way we can overlap any later transactions 4128 ** transactions that might get overwritten. If any journal lists are very
3801 * break now 4129 ** old flush them as well.
3802 */ 4130 */
3803 break; 4131 first_jl:
3804 } 4132 list_for_each_safe(entry, safe, &journal->j_journal_list) {
3805 } else if ((journal->j_start + 4133 temp_jl = JOURNAL_LIST_ENTRY(entry);
3806 journal->j_trans_max + 1) > 4134 if (journal->j_start <= temp_jl->j_start) {
3807 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) 4135 if ((journal->j_start + journal->j_trans_max + 1) >=
3808 { 4136 temp_jl->j_start) {
3809 if (((journal->j_start + journal->j_trans_max + 1) % 4137 flush_used_journal_lists(p_s_sb, temp_jl);
3810 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start) 4138 goto first_jl;
3811 { 4139 } else if ((journal->j_start +
3812 flush_used_journal_lists(p_s_sb, temp_jl); 4140 journal->j_trans_max + 1) <
3813 goto first_jl; 4141 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3814 } else { 4142 /* if we don't cross into the next transaction and we don't
3815 /* we don't overlap anything from out start to the end of the 4143 * wrap, there is no way we can overlap any later transactions
3816 * log, and our wrapped portion doesn't overlap anything at 4144 * break now
3817 * the start of the log. We can break 4145 */
3818 */ 4146 break;
3819 break; 4147 }
3820 } 4148 } else if ((journal->j_start +
3821 } 4149 journal->j_trans_max + 1) >
3822 } 4150 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
3823 flush_old_journal_lists(p_s_sb); 4151 if (((journal->j_start + journal->j_trans_max + 1) %
3824 4152 SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >=
3825 journal->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, journal->j_current_jl) ; 4153 temp_jl->j_start) {
3826 4154 flush_used_journal_lists(p_s_sb, temp_jl);
3827 if (!(journal->j_current_jl->j_list_bitmap)) { 4155 goto first_jl;
3828 reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; 4156 } else {
3829 } 4157 /* we don't overlap anything from out start to the end of the
3830 4158 * log, and our wrapped portion doesn't overlap anything at
3831 atomic_set(&(journal->j_jlock), 0) ; 4159 * the start of the log. We can break
3832 unlock_journal(p_s_sb) ; 4160 */
3833 /* wake up any body waiting to join. */ 4161 break;
3834 clear_bit(J_WRITERS_QUEUED, &journal->j_state); 4162 }
3835 wake_up(&(journal->j_join_wait)) ; 4163 }
3836 4164 }
3837 if (!flush && wait_on_commit && 4165 flush_old_journal_lists(p_s_sb);
3838 journal_list_still_alive(p_s_sb, commit_trans_id)) { 4166
3839 flush_commit_list(p_s_sb, jl, 1) ; 4167 journal->j_current_jl->j_list_bitmap =
3840 } 4168 get_list_bitmap(p_s_sb, journal->j_current_jl);
3841out: 4169
3842 reiserfs_check_lock_depth(p_s_sb, "journal end2"); 4170 if (!(journal->j_current_jl->j_list_bitmap)) {
3843 4171 reiserfs_panic(p_s_sb,
3844 memset (th, 0, sizeof (*th)); 4172 "journal-1996: do_journal_end, could not get a list bitmap\n");
3845 /* Re-set th->t_super, so we can properly keep track of how many 4173 }
3846 * persistent transactions there are. We need to do this so if this 4174
3847 * call is part of a failed restart_transaction, we can free it later */ 4175 atomic_set(&(journal->j_jlock), 0);
3848 th->t_super = p_s_sb; 4176 unlock_journal(p_s_sb);
3849 4177 /* wake up any body waiting to join. */
3850 return journal->j_errno; 4178 clear_bit(J_WRITERS_QUEUED, &journal->j_state);
3851} 4179 wake_up(&(journal->j_join_wait));
3852 4180
3853static void 4181 if (!flush && wait_on_commit &&
3854__reiserfs_journal_abort_hard (struct super_block *sb) 4182 journal_list_still_alive(p_s_sb, commit_trans_id)) {
3855{ 4183 flush_commit_list(p_s_sb, jl, 1);
3856 struct reiserfs_journal *journal = SB_JOURNAL (sb); 4184 }
3857 if (test_bit (J_ABORTED, &journal->j_state)) 4185 out:
3858 return; 4186 reiserfs_check_lock_depth(p_s_sb, "journal end2");
3859 4187
3860 printk (KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n", 4188 memset(th, 0, sizeof(*th));
3861 reiserfs_bdevname (sb)); 4189 /* Re-set th->t_super, so we can properly keep track of how many
3862 4190 * persistent transactions there are. We need to do this so if this
3863 sb->s_flags |= MS_RDONLY; 4191 * call is part of a failed restart_transaction, we can free it later */
3864 set_bit (J_ABORTED, &journal->j_state); 4192 th->t_super = p_s_sb;
4193
4194 return journal->j_errno;
4195}
4196
4197static void __reiserfs_journal_abort_hard(struct super_block *sb)
4198{
4199 struct reiserfs_journal *journal = SB_JOURNAL(sb);
4200 if (test_bit(J_ABORTED, &journal->j_state))
4201 return;
4202
4203 printk(KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
4204 reiserfs_bdevname(sb));
4205
4206 sb->s_flags |= MS_RDONLY;
4207 set_bit(J_ABORTED, &journal->j_state);
3865 4208
3866#ifdef CONFIG_REISERFS_CHECK 4209#ifdef CONFIG_REISERFS_CHECK
3867 dump_stack(); 4210 dump_stack();
3868#endif 4211#endif
3869} 4212}
3870 4213
3871static void 4214static void __reiserfs_journal_abort_soft(struct super_block *sb, int errno)
3872__reiserfs_journal_abort_soft (struct super_block *sb, int errno)
3873{ 4215{
3874 struct reiserfs_journal *journal = SB_JOURNAL (sb); 4216 struct reiserfs_journal *journal = SB_JOURNAL(sb);
3875 if (test_bit (J_ABORTED, &journal->j_state)) 4217 if (test_bit(J_ABORTED, &journal->j_state))
3876 return; 4218 return;
3877 4219
3878 if (!journal->j_errno) 4220 if (!journal->j_errno)
3879 journal->j_errno = errno; 4221 journal->j_errno = errno;
3880 4222
3881 __reiserfs_journal_abort_hard (sb); 4223 __reiserfs_journal_abort_hard(sb);
3882} 4224}
3883 4225
3884void 4226void reiserfs_journal_abort(struct super_block *sb, int errno)
3885reiserfs_journal_abort (struct super_block *sb, int errno)
3886{ 4227{
3887 return __reiserfs_journal_abort_soft (sb, errno); 4228 return __reiserfs_journal_abort_soft(sb, errno);
3888} 4229}
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 2406608fc5c..2533c1f64ab 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -21,648 +21,709 @@
21 leaf_paste_entries 21 leaf_paste_entries
22 */ 22 */
23 23
24
25/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ 24/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */
26static void leaf_copy_dir_entries (struct buffer_info * dest_bi, struct buffer_head * source, 25static void leaf_copy_dir_entries(struct buffer_info *dest_bi,
27 int last_first, int item_num, int from, int copy_count) 26 struct buffer_head *source, int last_first,
27 int item_num, int from, int copy_count)
28{ 28{
29 struct buffer_head * dest = dest_bi->bi_bh; 29 struct buffer_head *dest = dest_bi->bi_bh;
30 int item_num_in_dest; /* either the number of target item, 30 int item_num_in_dest; /* either the number of target item,
31 or if we must create a new item, 31 or if we must create a new item,
32 the number of the item we will 32 the number of the item we will
33 create it next to */ 33 create it next to */
34 struct item_head * ih; 34 struct item_head *ih;
35 struct reiserfs_de_head * deh; 35 struct reiserfs_de_head *deh;
36 int copy_records_len; /* length of all records in item to be copied */ 36 int copy_records_len; /* length of all records in item to be copied */
37 char * records; 37 char *records;
38 38
39 ih = B_N_PITEM_HEAD (source, item_num); 39 ih = B_N_PITEM_HEAD(source, item_num);
40 40
41 RFALSE( !is_direntry_le_ih (ih), "vs-10000: item must be directory item"); 41 RFALSE(!is_direntry_le_ih(ih), "vs-10000: item must be directory item");
42 42
43 /* length of all record to be copied and first byte of the last of them */ 43 /* length of all record to be copied and first byte of the last of them */
44 deh = B_I_DEH (source, ih); 44 deh = B_I_DEH(source, ih);
45 if (copy_count) { 45 if (copy_count) {
46 copy_records_len = (from ? deh_location( &(deh[from - 1]) ) : 46 copy_records_len = (from ? deh_location(&(deh[from - 1])) :
47 ih_item_len(ih)) - deh_location( &(deh[from + copy_count - 1])); 47 ih_item_len(ih)) -
48 records = source->b_data + ih_location(ih) + 48 deh_location(&(deh[from + copy_count - 1]));
49 deh_location( &(deh[from + copy_count - 1])); 49 records =
50 } else { 50 source->b_data + ih_location(ih) +
51 copy_records_len = 0; 51 deh_location(&(deh[from + copy_count - 1]));
52 records = NULL; 52 } else {
53 } 53 copy_records_len = 0;
54 54 records = NULL;
55 /* when copy last to first, dest buffer can contain 0 items */ 55 }
56 item_num_in_dest = (last_first == LAST_TO_FIRST) ? (( B_NR_ITEMS(dest) ) ? 0 : -1) : (B_NR_ITEMS(dest) - 1); 56
57 57 /* when copy last to first, dest buffer can contain 0 items */
58 /* if there are no items in dest or the first/last item in dest is not item of the same directory */ 58 item_num_in_dest =
59 if ( (item_num_in_dest == - 1) || 59 (last_first ==
60 (last_first == FIRST_TO_LAST && le_ih_k_offset (ih) == DOT_OFFSET) || 60 LAST_TO_FIRST) ? ((B_NR_ITEMS(dest)) ? 0 : -1) : (B_NR_ITEMS(dest)
61 (last_first == LAST_TO_FIRST && comp_short_le_keys/*COMP_SHORT_KEYS*/ (&ih->ih_key, B_N_PKEY (dest, item_num_in_dest)))) { 61 - 1);
62 /* create new item in dest */ 62
63 struct item_head new_ih; 63 /* if there are no items in dest or the first/last item in dest is not item of the same directory */
64 64 if ((item_num_in_dest == -1) ||
65 /* form item header */ 65 (last_first == FIRST_TO_LAST && le_ih_k_offset(ih) == DOT_OFFSET) ||
66 memcpy (&new_ih.ih_key, &ih->ih_key, KEY_SIZE); 66 (last_first == LAST_TO_FIRST
67 put_ih_version( &new_ih, KEY_FORMAT_3_5 ); 67 && comp_short_le_keys /*COMP_SHORT_KEYS */ (&ih->ih_key,
68 /* calculate item len */ 68 B_N_PKEY(dest,
69 put_ih_item_len( &new_ih, DEH_SIZE * copy_count + copy_records_len ); 69 item_num_in_dest))))
70 put_ih_entry_count( &new_ih, 0 ); 70 {
71 71 /* create new item in dest */
72 if (last_first == LAST_TO_FIRST) { 72 struct item_head new_ih;
73 /* form key by the following way */ 73
74 if (from < I_ENTRY_COUNT(ih)) { 74 /* form item header */
75 set_le_ih_k_offset( &new_ih, deh_offset( &(deh[from]) ) ); 75 memcpy(&new_ih.ih_key, &ih->ih_key, KEY_SIZE);
76 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE);*/ 76 put_ih_version(&new_ih, KEY_FORMAT_3_5);
77 } else { 77 /* calculate item len */
78 /* no entries will be copied to this item in this function */ 78 put_ih_item_len(&new_ih,
79 set_le_ih_k_offset (&new_ih, U32_MAX); 79 DEH_SIZE * copy_count + copy_records_len);
80 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ 80 put_ih_entry_count(&new_ih, 0);
81 } 81
82 set_le_key_k_type (KEY_FORMAT_3_5, &(new_ih.ih_key), TYPE_DIRENTRY); 82 if (last_first == LAST_TO_FIRST) {
83 /* form key by the following way */
84 if (from < I_ENTRY_COUNT(ih)) {
85 set_le_ih_k_offset(&new_ih,
86 deh_offset(&(deh[from])));
87 /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE); */
88 } else {
89 /* no entries will be copied to this item in this function */
90 set_le_ih_k_offset(&new_ih, U32_MAX);
91 /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */
92 }
93 set_le_key_k_type(KEY_FORMAT_3_5, &(new_ih.ih_key),
94 TYPE_DIRENTRY);
95 }
96
97 /* insert item into dest buffer */
98 leaf_insert_into_buf(dest_bi,
99 (last_first ==
100 LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest),
101 &new_ih, NULL, 0);
102 } else {
103 /* prepare space for entries */
104 leaf_paste_in_buffer(dest_bi,
105 (last_first ==
106 FIRST_TO_LAST) ? (B_NR_ITEMS(dest) -
107 1) : 0, MAX_US_INT,
108 DEH_SIZE * copy_count + copy_records_len,
109 records, 0);
83 } 110 }
84
85 /* insert item into dest buffer */
86 leaf_insert_into_buf (dest_bi, (last_first == LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), &new_ih, NULL, 0);
87 } else {
88 /* prepare space for entries */
89 leaf_paste_in_buffer (dest_bi, (last_first==FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0, MAX_US_INT,
90 DEH_SIZE * copy_count + copy_records_len, records, 0
91 );
92 }
93
94 item_num_in_dest = (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest)-1) : 0;
95
96 leaf_paste_entries (dest_bi->bi_bh, item_num_in_dest,
97 (last_first == FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD (dest, item_num_in_dest)) : 0,
98 copy_count, deh + from, records,
99 DEH_SIZE * copy_count + copy_records_len
100 );
101}
102 111
112 item_num_in_dest =
113 (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0;
114
115 leaf_paste_entries(dest_bi->bi_bh, item_num_in_dest,
116 (last_first ==
117 FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD(dest,
118 item_num_in_dest))
119 : 0, copy_count, deh + from, records,
120 DEH_SIZE * copy_count + copy_records_len);
121}
103 122
104/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or 123/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or
105 part of it or nothing (see the return 0 below) from SOURCE to the end 124 part of it or nothing (see the return 0 below) from SOURCE to the end
106 (if last_first) or beginning (!last_first) of the DEST */ 125 (if last_first) or beginning (!last_first) of the DEST */
107/* returns 1 if anything was copied, else 0 */ 126/* returns 1 if anything was copied, else 0 */
108static int leaf_copy_boundary_item (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, 127static int leaf_copy_boundary_item(struct buffer_info *dest_bi,
109 int bytes_or_entries) 128 struct buffer_head *src, int last_first,
129 int bytes_or_entries)
110{ 130{
111 struct buffer_head * dest = dest_bi->bi_bh; 131 struct buffer_head *dest = dest_bi->bi_bh;
112 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ 132 int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */
113 struct item_head * ih; 133 struct item_head *ih;
114 struct item_head * dih; 134 struct item_head *dih;
115 135
116 dest_nr_item = B_NR_ITEMS(dest); 136 dest_nr_item = B_NR_ITEMS(dest);
117 137
118 if ( last_first == FIRST_TO_LAST ) { 138 if (last_first == FIRST_TO_LAST) {
119 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects 139 /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects
120 or of different types ) then there is no need to treat this item differently from the other items 140 or of different types ) then there is no need to treat this item differently from the other items
121 that we copy, so we return */ 141 that we copy, so we return */
122 ih = B_N_PITEM_HEAD (src, 0); 142 ih = B_N_PITEM_HEAD(src, 0);
123 dih = B_N_PITEM_HEAD (dest, dest_nr_item - 1); 143 dih = B_N_PITEM_HEAD(dest, dest_nr_item - 1);
124 if (!dest_nr_item || (!op_is_left_mergeable (&(ih->ih_key), src->b_size))) 144 if (!dest_nr_item
125 /* there is nothing to merge */ 145 || (!op_is_left_mergeable(&(ih->ih_key), src->b_size)))
126 return 0; 146 /* there is nothing to merge */
127 147 return 0;
128 RFALSE( ! ih_item_len(ih), "vs-10010: item can not have empty length"); 148
129 149 RFALSE(!ih_item_len(ih),
130 if ( is_direntry_le_ih (ih) ) { 150 "vs-10010: item can not have empty length");
131 if ( bytes_or_entries == -1 ) 151
132 /* copy all entries to dest */ 152 if (is_direntry_le_ih(ih)) {
133 bytes_or_entries = ih_entry_count(ih); 153 if (bytes_or_entries == -1)
134 leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, 0, 0, bytes_or_entries); 154 /* copy all entries to dest */
135 return 1; 155 bytes_or_entries = ih_entry_count(ih);
136 } 156 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, 0, 0,
137 157 bytes_or_entries);
138 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST 158 return 1;
139 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header 159 }
140 */ 160
141 if ( bytes_or_entries == -1 ) 161 /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST
142 bytes_or_entries = ih_item_len(ih); 162 part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header
163 */
164 if (bytes_or_entries == -1)
165 bytes_or_entries = ih_item_len(ih);
143 166
144#ifdef CONFIG_REISERFS_CHECK 167#ifdef CONFIG_REISERFS_CHECK
145 else { 168 else {
146 if (bytes_or_entries == ih_item_len(ih) && is_indirect_le_ih(ih)) 169 if (bytes_or_entries == ih_item_len(ih)
147 if (get_ih_free_space (ih)) 170 && is_indirect_le_ih(ih))
148 reiserfs_panic (NULL, "vs-10020: leaf_copy_boundary_item: " 171 if (get_ih_free_space(ih))
149 "last unformatted node must be filled entirely (%h)", 172 reiserfs_panic(NULL,
150 ih); 173 "vs-10020: leaf_copy_boundary_item: "
151 } 174 "last unformatted node must be filled entirely (%h)",
175 ih);
176 }
152#endif 177#endif
153
154 /* merge first item (or its part) of src buffer with the last
155 item of dest buffer. Both are of the same file */
156 leaf_paste_in_buffer (dest_bi,
157 dest_nr_item - 1, ih_item_len(dih), bytes_or_entries, B_I_PITEM(src,ih), 0
158 );
159
160 if (is_indirect_le_ih (dih)) {
161 RFALSE( get_ih_free_space (dih),
162 "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space",
163 ih);
164 if (bytes_or_entries == ih_item_len(ih))
165 set_ih_free_space (dih, get_ih_free_space (ih));
166 }
167
168 return 1;
169 }
170
171
172 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
173
174 /* ( DEST is empty or last item of SOURCE and first item of DEST
175 are the items of different object or of different types )
176 */
177 src_nr_item = B_NR_ITEMS (src);
178 ih = B_N_PITEM_HEAD (src, src_nr_item - 1);
179 dih = B_N_PITEM_HEAD (dest, 0);
180
181 if (!dest_nr_item || !op_is_left_mergeable (&(dih->ih_key), src->b_size))
182 return 0;
183
184 if ( is_direntry_le_ih (ih)) {
185 if ( bytes_or_entries == -1 )
186 /* bytes_or_entries = entries number in last item body of SOURCE */
187 bytes_or_entries = ih_entry_count(ih);
188
189 leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, src_nr_item - 1, ih_entry_count(ih) - bytes_or_entries, bytes_or_entries);
190 return 1;
191 }
192
193 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST;
194 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST;
195 don't create new item header
196 */
197
198 RFALSE( is_indirect_le_ih(ih) && get_ih_free_space (ih),
199 "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)",
200 ih);
201
202 if ( bytes_or_entries == -1 ) {
203 /* bytes_or_entries = length of last item body of SOURCE */
204 bytes_or_entries = ih_item_len(ih);
205
206 RFALSE( le_ih_k_offset (dih) !=
207 le_ih_k_offset (ih) + op_bytes_number (ih, src->b_size),
208 "vs-10050: items %h and %h do not match", ih, dih);
209
210 /* change first item key of the DEST */
211 set_le_ih_k_offset (dih, le_ih_k_offset (ih));
212
213 /* item becomes non-mergeable */
214 /* or mergeable if left item was */
215 set_le_ih_k_type (dih, le_ih_k_type (ih));
216 } else {
217 /* merge to right only part of item */
218 RFALSE( ih_item_len(ih) <= bytes_or_entries,
219 "vs-10060: no so much bytes %lu (needed %lu)",
220 ( unsigned long )ih_item_len(ih), ( unsigned long )bytes_or_entries);
221
222 /* change first item key of the DEST */
223 if ( is_direct_le_ih (dih) ) {
224 RFALSE( le_ih_k_offset (dih) <= (unsigned long)bytes_or_entries,
225 "vs-10070: dih %h, bytes_or_entries(%d)", dih, bytes_or_entries);
226 set_le_ih_k_offset (dih, le_ih_k_offset (dih) - bytes_or_entries);
227 } else {
228 RFALSE( le_ih_k_offset (dih) <=
229 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size,
230 "vs-10080: dih %h, bytes_or_entries(%d)",
231 dih, (bytes_or_entries/UNFM_P_SIZE)*dest->b_size);
232 set_le_ih_k_offset (dih, le_ih_k_offset (dih) - ((bytes_or_entries / UNFM_P_SIZE) * dest->b_size));
233 }
234 }
235
236 leaf_paste_in_buffer (dest_bi, 0, 0, bytes_or_entries, B_I_PITEM(src,ih) + ih_item_len(ih) - bytes_or_entries, 0);
237 return 1;
238}
239 178
179 /* merge first item (or its part) of src buffer with the last
180 item of dest buffer. Both are of the same file */
181 leaf_paste_in_buffer(dest_bi,
182 dest_nr_item - 1, ih_item_len(dih),
183 bytes_or_entries, B_I_PITEM(src, ih), 0);
184
185 if (is_indirect_le_ih(dih)) {
186 RFALSE(get_ih_free_space(dih),
187 "vs-10030: merge to left: last unformatted node of non-last indirect item %h must have zerto free space",
188 ih);
189 if (bytes_or_entries == ih_item_len(ih))
190 set_ih_free_space(dih, get_ih_free_space(ih));
191 }
192
193 return 1;
194 }
195
196 /* copy boundary item to right (last_first == LAST_TO_FIRST) */
197
198 /* ( DEST is empty or last item of SOURCE and first item of DEST
199 are the items of different object or of different types )
200 */
201 src_nr_item = B_NR_ITEMS(src);
202 ih = B_N_PITEM_HEAD(src, src_nr_item - 1);
203 dih = B_N_PITEM_HEAD(dest, 0);
204
205 if (!dest_nr_item || !op_is_left_mergeable(&(dih->ih_key), src->b_size))
206 return 0;
207
208 if (is_direntry_le_ih(ih)) {
209 if (bytes_or_entries == -1)
210 /* bytes_or_entries = entries number in last item body of SOURCE */
211 bytes_or_entries = ih_entry_count(ih);
212
213 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
214 src_nr_item - 1,
215 ih_entry_count(ih) - bytes_or_entries,
216 bytes_or_entries);
217 return 1;
218 }
219
220 /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST;
221 part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST;
222 don't create new item header
223 */
224
225 RFALSE(is_indirect_le_ih(ih) && get_ih_free_space(ih),
226 "vs-10040: merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)",
227 ih);
228
229 if (bytes_or_entries == -1) {
230 /* bytes_or_entries = length of last item body of SOURCE */
231 bytes_or_entries = ih_item_len(ih);
232
233 RFALSE(le_ih_k_offset(dih) !=
234 le_ih_k_offset(ih) + op_bytes_number(ih, src->b_size),
235 "vs-10050: items %h and %h do not match", ih, dih);
236
237 /* change first item key of the DEST */
238 set_le_ih_k_offset(dih, le_ih_k_offset(ih));
239
240 /* item becomes non-mergeable */
241 /* or mergeable if left item was */
242 set_le_ih_k_type(dih, le_ih_k_type(ih));
243 } else {
244 /* merge to right only part of item */
245 RFALSE(ih_item_len(ih) <= bytes_or_entries,
246 "vs-10060: no so much bytes %lu (needed %lu)",
247 (unsigned long)ih_item_len(ih),
248 (unsigned long)bytes_or_entries);
249
250 /* change first item key of the DEST */
251 if (is_direct_le_ih(dih)) {
252 RFALSE(le_ih_k_offset(dih) <=
253 (unsigned long)bytes_or_entries,
254 "vs-10070: dih %h, bytes_or_entries(%d)", dih,
255 bytes_or_entries);
256 set_le_ih_k_offset(dih,
257 le_ih_k_offset(dih) -
258 bytes_or_entries);
259 } else {
260 RFALSE(le_ih_k_offset(dih) <=
261 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size,
262 "vs-10080: dih %h, bytes_or_entries(%d)",
263 dih,
264 (bytes_or_entries / UNFM_P_SIZE) * dest->b_size);
265 set_le_ih_k_offset(dih,
266 le_ih_k_offset(dih) -
267 ((bytes_or_entries / UNFM_P_SIZE) *
268 dest->b_size));
269 }
270 }
271
272 leaf_paste_in_buffer(dest_bi, 0, 0, bytes_or_entries,
273 B_I_PITEM(src,
274 ih) + ih_item_len(ih) - bytes_or_entries,
275 0);
276 return 1;
277}
240 278
241/* copy cpy_mun items from buffer src to buffer dest 279/* copy cpy_mun items from buffer src to buffer dest
242 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest 280 * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest
243 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest 281 * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest
244 */ 282 */
245static void leaf_copy_items_entirely (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, 283static void leaf_copy_items_entirely(struct buffer_info *dest_bi,
246 int first, int cpy_num) 284 struct buffer_head *src, int last_first,
285 int first, int cpy_num)
247{ 286{
248 struct buffer_head * dest; 287 struct buffer_head *dest;
249 int nr, free_space; 288 int nr, free_space;
250 int dest_before; 289 int dest_before;
251 int last_loc, last_inserted_loc, location; 290 int last_loc, last_inserted_loc, location;
252 int i, j; 291 int i, j;
253 struct block_head * blkh; 292 struct block_head *blkh;
254 struct item_head * ih; 293 struct item_head *ih;
255 294
256 RFALSE( last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST, 295 RFALSE(last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST,
257 "vs-10090: bad last_first parameter %d", last_first); 296 "vs-10090: bad last_first parameter %d", last_first);
258 RFALSE( B_NR_ITEMS (src) - first < cpy_num, 297 RFALSE(B_NR_ITEMS(src) - first < cpy_num,
259 "vs-10100: too few items in source %d, required %d from %d", 298 "vs-10100: too few items in source %d, required %d from %d",
260 B_NR_ITEMS(src), cpy_num, first); 299 B_NR_ITEMS(src), cpy_num, first);
261 RFALSE( cpy_num < 0, "vs-10110: can not copy negative amount of items"); 300 RFALSE(cpy_num < 0, "vs-10110: can not copy negative amount of items");
262 RFALSE( ! dest_bi, "vs-10120: can not copy negative amount of items"); 301 RFALSE(!dest_bi, "vs-10120: can not copy negative amount of items");
263 302
264 dest = dest_bi->bi_bh; 303 dest = dest_bi->bi_bh;
265 304
266 RFALSE( ! dest, "vs-10130: can not copy negative amount of items"); 305 RFALSE(!dest, "vs-10130: can not copy negative amount of items");
267 306
268 if (cpy_num == 0) 307 if (cpy_num == 0)
269 return; 308 return;
270 309
271 blkh = B_BLK_HEAD(dest); 310 blkh = B_BLK_HEAD(dest);
272 nr = blkh_nr_item( blkh ); 311 nr = blkh_nr_item(blkh);
273 free_space = blkh_free_space(blkh); 312 free_space = blkh_free_space(blkh);
274 313
275 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ 314 /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */
276 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr; 315 dest_before = (last_first == LAST_TO_FIRST) ? 0 : nr;
277 316
278 /* location of head of first new item */ 317 /* location of head of first new item */
279 ih = B_N_PITEM_HEAD (dest, dest_before); 318 ih = B_N_PITEM_HEAD(dest, dest_before);
280 319
281 RFALSE( blkh_free_space(blkh) < cpy_num * IH_SIZE, 320 RFALSE(blkh_free_space(blkh) < cpy_num * IH_SIZE,
282 "vs-10140: not enough free space for headers %d (needed %d)", 321 "vs-10140: not enough free space for headers %d (needed %d)",
283 B_FREE_SPACE (dest), cpy_num * IH_SIZE); 322 B_FREE_SPACE(dest), cpy_num * IH_SIZE);
284 323
285 /* prepare space for headers */ 324 /* prepare space for headers */
286 memmove (ih + cpy_num, ih, (nr-dest_before) * IH_SIZE); 325 memmove(ih + cpy_num, ih, (nr - dest_before) * IH_SIZE);
287
288 /* copy item headers */
289 memcpy (ih, B_N_PITEM_HEAD (src, first), cpy_num * IH_SIZE);
290
291 free_space -= (IH_SIZE * cpy_num);
292 set_blkh_free_space( blkh, free_space );
293
294 /* location of unmovable item */
295 j = location = (dest_before == 0) ? dest->b_size : ih_location(ih-1);
296 for (i = dest_before; i < nr + cpy_num; i ++) {
297 location -= ih_item_len( ih + i - dest_before );
298 put_ih_location( ih + i - dest_before, location );
299 }
300
301 /* prepare space for items */
302 last_loc = ih_location( &(ih[nr+cpy_num-1-dest_before]) );
303 last_inserted_loc = ih_location( &(ih[cpy_num-1]) );
304
305 /* check free space */
306 RFALSE( free_space < j - last_inserted_loc,
307 "vs-10150: not enough free space for items %d (needed %d)",
308 free_space, j - last_inserted_loc);
309
310 memmove (dest->b_data + last_loc,
311 dest->b_data + last_loc + j - last_inserted_loc,
312 last_inserted_loc - last_loc);
313
314 /* copy items */
315 memcpy (dest->b_data + last_inserted_loc, B_N_PITEM(src,(first + cpy_num - 1)),
316 j - last_inserted_loc);
317
318 /* sizes, item number */
319 set_blkh_nr_item( blkh, nr + cpy_num );
320 set_blkh_free_space( blkh, free_space - (j - last_inserted_loc) );
321
322 do_balance_mark_leaf_dirty (dest_bi->tb, dest, 0);
323
324 if (dest_bi->bi_parent) {
325 struct disk_child *t_dc;
326 t_dc = B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position);
327 RFALSE( dc_block_number(t_dc) != dest->b_blocknr,
328 "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
329 ( long unsigned ) dest->b_blocknr,
330 ( long unsigned ) dc_block_number(t_dc));
331 put_dc_size( t_dc, dc_size(t_dc) + (j - last_inserted_loc + IH_SIZE * cpy_num ) );
332
333 do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent, 0);
334 }
335}
336 326
327 /* copy item headers */
328 memcpy(ih, B_N_PITEM_HEAD(src, first), cpy_num * IH_SIZE);
329
330 free_space -= (IH_SIZE * cpy_num);
331 set_blkh_free_space(blkh, free_space);
332
333 /* location of unmovable item */
334 j = location = (dest_before == 0) ? dest->b_size : ih_location(ih - 1);
335 for (i = dest_before; i < nr + cpy_num; i++) {
336 location -= ih_item_len(ih + i - dest_before);
337 put_ih_location(ih + i - dest_before, location);
338 }
339
340 /* prepare space for items */
341 last_loc = ih_location(&(ih[nr + cpy_num - 1 - dest_before]));
342 last_inserted_loc = ih_location(&(ih[cpy_num - 1]));
343
344 /* check free space */
345 RFALSE(free_space < j - last_inserted_loc,
346 "vs-10150: not enough free space for items %d (needed %d)",
347 free_space, j - last_inserted_loc);
348
349 memmove(dest->b_data + last_loc,
350 dest->b_data + last_loc + j - last_inserted_loc,
351 last_inserted_loc - last_loc);
352
353 /* copy items */
354 memcpy(dest->b_data + last_inserted_loc,
355 B_N_PITEM(src, (first + cpy_num - 1)), j - last_inserted_loc);
356
357 /* sizes, item number */
358 set_blkh_nr_item(blkh, nr + cpy_num);
359 set_blkh_free_space(blkh, free_space - (j - last_inserted_loc));
360
361 do_balance_mark_leaf_dirty(dest_bi->tb, dest, 0);
362
363 if (dest_bi->bi_parent) {
364 struct disk_child *t_dc;
365 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
366 RFALSE(dc_block_number(t_dc) != dest->b_blocknr,
367 "vs-10160: block number in bh does not match to field in disk_child structure %lu and %lu",
368 (long unsigned)dest->b_blocknr,
369 (long unsigned)dc_block_number(t_dc));
370 put_dc_size(t_dc,
371 dc_size(t_dc) + (j - last_inserted_loc +
372 IH_SIZE * cpy_num));
373
374 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
375 0);
376 }
377}
337 378
338/* This function splits the (liquid) item into two items (useful when 379/* This function splits the (liquid) item into two items (useful when
339 shifting part of an item into another node.) */ 380 shifting part of an item into another node.) */
340static void leaf_item_bottle (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, 381static void leaf_item_bottle(struct buffer_info *dest_bi,
341 int item_num, int cpy_bytes) 382 struct buffer_head *src, int last_first,
383 int item_num, int cpy_bytes)
342{ 384{
343 struct buffer_head * dest = dest_bi->bi_bh; 385 struct buffer_head *dest = dest_bi->bi_bh;
344 struct item_head * ih; 386 struct item_head *ih;
345 387
346 RFALSE( cpy_bytes == -1, "vs-10170: bytes == - 1 means: do not split item"); 388 RFALSE(cpy_bytes == -1,
347 389 "vs-10170: bytes == - 1 means: do not split item");
348 if ( last_first == FIRST_TO_LAST ) { 390
349 /* if ( if item in position item_num in buffer SOURCE is directory item ) */ 391 if (last_first == FIRST_TO_LAST) {
350 if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(src,item_num))) 392 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
351 leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, item_num, 0, cpy_bytes); 393 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
352 else { 394 leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST,
353 struct item_head n_ih; 395 item_num, 0, cpy_bytes);
354 396 else {
355 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST 397 struct item_head n_ih;
356 part defined by 'cpy_bytes'; create new item header; change old item_header (????); 398
357 n_ih = new item_header; 399 /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST
358 */ 400 part defined by 'cpy_bytes'; create new item header; change old item_header (????);
359 memcpy (&n_ih, ih, IH_SIZE); 401 n_ih = new item_header;
360 put_ih_item_len( &n_ih, cpy_bytes ); 402 */
361 if (is_indirect_le_ih (ih)) { 403 memcpy(&n_ih, ih, IH_SIZE);
362 RFALSE( cpy_bytes == ih_item_len(ih) && get_ih_free_space(ih), 404 put_ih_item_len(&n_ih, cpy_bytes);
363 "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", 405 if (is_indirect_le_ih(ih)) {
364 ( long unsigned ) get_ih_free_space (ih)); 406 RFALSE(cpy_bytes == ih_item_len(ih)
365 set_ih_free_space (&n_ih, 0); 407 && get_ih_free_space(ih),
366 } 408 "vs-10180: when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)",
367 409 (long unsigned)get_ih_free_space(ih));
368 RFALSE( op_is_left_mergeable (&(ih->ih_key), src->b_size), 410 set_ih_free_space(&n_ih, 0);
369 "vs-10190: bad mergeability of item %h", ih); 411 }
370 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */ 412
371 leaf_insert_into_buf (dest_bi, B_NR_ITEMS(dest), &n_ih, B_N_PITEM (src, item_num), 0); 413 RFALSE(op_is_left_mergeable(&(ih->ih_key), src->b_size),
414 "vs-10190: bad mergeability of item %h", ih);
415 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
416 leaf_insert_into_buf(dest_bi, B_NR_ITEMS(dest), &n_ih,
417 B_N_PITEM(src, item_num), 0);
418 }
419 } else {
420 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
421 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num)))
422 leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST,
423 item_num,
424 I_ENTRY_COUNT(ih) - cpy_bytes,
425 cpy_bytes);
426 else {
427 struct item_head n_ih;
428
429 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
430 part defined by 'cpy_bytes'; create new item header;
431 n_ih = new item_header;
432 */
433 memcpy(&n_ih, ih, SHORT_KEY_SIZE);
434
435 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
436
437 if (is_direct_le_ih(ih)) {
438 set_le_ih_k_offset(&n_ih,
439 le_ih_k_offset(ih) +
440 ih_item_len(ih) - cpy_bytes);
441 set_le_ih_k_type(&n_ih, TYPE_DIRECT);
442 set_ih_free_space(&n_ih, MAX_US_INT);
443 } else {
444 /* indirect item */
445 RFALSE(!cpy_bytes && get_ih_free_space(ih),
446 "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended");
447 set_le_ih_k_offset(&n_ih,
448 le_ih_k_offset(ih) +
449 (ih_item_len(ih) -
450 cpy_bytes) / UNFM_P_SIZE *
451 dest->b_size);
452 set_le_ih_k_type(&n_ih, TYPE_INDIRECT);
453 set_ih_free_space(&n_ih, get_ih_free_space(ih));
454 }
455
456 /* set item length */
457 put_ih_item_len(&n_ih, cpy_bytes);
458
459 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
460
461 leaf_insert_into_buf(dest_bi, 0, &n_ih,
462 B_N_PITEM(src,
463 item_num) +
464 ih_item_len(ih) - cpy_bytes, 0);
465 }
372 } 466 }
373 } else {
374 /* if ( if item in position item_num in buffer SOURCE is directory item ) */
375 if (is_direntry_le_ih(ih = B_N_PITEM_HEAD (src, item_num)))
376 leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, item_num, I_ENTRY_COUNT(ih) - cpy_bytes, cpy_bytes);
377 else {
378 struct item_head n_ih;
379
380 /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST
381 part defined by 'cpy_bytes'; create new item header;
382 n_ih = new item_header;
383 */
384 memcpy (&n_ih, ih, SHORT_KEY_SIZE);
385
386 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
387
388 if (is_direct_le_ih (ih)) {
389 set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + ih_item_len(ih) - cpy_bytes);
390 set_le_ih_k_type (&n_ih, TYPE_DIRECT);
391 set_ih_free_space (&n_ih, MAX_US_INT);
392 } else {
393 /* indirect item */
394 RFALSE( !cpy_bytes && get_ih_free_space (ih),
395 "vs-10200: ih->ih_free_space must be 0 when indirect item will be appended");
396 set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + (ih_item_len(ih) - cpy_bytes) / UNFM_P_SIZE * dest->b_size);
397 set_le_ih_k_type (&n_ih, TYPE_INDIRECT);
398 set_ih_free_space (&n_ih, get_ih_free_space (ih));
399 }
400
401 /* set item length */
402 put_ih_item_len( &n_ih, cpy_bytes );
403
404 n_ih.ih_version = ih->ih_version; /* JDM Endian safe, both le */
405
406 leaf_insert_into_buf (dest_bi, 0, &n_ih, B_N_PITEM(src,item_num) + ih_item_len(ih) - cpy_bytes, 0);
407 }
408 }
409} 467}
410 468
411
412/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. 469/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST.
413 If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. 470 If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST.
414 From last item copy cpy_num bytes for regular item and cpy_num directory entries for 471 From last item copy cpy_num bytes for regular item and cpy_num directory entries for
415 directory item. */ 472 directory item. */
416static int leaf_copy_items (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, int cpy_num, 473static int leaf_copy_items(struct buffer_info *dest_bi, struct buffer_head *src,
417 int cpy_bytes) 474 int last_first, int cpy_num, int cpy_bytes)
418{ 475{
419 struct buffer_head * dest; 476 struct buffer_head *dest;
420 int pos, i, src_nr_item, bytes; 477 int pos, i, src_nr_item, bytes;
421 478
422 dest = dest_bi->bi_bh; 479 dest = dest_bi->bi_bh;
423 RFALSE( !dest || !src, "vs-10210: !dest || !src"); 480 RFALSE(!dest || !src, "vs-10210: !dest || !src");
424 RFALSE( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST, 481 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
425 "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST"); 482 "vs-10220:last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST");
426 RFALSE( B_NR_ITEMS(src) < cpy_num, 483 RFALSE(B_NR_ITEMS(src) < cpy_num,
427 "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src), cpy_num); 484 "vs-10230: No enough items: %d, req. %d", B_NR_ITEMS(src),
428 RFALSE( cpy_num < 0,"vs-10240: cpy_num < 0 (%d)", cpy_num); 485 cpy_num);
429 486 RFALSE(cpy_num < 0, "vs-10240: cpy_num < 0 (%d)", cpy_num);
430 if ( cpy_num == 0 ) 487
431 return 0; 488 if (cpy_num == 0)
432 489 return 0;
433 if ( last_first == FIRST_TO_LAST ) { 490
434 /* copy items to left */ 491 if (last_first == FIRST_TO_LAST) {
435 pos = 0; 492 /* copy items to left */
436 if ( cpy_num == 1 ) 493 pos = 0;
437 bytes = cpy_bytes; 494 if (cpy_num == 1)
438 else 495 bytes = cpy_bytes;
439 bytes = -1; 496 else
440 497 bytes = -1;
441 /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */ 498
442 i = leaf_copy_boundary_item (dest_bi, src, FIRST_TO_LAST, bytes); 499 /* copy the first item or it part or nothing to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */
443 cpy_num -= i; 500 i = leaf_copy_boundary_item(dest_bi, src, FIRST_TO_LAST, bytes);
444 if ( cpy_num == 0 ) 501 cpy_num -= i;
445 return i; 502 if (cpy_num == 0)
446 pos += i; 503 return i;
447 if ( cpy_bytes == -1 ) 504 pos += i;
448 /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */ 505 if (cpy_bytes == -1)
449 leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num); 506 /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */
450 else { 507 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
451 /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */ 508 pos, cpy_num);
452 leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num-1); 509 else {
453 510 /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */
454 /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */ 511 leaf_copy_items_entirely(dest_bi, src, FIRST_TO_LAST,
455 leaf_item_bottle (dest_bi, src, FIRST_TO_LAST, cpy_num+pos-1, cpy_bytes); 512 pos, cpy_num - 1);
456 } 513
457 } else { 514 /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */
458 /* copy items to right */ 515 leaf_item_bottle(dest_bi, src, FIRST_TO_LAST,
459 src_nr_item = B_NR_ITEMS (src); 516 cpy_num + pos - 1, cpy_bytes);
460 if ( cpy_num == 1 ) 517 }
461 bytes = cpy_bytes; 518 } else {
462 else 519 /* copy items to right */
463 bytes = -1; 520 src_nr_item = B_NR_ITEMS(src);
464 521 if (cpy_num == 1)
465 /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */ 522 bytes = cpy_bytes;
466 i = leaf_copy_boundary_item (dest_bi, src, LAST_TO_FIRST, bytes); 523 else
467 524 bytes = -1;
468 cpy_num -= i; 525
469 if ( cpy_num == 0 ) 526 /* copy the last item or it part or nothing to the begin of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */
470 return i; 527 i = leaf_copy_boundary_item(dest_bi, src, LAST_TO_FIRST, bytes);
471 528
472 pos = src_nr_item - cpy_num - i; 529 cpy_num -= i;
473 if ( cpy_bytes == -1 ) { 530 if (cpy_num == 0)
474 /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */ 531 return i;
475 leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos, cpy_num); 532
476 } else { 533 pos = src_nr_item - cpy_num - i;
477 /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */ 534 if (cpy_bytes == -1) {
478 leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos+1, cpy_num-1); 535 /* starting from position 'pos' copy last cpy_num items of SOURCE to begin of DEST */
479 536 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
480 /* copy part of the item which number is pos to the begin of the DEST */ 537 pos, cpy_num);
481 leaf_item_bottle (dest_bi, src, LAST_TO_FIRST, pos, cpy_bytes); 538 } else {
482 } 539 /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the begin of the DEST; */
483 } 540 leaf_copy_items_entirely(dest_bi, src, LAST_TO_FIRST,
484 return i; 541 pos + 1, cpy_num - 1);
542
543 /* copy part of the item which number is pos to the begin of the DEST */
544 leaf_item_bottle(dest_bi, src, LAST_TO_FIRST, pos,
545 cpy_bytes);
546 }
547 }
548 return i;
485} 549}
486 550
487
488/* there are types of coping: from S[0] to L[0], from S[0] to R[0], 551/* there are types of coping: from S[0] to L[0], from S[0] to R[0],
489 from R[0] to L[0]. for each of these we have to define parent and 552 from R[0] to L[0]. for each of these we have to define parent and
490 positions of destination and source buffers */ 553 positions of destination and source buffers */
491static void leaf_define_dest_src_infos (int shift_mode, struct tree_balance * tb, struct buffer_info * dest_bi, 554static void leaf_define_dest_src_infos(int shift_mode, struct tree_balance *tb,
492 struct buffer_info * src_bi, int * first_last, 555 struct buffer_info *dest_bi,
493 struct buffer_head * Snew) 556 struct buffer_info *src_bi,
557 int *first_last,
558 struct buffer_head *Snew)
494{ 559{
495 memset (dest_bi, 0, sizeof (struct buffer_info)); 560 memset(dest_bi, 0, sizeof(struct buffer_info));
496 memset (src_bi, 0, sizeof (struct buffer_info)); 561 memset(src_bi, 0, sizeof(struct buffer_info));
497 562
498 /* define dest, src, dest parent, dest position */ 563 /* define dest, src, dest parent, dest position */
499 switch (shift_mode) { 564 switch (shift_mode) {
500 case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ 565 case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */
501 src_bi->tb = tb; 566 src_bi->tb = tb;
502 src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); 567 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
503 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 568 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
504 src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); /* src->b_item_order */ 569 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0); /* src->b_item_order */
505 dest_bi->tb = tb; 570 dest_bi->tb = tb;
506 dest_bi->bi_bh = tb->L[0]; 571 dest_bi->bi_bh = tb->L[0];
507 dest_bi->bi_parent = tb->FL[0]; 572 dest_bi->bi_parent = tb->FL[0];
508 dest_bi->bi_position = get_left_neighbor_position (tb, 0); 573 dest_bi->bi_position = get_left_neighbor_position(tb, 0);
509 *first_last = FIRST_TO_LAST; 574 *first_last = FIRST_TO_LAST;
510 break; 575 break;
511 576
512 case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ 577 case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */
513 src_bi->tb = tb; 578 src_bi->tb = tb;
514 src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); 579 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
515 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 580 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
516 src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); 581 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
517 dest_bi->tb = tb; 582 dest_bi->tb = tb;
518 dest_bi->bi_bh = tb->R[0]; 583 dest_bi->bi_bh = tb->R[0];
519 dest_bi->bi_parent = tb->FR[0]; 584 dest_bi->bi_parent = tb->FR[0];
520 dest_bi->bi_position = get_right_neighbor_position (tb, 0); 585 dest_bi->bi_position = get_right_neighbor_position(tb, 0);
521 *first_last = LAST_TO_FIRST; 586 *first_last = LAST_TO_FIRST;
522 break; 587 break;
523 588
524 case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ 589 case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */
525 src_bi->tb = tb; 590 src_bi->tb = tb;
526 src_bi->bi_bh = tb->R[0]; 591 src_bi->bi_bh = tb->R[0];
527 src_bi->bi_parent = tb->FR[0]; 592 src_bi->bi_parent = tb->FR[0];
528 src_bi->bi_position = get_right_neighbor_position (tb, 0); 593 src_bi->bi_position = get_right_neighbor_position(tb, 0);
529 dest_bi->tb = tb; 594 dest_bi->tb = tb;
530 dest_bi->bi_bh = tb->L[0]; 595 dest_bi->bi_bh = tb->L[0];
531 dest_bi->bi_parent = tb->FL[0]; 596 dest_bi->bi_parent = tb->FL[0];
532 dest_bi->bi_position = get_left_neighbor_position (tb, 0); 597 dest_bi->bi_position = get_left_neighbor_position(tb, 0);
533 *first_last = FIRST_TO_LAST; 598 *first_last = FIRST_TO_LAST;
534 break; 599 break;
535 600
536 case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ 601 case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */
537 src_bi->tb = tb; 602 src_bi->tb = tb;
538 src_bi->bi_bh = tb->L[0]; 603 src_bi->bi_bh = tb->L[0];
539 src_bi->bi_parent = tb->FL[0]; 604 src_bi->bi_parent = tb->FL[0];
540 src_bi->bi_position = get_left_neighbor_position (tb, 0); 605 src_bi->bi_position = get_left_neighbor_position(tb, 0);
541 dest_bi->tb = tb; 606 dest_bi->tb = tb;
542 dest_bi->bi_bh = tb->R[0]; 607 dest_bi->bi_bh = tb->R[0];
543 dest_bi->bi_parent = tb->FR[0]; 608 dest_bi->bi_parent = tb->FR[0];
544 dest_bi->bi_position = get_right_neighbor_position (tb, 0); 609 dest_bi->bi_position = get_right_neighbor_position(tb, 0);
545 *first_last = LAST_TO_FIRST; 610 *first_last = LAST_TO_FIRST;
546 break; 611 break;
547 612
548 case LEAF_FROM_S_TO_SNEW: 613 case LEAF_FROM_S_TO_SNEW:
549 src_bi->tb = tb; 614 src_bi->tb = tb;
550 src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); 615 src_bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
551 src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); 616 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
552 src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); 617 src_bi->bi_position = PATH_H_B_ITEM_ORDER(tb->tb_path, 0);
553 dest_bi->tb = tb; 618 dest_bi->tb = tb;
554 dest_bi->bi_bh = Snew; 619 dest_bi->bi_bh = Snew;
555 dest_bi->bi_parent = NULL; 620 dest_bi->bi_parent = NULL;
556 dest_bi->bi_position = 0; 621 dest_bi->bi_position = 0;
557 *first_last = LAST_TO_FIRST; 622 *first_last = LAST_TO_FIRST;
558 break; 623 break;
559 624
560 default: 625 default:
561 reiserfs_panic (NULL, "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", shift_mode); 626 reiserfs_panic(NULL,
562 } 627 "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)",
563 RFALSE( src_bi->bi_bh == 0 || dest_bi->bi_bh == 0, 628 shift_mode);
564 "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", 629 }
565 shift_mode, src_bi->bi_bh, dest_bi->bi_bh); 630 RFALSE(src_bi->bi_bh == 0 || dest_bi->bi_bh == 0,
631 "vs-10260: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly",
632 shift_mode, src_bi->bi_bh, dest_bi->bi_bh);
566} 633}
567 634
568
569
570
571/* copy mov_num items and mov_bytes of the (mov_num-1)th item to 635/* copy mov_num items and mov_bytes of the (mov_num-1)th item to
572 neighbor. Delete them from source */ 636 neighbor. Delete them from source */
573int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew) 637int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num,
638 int mov_bytes, struct buffer_head *Snew)
574{ 639{
575 int ret_value; 640 int ret_value;
576 struct buffer_info dest_bi, src_bi; 641 struct buffer_info dest_bi, src_bi;
577 int first_last; 642 int first_last;
578 643
579 leaf_define_dest_src_infos (shift_mode, tb, &dest_bi, &src_bi, &first_last, Snew); 644 leaf_define_dest_src_infos(shift_mode, tb, &dest_bi, &src_bi,
645 &first_last, Snew);
580 646
581 ret_value = leaf_copy_items (&dest_bi, src_bi.bi_bh, first_last, mov_num, mov_bytes); 647 ret_value =
648 leaf_copy_items(&dest_bi, src_bi.bi_bh, first_last, mov_num,
649 mov_bytes);
582 650
583 leaf_delete_items (&src_bi, first_last, (first_last == FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) - mov_num), mov_num, mov_bytes); 651 leaf_delete_items(&src_bi, first_last,
652 (first_last ==
653 FIRST_TO_LAST) ? 0 : (B_NR_ITEMS(src_bi.bi_bh) -
654 mov_num), mov_num, mov_bytes);
584 655
585 656 return ret_value;
586 return ret_value;
587} 657}
588 658
589
590/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) 659/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1)
591 from S[0] to L[0] and replace the delimiting key */ 660 from S[0] to L[0] and replace the delimiting key */
592int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes) 661int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes)
593{ 662{
594 struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); 663 struct buffer_head *S0 = PATH_PLAST_BUFFER(tb->tb_path);
595 int i; 664 int i;
596 665
597 /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ 666 /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */
598 i = leaf_move_items (LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL); 667 i = leaf_move_items(LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, NULL);
599 668
600 if ( shift_num ) { 669 if (shift_num) {
601 if (B_NR_ITEMS (S0) == 0) { /* number of items in S[0] == 0 */ 670 if (B_NR_ITEMS(S0) == 0) { /* number of items in S[0] == 0 */
602 671
603 RFALSE( shift_bytes != -1, 672 RFALSE(shift_bytes != -1,
604 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)", 673 "vs-10270: S0 is empty now, but shift_bytes != -1 (%d)",
605 shift_bytes); 674 shift_bytes);
606#ifdef CONFIG_REISERFS_CHECK 675#ifdef CONFIG_REISERFS_CHECK
607 if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { 676 if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) {
608 print_cur_tb ("vs-10275"); 677 print_cur_tb("vs-10275");
609 reiserfs_panic (tb->tb_sb, "vs-10275: leaf_shift_left: balance condition corrupted (%c)", tb->tb_mode); 678 reiserfs_panic(tb->tb_sb,
610 } 679 "vs-10275: leaf_shift_left: balance condition corrupted (%c)",
680 tb->tb_mode);
681 }
611#endif 682#endif
612 683
613 if (PATH_H_POSITION (tb->tb_path, 1) == 0) 684 if (PATH_H_POSITION(tb->tb_path, 1) == 0)
614 replace_key (tb, tb->CFL[0], tb->lkey[0], PATH_H_PPARENT (tb->tb_path, 0), 0); 685 replace_key(tb, tb->CFL[0], tb->lkey[0],
615 686 PATH_H_PPARENT(tb->tb_path, 0), 0);
616 } else { 687
617 /* replace lkey in CFL[0] by 0-th key from S[0]; */ 688 } else {
618 replace_key (tb, tb->CFL[0], tb->lkey[0], S0, 0); 689 /* replace lkey in CFL[0] by 0-th key from S[0]; */
619 690 replace_key(tb, tb->CFL[0], tb->lkey[0], S0, 0);
620 RFALSE( (shift_bytes != -1 && 691
621 !(is_direntry_le_ih (B_N_PITEM_HEAD (S0, 0)) 692 RFALSE((shift_bytes != -1 &&
622 && !I_ENTRY_COUNT (B_N_PITEM_HEAD (S0, 0)))) && 693 !(is_direntry_le_ih(B_N_PITEM_HEAD(S0, 0))
623 (!op_is_left_mergeable (B_N_PKEY (S0, 0), S0->b_size)), 694 && !I_ENTRY_COUNT(B_N_PITEM_HEAD(S0, 0)))) &&
624 "vs-10280: item must be mergeable"); 695 (!op_is_left_mergeable
625 } 696 (B_N_PKEY(S0, 0), S0->b_size)),
626 } 697 "vs-10280: item must be mergeable");
627 698 }
628 return i; 699 }
629}
630
631
632
633 700
701 return i;
702}
634 703
635/* CLEANING STOPPED HERE */ 704/* CLEANING STOPPED HERE */
636 705
637
638
639
640/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ 706/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */
641int leaf_shift_right( 707int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes)
642 struct tree_balance * tb,
643 int shift_num,
644 int shift_bytes
645 )
646{ 708{
647 // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); 709 // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path);
648 int ret_value; 710 int ret_value;
649 711
650 /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ 712 /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */
651 ret_value = leaf_move_items (LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL); 713 ret_value =
714 leaf_move_items(LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, NULL);
652 715
653 /* replace rkey in CFR[0] by the 0-th key from R[0] */ 716 /* replace rkey in CFR[0] by the 0-th key from R[0] */
654 if (shift_num) { 717 if (shift_num) {
655 replace_key (tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); 718 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
656 719
657 } 720 }
658 721
659 return ret_value; 722 return ret_value;
660} 723}
661 724
662 725static void leaf_delete_items_entirely(struct buffer_info *bi,
663 726 int first, int del_num);
664static void leaf_delete_items_entirely (struct buffer_info * bi,
665 int first, int del_num);
666/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. 727/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR.
667 If not. 728 If not.
668 If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of 729 If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of
@@ -670,287 +731,292 @@ static void leaf_delete_items_entirely (struct buffer_info * bi,
670 If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of 731 If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of
671 the last item . Part defined by del_bytes. Don't delete last item header. 732 the last item . Part defined by del_bytes. Don't delete last item header.
672*/ 733*/
673void leaf_delete_items (struct buffer_info * cur_bi, int last_first, 734void leaf_delete_items(struct buffer_info *cur_bi, int last_first,
674 int first, int del_num, int del_bytes) 735 int first, int del_num, int del_bytes)
675{ 736{
676 struct buffer_head * bh; 737 struct buffer_head *bh;
677 int item_amount = B_NR_ITEMS (bh = cur_bi->bi_bh); 738 int item_amount = B_NR_ITEMS(bh = cur_bi->bi_bh);
678 739
679 RFALSE( !bh, "10155: bh is not defined"); 740 RFALSE(!bh, "10155: bh is not defined");
680 RFALSE( del_num < 0, "10160: del_num can not be < 0. del_num==%d", del_num); 741 RFALSE(del_num < 0, "10160: del_num can not be < 0. del_num==%d",
681 RFALSE( first < 0 || first + del_num > item_amount, 742 del_num);
682 "10165: invalid number of first item to be deleted (%d) or " 743 RFALSE(first < 0
683 "no so much items (%d) to delete (only %d)", 744 || first + del_num > item_amount,
684 first, first + del_num, item_amount); 745 "10165: invalid number of first item to be deleted (%d) or "
685 746 "no so much items (%d) to delete (only %d)", first,
686 if ( del_num == 0 ) 747 first + del_num, item_amount);
687 return; 748
688 749 if (del_num == 0)
689 if ( first == 0 && del_num == item_amount && del_bytes == -1 ) { 750 return;
690 make_empty_node (cur_bi); 751
691 do_balance_mark_leaf_dirty (cur_bi->tb, bh, 0); 752 if (first == 0 && del_num == item_amount && del_bytes == -1) {
692 return; 753 make_empty_node(cur_bi);
693 } 754 do_balance_mark_leaf_dirty(cur_bi->tb, bh, 0);
694 755 return;
695 if ( del_bytes == -1 )
696 /* delete del_num items beginning from item in position first */
697 leaf_delete_items_entirely (cur_bi, first, del_num);
698 else {
699 if ( last_first == FIRST_TO_LAST ) {
700 /* delete del_num-1 items beginning from item in position first */
701 leaf_delete_items_entirely (cur_bi, first, del_num-1);
702
703 /* delete the part of the first item of the bh
704 do not delete item header
705 */
706 leaf_cut_from_buffer (cur_bi, 0, 0, del_bytes);
707 } else {
708 struct item_head * ih;
709 int len;
710
711 /* delete del_num-1 items beginning from item in position first+1 */
712 leaf_delete_items_entirely (cur_bi, first+1, del_num-1);
713
714 if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh)-1))) /* the last item is directory */
715 /* len = numbers of directory entries in this item */
716 len = ih_entry_count(ih);
717 else
718 /* len = body len of item */
719 len = ih_item_len(ih);
720
721 /* delete the part of the last item of the bh
722 do not delete item header
723 */
724 leaf_cut_from_buffer (cur_bi, B_NR_ITEMS(bh)-1, len - del_bytes, del_bytes);
725 } 756 }
726 }
727}
728 757
758 if (del_bytes == -1)
759 /* delete del_num items beginning from item in position first */
760 leaf_delete_items_entirely(cur_bi, first, del_num);
761 else {
762 if (last_first == FIRST_TO_LAST) {
763 /* delete del_num-1 items beginning from item in position first */
764 leaf_delete_items_entirely(cur_bi, first, del_num - 1);
765
766 /* delete the part of the first item of the bh
767 do not delete item header
768 */
769 leaf_cut_from_buffer(cur_bi, 0, 0, del_bytes);
770 } else {
771 struct item_head *ih;
772 int len;
773
774 /* delete del_num-1 items beginning from item in position first+1 */
775 leaf_delete_items_entirely(cur_bi, first + 1,
776 del_num - 1);
777
778 if (is_direntry_le_ih
779 (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1)))
780 /* the last item is directory */
781 /* len = numbers of directory entries in this item */
782 len = ih_entry_count(ih);
783 else
784 /* len = body len of item */
785 len = ih_item_len(ih);
786
787 /* delete the part of the last item of the bh
788 do not delete item header
789 */
790 leaf_cut_from_buffer(cur_bi, B_NR_ITEMS(bh) - 1,
791 len - del_bytes, del_bytes);
792 }
793 }
794}
729 795
730/* insert item into the leaf node in position before */ 796/* insert item into the leaf node in position before */
731void leaf_insert_into_buf (struct buffer_info * bi, int before, 797void leaf_insert_into_buf(struct buffer_info *bi, int before,
732 struct item_head * inserted_item_ih, 798 struct item_head *inserted_item_ih,
733 const char * inserted_item_body, 799 const char *inserted_item_body, int zeros_number)
734 int zeros_number)
735{ 800{
736 struct buffer_head * bh = bi->bi_bh; 801 struct buffer_head *bh = bi->bi_bh;
737 int nr, free_space; 802 int nr, free_space;
738 struct block_head * blkh; 803 struct block_head *blkh;
739 struct item_head * ih; 804 struct item_head *ih;
740 int i; 805 int i;
741 int last_loc, unmoved_loc; 806 int last_loc, unmoved_loc;
742 char * to; 807 char *to;
743 808
744 809 blkh = B_BLK_HEAD(bh);
745 blkh = B_BLK_HEAD(bh); 810 nr = blkh_nr_item(blkh);
746 nr = blkh_nr_item(blkh); 811 free_space = blkh_free_space(blkh);
747 free_space = blkh_free_space( blkh ); 812
748 813 /* check free space */
749 /* check free space */ 814 RFALSE(free_space < ih_item_len(inserted_item_ih) + IH_SIZE,
750 RFALSE( free_space < ih_item_len(inserted_item_ih) + IH_SIZE, 815 "vs-10170: not enough free space in block %z, new item %h",
751 "vs-10170: not enough free space in block %z, new item %h", 816 bh, inserted_item_ih);
752 bh, inserted_item_ih); 817 RFALSE(zeros_number > ih_item_len(inserted_item_ih),
753 RFALSE( zeros_number > ih_item_len(inserted_item_ih), 818 "vs-10172: zero number == %d, item length == %d",
754 "vs-10172: zero number == %d, item length == %d", 819 zeros_number, ih_item_len(inserted_item_ih));
755 zeros_number, ih_item_len(inserted_item_ih)); 820
756 821 /* get item new item must be inserted before */
757 822 ih = B_N_PITEM_HEAD(bh, before);
758 /* get item new item must be inserted before */ 823
759 ih = B_N_PITEM_HEAD (bh, before); 824 /* prepare space for the body of new item */
760 825 last_loc = nr ? ih_location(&(ih[nr - before - 1])) : bh->b_size;
761 /* prepare space for the body of new item */ 826 unmoved_loc = before ? ih_location(ih - 1) : bh->b_size;
762 last_loc = nr ? ih_location( &(ih[nr - before - 1]) ) : bh->b_size; 827
763 unmoved_loc = before ? ih_location( ih-1 ) : bh->b_size; 828 memmove(bh->b_data + last_loc - ih_item_len(inserted_item_ih),
764 829 bh->b_data + last_loc, unmoved_loc - last_loc);
765 830
766 memmove (bh->b_data + last_loc - ih_item_len(inserted_item_ih), 831 to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih);
767 bh->b_data + last_loc, unmoved_loc - last_loc); 832 memset(to, 0, zeros_number);
768 833 to += zeros_number;
769 to = bh->b_data + unmoved_loc - ih_item_len(inserted_item_ih); 834
770 memset (to, 0, zeros_number); 835 /* copy body to prepared space */
771 to += zeros_number; 836 if (inserted_item_body)
772 837 memmove(to, inserted_item_body,
773 /* copy body to prepared space */ 838 ih_item_len(inserted_item_ih) - zeros_number);
774 if (inserted_item_body) 839 else
775 memmove (to, inserted_item_body, ih_item_len(inserted_item_ih) - zeros_number); 840 memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number);
776 else 841
777 memset(to, '\0', ih_item_len(inserted_item_ih) - zeros_number); 842 /* insert item header */
778 843 memmove(ih + 1, ih, IH_SIZE * (nr - before));
779 /* insert item header */ 844 memmove(ih, inserted_item_ih, IH_SIZE);
780 memmove (ih + 1, ih, IH_SIZE * (nr - before)); 845
781 memmove (ih, inserted_item_ih, IH_SIZE); 846 /* change locations */
782 847 for (i = before; i < nr + 1; i++) {
783 /* change locations */ 848 unmoved_loc -= ih_item_len(&(ih[i - before]));
784 for (i = before; i < nr + 1; i ++) 849 put_ih_location(&(ih[i - before]), unmoved_loc);
785 { 850 }
786 unmoved_loc -= ih_item_len( &(ih[i-before]));
787 put_ih_location( &(ih[i-before]), unmoved_loc );
788 }
789
790 /* sizes, free space, item number */
791 set_blkh_nr_item( blkh, blkh_nr_item(blkh) + 1 );
792 set_blkh_free_space( blkh,
793 free_space - (IH_SIZE + ih_item_len(inserted_item_ih ) ) );
794 do_balance_mark_leaf_dirty (bi->tb, bh, 1);
795
796 if (bi->bi_parent) {
797 struct disk_child *t_dc;
798 t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
799 put_dc_size( t_dc, dc_size(t_dc) + (IH_SIZE + ih_item_len(inserted_item_ih)));
800 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
801 }
802}
803 851
852 /* sizes, free space, item number */
853 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
854 set_blkh_free_space(blkh,
855 free_space - (IH_SIZE +
856 ih_item_len(inserted_item_ih)));
857 do_balance_mark_leaf_dirty(bi->tb, bh, 1);
858
859 if (bi->bi_parent) {
860 struct disk_child *t_dc;
861 t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
862 put_dc_size(t_dc,
863 dc_size(t_dc) + (IH_SIZE +
864 ih_item_len(inserted_item_ih)));
865 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
866 }
867}
804 868
805/* paste paste_size bytes to affected_item_num-th item. 869/* paste paste_size bytes to affected_item_num-th item.
806 When item is a directory, this only prepare space for new entries */ 870 When item is a directory, this only prepare space for new entries */
807void leaf_paste_in_buffer (struct buffer_info * bi, int affected_item_num, 871void leaf_paste_in_buffer(struct buffer_info *bi, int affected_item_num,
808 int pos_in_item, int paste_size, 872 int pos_in_item, int paste_size,
809 const char * body, 873 const char *body, int zeros_number)
810 int zeros_number)
811{ 874{
812 struct buffer_head * bh = bi->bi_bh; 875 struct buffer_head *bh = bi->bi_bh;
813 int nr, free_space; 876 int nr, free_space;
814 struct block_head * blkh; 877 struct block_head *blkh;
815 struct item_head * ih; 878 struct item_head *ih;
816 int i; 879 int i;
817 int last_loc, unmoved_loc; 880 int last_loc, unmoved_loc;
818 881
819 blkh = B_BLK_HEAD(bh); 882 blkh = B_BLK_HEAD(bh);
820 nr = blkh_nr_item(blkh); 883 nr = blkh_nr_item(blkh);
821 free_space = blkh_free_space(blkh); 884 free_space = blkh_free_space(blkh);
822 885
823 886 /* check free space */
824 /* check free space */ 887 RFALSE(free_space < paste_size,
825 RFALSE( free_space < paste_size, 888 "vs-10175: not enough free space: needed %d, available %d",
826 "vs-10175: not enough free space: needed %d, available %d", 889 paste_size, free_space);
827 paste_size, free_space);
828 890
829#ifdef CONFIG_REISERFS_CHECK 891#ifdef CONFIG_REISERFS_CHECK
830 if (zeros_number > paste_size) { 892 if (zeros_number > paste_size) {
831 print_cur_tb ("10177"); 893 print_cur_tb("10177");
832 reiserfs_panic ( NULL, "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d", 894 reiserfs_panic(NULL,
833 zeros_number, paste_size); 895 "vs-10177: leaf_paste_in_buffer: ero number == %d, paste_size == %d",
834 } 896 zeros_number, paste_size);
835#endif /* CONFIG_REISERFS_CHECK */ 897 }
836 898#endif /* CONFIG_REISERFS_CHECK */
837 899
838 /* item to be appended */ 900 /* item to be appended */
839 ih = B_N_PITEM_HEAD(bh, affected_item_num); 901 ih = B_N_PITEM_HEAD(bh, affected_item_num);
840 902
841 last_loc = ih_location( &(ih[nr - affected_item_num - 1]) ); 903 last_loc = ih_location(&(ih[nr - affected_item_num - 1]));
842 unmoved_loc = affected_item_num ? ih_location( ih-1 ) : bh->b_size; 904 unmoved_loc = affected_item_num ? ih_location(ih - 1) : bh->b_size;
843 905
844 /* prepare space */ 906 /* prepare space */
845 memmove (bh->b_data + last_loc - paste_size, bh->b_data + last_loc, 907 memmove(bh->b_data + last_loc - paste_size, bh->b_data + last_loc,
846 unmoved_loc - last_loc); 908 unmoved_loc - last_loc);
847 909
848 910 /* change locations */
849 /* change locations */ 911 for (i = affected_item_num; i < nr; i++)
850 for (i = affected_item_num; i < nr; i ++) 912 put_ih_location(&(ih[i - affected_item_num]),
851 put_ih_location( &(ih[i-affected_item_num]), 913 ih_location(&(ih[i - affected_item_num])) -
852 ih_location( &(ih[i-affected_item_num])) - paste_size ); 914 paste_size);
853 915
854 if ( body ) { 916 if (body) {
855 if (!is_direntry_le_ih (ih)) { 917 if (!is_direntry_le_ih(ih)) {
856 if (!pos_in_item) { 918 if (!pos_in_item) {
857 /* shift data to right */ 919 /* shift data to right */
858 memmove (bh->b_data + ih_location(ih) + paste_size, 920 memmove(bh->b_data + ih_location(ih) +
859 bh->b_data + ih_location(ih), ih_item_len(ih)); 921 paste_size,
860 /* paste data in the head of item */ 922 bh->b_data + ih_location(ih),
861 memset (bh->b_data + ih_location(ih), 0, zeros_number); 923 ih_item_len(ih));
862 memcpy (bh->b_data + ih_location(ih) + zeros_number, body, paste_size - zeros_number); 924 /* paste data in the head of item */
863 } else { 925 memset(bh->b_data + ih_location(ih), 0,
864 memset (bh->b_data + unmoved_loc - paste_size, 0, zeros_number); 926 zeros_number);
865 memcpy (bh->b_data + unmoved_loc - paste_size + zeros_number, body, paste_size - zeros_number); 927 memcpy(bh->b_data + ih_location(ih) +
866 } 928 zeros_number, body,
929 paste_size - zeros_number);
930 } else {
931 memset(bh->b_data + unmoved_loc - paste_size, 0,
932 zeros_number);
933 memcpy(bh->b_data + unmoved_loc - paste_size +
934 zeros_number, body,
935 paste_size - zeros_number);
936 }
937 }
938 } else
939 memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size);
940
941 put_ih_item_len(ih, ih_item_len(ih) + paste_size);
942
943 /* change free space */
944 set_blkh_free_space(blkh, free_space - paste_size);
945
946 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
947
948 if (bi->bi_parent) {
949 struct disk_child *t_dc =
950 B_N_CHILD(bi->bi_parent, bi->bi_position);
951 put_dc_size(t_dc, dc_size(t_dc) + paste_size);
952 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
867 } 953 }
868 }
869 else
870 memset(bh->b_data + unmoved_loc - paste_size, '\0', paste_size);
871
872 put_ih_item_len( ih, ih_item_len(ih) + paste_size );
873
874 /* change free space */
875 set_blkh_free_space( blkh, free_space - paste_size );
876
877 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
878
879 if (bi->bi_parent) {
880 struct disk_child *t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
881 put_dc_size( t_dc, dc_size(t_dc) + paste_size );
882 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
883 }
884} 954}
885 955
886
887/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item 956/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item
888 does not have free space, so it moves DEHs and remaining records as 957 does not have free space, so it moves DEHs and remaining records as
889 necessary. Return value is size of removed part of directory item 958 necessary. Return value is size of removed part of directory item
890 in bytes. */ 959 in bytes. */
891static int leaf_cut_entries ( 960static int leaf_cut_entries(struct buffer_head *bh,
892 struct buffer_head * bh, 961 struct item_head *ih, int from, int del_count)
893 struct item_head * ih,
894 int from,
895 int del_count
896 )
897{ 962{
898 char * item; 963 char *item;
899 struct reiserfs_de_head * deh; 964 struct reiserfs_de_head *deh;
900 int prev_record_offset; /* offset of record, that is (from-1)th */ 965 int prev_record_offset; /* offset of record, that is (from-1)th */
901 char * prev_record; /* */ 966 char *prev_record; /* */
902 int cut_records_len; /* length of all removed records */ 967 int cut_records_len; /* length of all removed records */
903 int i; 968 int i;
904 969
905 970 /* make sure, that item is directory and there are enough entries to
906 /* make sure, that item is directory and there are enough entries to 971 remove */
907 remove */ 972 RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
908 RFALSE( !is_direntry_le_ih (ih), "10180: item is not directory item"); 973 RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
909 RFALSE( I_ENTRY_COUNT(ih) < from + del_count, 974 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d",
910 "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", 975 I_ENTRY_COUNT(ih), from, del_count);
911 I_ENTRY_COUNT(ih), from, del_count); 976
912 977 if (del_count == 0)
913 if (del_count == 0) 978 return 0;
914 return 0; 979
915 980 /* first byte of item */
916 /* first byte of item */ 981 item = bh->b_data + ih_location(ih);
917 item = bh->b_data + ih_location(ih); 982
918 983 /* entry head array */
919 /* entry head array */ 984 deh = B_I_DEH(bh, ih);
920 deh = B_I_DEH (bh, ih); 985
921 986 /* first byte of remaining entries, those are BEFORE cut entries
922 /* first byte of remaining entries, those are BEFORE cut entries 987 (prev_record) and length of all removed records (cut_records_len) */
923 (prev_record) and length of all removed records (cut_records_len) */ 988 prev_record_offset =
924 prev_record_offset = (from ? deh_location( &(deh[from - 1])) : ih_item_len(ih)); 989 (from ? deh_location(&(deh[from - 1])) : ih_item_len(ih));
925 cut_records_len = prev_record_offset/*from_record*/ - 990 cut_records_len = prev_record_offset /*from_record */ -
926 deh_location( &(deh[from + del_count - 1])); 991 deh_location(&(deh[from + del_count - 1]));
927 prev_record = item + prev_record_offset; 992 prev_record = item + prev_record_offset;
928 993
929 994 /* adjust locations of remaining entries */
930 /* adjust locations of remaining entries */ 995 for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i--)
931 for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i --) 996 put_deh_location(&(deh[i]),
932 put_deh_location( &(deh[i]), 997 deh_location(&deh[i]) -
933 deh_location( &deh[i] ) - (DEH_SIZE * del_count ) ); 998 (DEH_SIZE * del_count));
934 999
935 for (i = 0; i < from; i ++) 1000 for (i = 0; i < from; i++)
936 put_deh_location( &(deh[i]), 1001 put_deh_location(&(deh[i]),
937 deh_location( &deh[i] ) - (DEH_SIZE * del_count + cut_records_len) ); 1002 deh_location(&deh[i]) - (DEH_SIZE * del_count +
938 1003 cut_records_len));
939 put_ih_entry_count( ih, ih_entry_count(ih) - del_count ); 1004
940 1005 put_ih_entry_count(ih, ih_entry_count(ih) - del_count);
941 /* shift entry head array and entries those are AFTER removed entries */ 1006
942 memmove ((char *)(deh + from), 1007 /* shift entry head array and entries those are AFTER removed entries */
943 deh + from + del_count, 1008 memmove((char *)(deh + from),
944 prev_record - cut_records_len - (char *)(deh + from + del_count)); 1009 deh + from + del_count,
945 1010 prev_record - cut_records_len - (char *)(deh + from +
946 /* shift records, those are BEFORE removed entries */ 1011 del_count));
947 memmove (prev_record - cut_records_len - DEH_SIZE * del_count, 1012
948 prev_record, item + ih_item_len(ih) - prev_record); 1013 /* shift records, those are BEFORE removed entries */
949 1014 memmove(prev_record - cut_records_len - DEH_SIZE * del_count,
950 return DEH_SIZE * del_count + cut_records_len; 1015 prev_record, item + ih_item_len(ih) - prev_record);
1016
1017 return DEH_SIZE * del_count + cut_records_len;
951} 1018}
952 1019
953
954/* when cut item is part of regular file 1020/* when cut item is part of regular file
955 pos_in_item - first byte that must be cut 1021 pos_in_item - first byte that must be cut
956 cut_size - number of bytes to be cut beginning from pos_in_item 1022 cut_size - number of bytes to be cut beginning from pos_in_item
@@ -959,264 +1025,278 @@ static int leaf_cut_entries (
959 pos_in_item - number of first deleted entry 1025 pos_in_item - number of first deleted entry
960 cut_size - count of deleted entries 1026 cut_size - count of deleted entries
961 */ 1027 */
962void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, 1028void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num,
963 int pos_in_item, int cut_size) 1029 int pos_in_item, int cut_size)
964{ 1030{
965 int nr; 1031 int nr;
966 struct buffer_head * bh = bi->bi_bh; 1032 struct buffer_head *bh = bi->bi_bh;
967 struct block_head * blkh; 1033 struct block_head *blkh;
968 struct item_head * ih; 1034 struct item_head *ih;
969 int last_loc, unmoved_loc; 1035 int last_loc, unmoved_loc;
970 int i; 1036 int i;
971 1037
972 blkh = B_BLK_HEAD(bh); 1038 blkh = B_BLK_HEAD(bh);
973 nr = blkh_nr_item(blkh); 1039 nr = blkh_nr_item(blkh);
974 1040
975 /* item head of truncated item */ 1041 /* item head of truncated item */
976 ih = B_N_PITEM_HEAD (bh, cut_item_num); 1042 ih = B_N_PITEM_HEAD(bh, cut_item_num);
977 1043
978 if (is_direntry_le_ih (ih)) { 1044 if (is_direntry_le_ih(ih)) {
979 /* first cut entry ()*/ 1045 /* first cut entry () */
980 cut_size = leaf_cut_entries (bh, ih, pos_in_item, cut_size); 1046 cut_size = leaf_cut_entries(bh, ih, pos_in_item, cut_size);
981 if (pos_in_item == 0) { 1047 if (pos_in_item == 0) {
982 /* change key */ 1048 /* change key */
983 RFALSE( cut_item_num, 1049 RFALSE(cut_item_num,
984 "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", cut_item_num); 1050 "when 0-th enrty of item is cut, that item must be first in the node, not %d-th",
985 /* change item key by key of first entry in the item */ 1051 cut_item_num);
986 set_le_ih_k_offset (ih, deh_offset(B_I_DEH (bh, ih))); 1052 /* change item key by key of first entry in the item */
987 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE);*/ 1053 set_le_ih_k_offset(ih, deh_offset(B_I_DEH(bh, ih)));
988 } 1054 /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE); */
989 } else { 1055 }
990 /* item is direct or indirect */ 1056 } else {
991 RFALSE( is_statdata_le_ih (ih), "10195: item is stat data"); 1057 /* item is direct or indirect */
992 RFALSE( pos_in_item && pos_in_item + cut_size != ih_item_len(ih), 1058 RFALSE(is_statdata_le_ih(ih), "10195: item is stat data");
993 "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", 1059 RFALSE(pos_in_item && pos_in_item + cut_size != ih_item_len(ih),
994 ( long unsigned ) pos_in_item, ( long unsigned ) cut_size, 1060 "10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)",
995 ( long unsigned ) ih_item_len (ih)); 1061 (long unsigned)pos_in_item, (long unsigned)cut_size,
996 1062 (long unsigned)ih_item_len(ih));
997 /* shift item body to left if cut is from the head of item */ 1063
998 if (pos_in_item == 0) { 1064 /* shift item body to left if cut is from the head of item */
999 memmove( bh->b_data + ih_location(ih), 1065 if (pos_in_item == 0) {
1000 bh->b_data + ih_location(ih) + cut_size, 1066 memmove(bh->b_data + ih_location(ih),
1001 ih_item_len(ih) - cut_size); 1067 bh->b_data + ih_location(ih) + cut_size,
1002 1068 ih_item_len(ih) - cut_size);
1003 /* change key of item */ 1069
1004 if (is_direct_le_ih (ih)) 1070 /* change key of item */
1005 set_le_ih_k_offset (ih, le_ih_k_offset (ih) + cut_size); 1071 if (is_direct_le_ih(ih))
1006 else { 1072 set_le_ih_k_offset(ih,
1007 set_le_ih_k_offset (ih, le_ih_k_offset (ih) + (cut_size / UNFM_P_SIZE) * bh->b_size); 1073 le_ih_k_offset(ih) +
1008 RFALSE( ih_item_len(ih) == cut_size && get_ih_free_space (ih), 1074 cut_size);
1009 "10205: invalid ih_free_space (%h)", ih); 1075 else {
1010 } 1076 set_le_ih_k_offset(ih,
1011 } 1077 le_ih_k_offset(ih) +
1012 } 1078 (cut_size / UNFM_P_SIZE) *
1013 1079 bh->b_size);
1014 1080 RFALSE(ih_item_len(ih) == cut_size
1015 /* location of the last item */ 1081 && get_ih_free_space(ih),
1016 last_loc = ih_location( &(ih[nr - cut_item_num - 1]) ); 1082 "10205: invalid ih_free_space (%h)", ih);
1017 1083 }
1018 /* location of the item, which is remaining at the same place */ 1084 }
1019 unmoved_loc = cut_item_num ? ih_location(ih-1) : bh->b_size; 1085 }
1020 1086
1021 1087 /* location of the last item */
1022 /* shift */ 1088 last_loc = ih_location(&(ih[nr - cut_item_num - 1]));
1023 memmove (bh->b_data + last_loc + cut_size, bh->b_data + last_loc, 1089
1024 unmoved_loc - last_loc - cut_size); 1090 /* location of the item, which is remaining at the same place */
1025 1091 unmoved_loc = cut_item_num ? ih_location(ih - 1) : bh->b_size;
1026 /* change item length */ 1092
1027 put_ih_item_len( ih, ih_item_len(ih) - cut_size ); 1093 /* shift */
1028 1094 memmove(bh->b_data + last_loc + cut_size, bh->b_data + last_loc,
1029 if (is_indirect_le_ih (ih)) { 1095 unmoved_loc - last_loc - cut_size);
1030 if (pos_in_item) 1096
1031 set_ih_free_space (ih, 0); 1097 /* change item length */
1032 } 1098 put_ih_item_len(ih, ih_item_len(ih) - cut_size);
1033
1034 /* change locations */
1035 for (i = cut_item_num; i < nr; i ++)
1036 put_ih_location( &(ih[i-cut_item_num]), ih_location( &ih[i-cut_item_num]) + cut_size );
1037
1038 /* size, free space */
1039 set_blkh_free_space( blkh, blkh_free_space(blkh) + cut_size );
1040
1041 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
1042
1043 if (bi->bi_parent) {
1044 struct disk_child *t_dc;
1045 t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
1046 put_dc_size( t_dc, dc_size(t_dc) - cut_size );
1047 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
1048 }
1049}
1050 1099
1100 if (is_indirect_le_ih(ih)) {
1101 if (pos_in_item)
1102 set_ih_free_space(ih, 0);
1103 }
1104
1105 /* change locations */
1106 for (i = cut_item_num; i < nr; i++)
1107 put_ih_location(&(ih[i - cut_item_num]),
1108 ih_location(&ih[i - cut_item_num]) + cut_size);
1109
1110 /* size, free space */
1111 set_blkh_free_space(blkh, blkh_free_space(blkh) + cut_size);
1112
1113 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
1114
1115 if (bi->bi_parent) {
1116 struct disk_child *t_dc;
1117 t_dc = B_N_CHILD(bi->bi_parent, bi->bi_position);
1118 put_dc_size(t_dc, dc_size(t_dc) - cut_size);
1119 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
1120 }
1121}
1051 1122
1052/* delete del_num items from buffer starting from the first'th item */ 1123/* delete del_num items from buffer starting from the first'th item */
1053static void leaf_delete_items_entirely (struct buffer_info * bi, 1124static void leaf_delete_items_entirely(struct buffer_info *bi,
1054 int first, int del_num) 1125 int first, int del_num)
1055{ 1126{
1056 struct buffer_head * bh = bi->bi_bh; 1127 struct buffer_head *bh = bi->bi_bh;
1057 int nr; 1128 int nr;
1058 int i, j; 1129 int i, j;
1059 int last_loc, last_removed_loc; 1130 int last_loc, last_removed_loc;
1060 struct block_head * blkh; 1131 struct block_head *blkh;
1061 struct item_head * ih; 1132 struct item_head *ih;
1062 1133
1063 RFALSE( bh == NULL, "10210: buffer is 0"); 1134 RFALSE(bh == NULL, "10210: buffer is 0");
1064 RFALSE( del_num < 0, "10215: del_num less than 0 (%d)", del_num); 1135 RFALSE(del_num < 0, "10215: del_num less than 0 (%d)", del_num);
1065 1136
1066 if (del_num == 0) 1137 if (del_num == 0)
1067 return; 1138 return;
1068 1139
1069 blkh = B_BLK_HEAD(bh); 1140 blkh = B_BLK_HEAD(bh);
1070 nr = blkh_nr_item(blkh); 1141 nr = blkh_nr_item(blkh);
1071
1072 RFALSE( first < 0 || first + del_num > nr,
1073 "10220: first=%d, number=%d, there is %d items", first, del_num, nr);
1074
1075 if (first == 0 && del_num == nr) {
1076 /* this does not work */
1077 make_empty_node (bi);
1078
1079 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
1080 return;
1081 }
1082
1083 ih = B_N_PITEM_HEAD (bh, first);
1084
1085 /* location of unmovable item */
1086 j = (first == 0) ? bh->b_size : ih_location(ih-1);
1087
1088 /* delete items */
1089 last_loc = ih_location( &(ih[nr-1-first]) );
1090 last_removed_loc = ih_location( &(ih[del_num-1]) );
1091
1092 memmove (bh->b_data + last_loc + j - last_removed_loc,
1093 bh->b_data + last_loc, last_removed_loc - last_loc);
1094
1095 /* delete item headers */
1096 memmove (ih, ih + del_num, (nr - first - del_num) * IH_SIZE);
1097
1098 /* change item location */
1099 for (i = first; i < nr - del_num; i ++)
1100 put_ih_location( &(ih[i-first]), ih_location( &(ih[i-first]) ) + (j - last_removed_loc) );
1101
1102 /* sizes, item number */
1103 set_blkh_nr_item( blkh, blkh_nr_item(blkh) - del_num );
1104 set_blkh_free_space( blkh, blkh_free_space(blkh) + (j - last_removed_loc + IH_SIZE * del_num) );
1105
1106 do_balance_mark_leaf_dirty (bi->tb, bh, 0);
1107
1108 if (bi->bi_parent) {
1109 struct disk_child *t_dc = B_N_CHILD (bi->bi_parent, bi->bi_position);
1110 put_dc_size( t_dc, dc_size(t_dc) -
1111 (j - last_removed_loc + IH_SIZE * del_num));
1112 do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0);
1113 }
1114}
1115 1142
1143 RFALSE(first < 0 || first + del_num > nr,
1144 "10220: first=%d, number=%d, there is %d items", first, del_num,
1145 nr);
1146
1147 if (first == 0 && del_num == nr) {
1148 /* this does not work */
1149 make_empty_node(bi);
1150
1151 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
1152 return;
1153 }
1116 1154
1155 ih = B_N_PITEM_HEAD(bh, first);
1117 1156
1157 /* location of unmovable item */
1158 j = (first == 0) ? bh->b_size : ih_location(ih - 1);
1118 1159
1160 /* delete items */
1161 last_loc = ih_location(&(ih[nr - 1 - first]));
1162 last_removed_loc = ih_location(&(ih[del_num - 1]));
1163
1164 memmove(bh->b_data + last_loc + j - last_removed_loc,
1165 bh->b_data + last_loc, last_removed_loc - last_loc);
1166
1167 /* delete item headers */
1168 memmove(ih, ih + del_num, (nr - first - del_num) * IH_SIZE);
1169
1170 /* change item location */
1171 for (i = first; i < nr - del_num; i++)
1172 put_ih_location(&(ih[i - first]),
1173 ih_location(&(ih[i - first])) + (j -
1174 last_removed_loc));
1175
1176 /* sizes, item number */
1177 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
1178 set_blkh_free_space(blkh,
1179 blkh_free_space(blkh) + (j - last_removed_loc +
1180 IH_SIZE * del_num));
1181
1182 do_balance_mark_leaf_dirty(bi->tb, bh, 0);
1183
1184 if (bi->bi_parent) {
1185 struct disk_child *t_dc =
1186 B_N_CHILD(bi->bi_parent, bi->bi_position);
1187 put_dc_size(t_dc,
1188 dc_size(t_dc) - (j - last_removed_loc +
1189 IH_SIZE * del_num));
1190 do_balance_mark_internal_dirty(bi->tb, bi->bi_parent, 0);
1191 }
1192}
1119 1193
1120/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ 1194/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */
1121void leaf_paste_entries ( 1195void leaf_paste_entries(struct buffer_head *bh,
1122 struct buffer_head * bh,
1123 int item_num, 1196 int item_num,
1124 int before, 1197 int before,
1125 int new_entry_count, 1198 int new_entry_count,
1126 struct reiserfs_de_head * new_dehs, 1199 struct reiserfs_de_head *new_dehs,
1127 const char * records, 1200 const char *records, int paste_size)
1128 int paste_size
1129 )
1130{ 1201{
1131 struct item_head * ih; 1202 struct item_head *ih;
1132 char * item; 1203 char *item;
1133 struct reiserfs_de_head * deh; 1204 struct reiserfs_de_head *deh;
1134 char * insert_point; 1205 char *insert_point;
1135 int i, old_entry_num; 1206 int i, old_entry_num;
1136 1207
1137 if (new_entry_count == 0) 1208 if (new_entry_count == 0)
1138 return; 1209 return;
1139 1210
1140 ih = B_N_PITEM_HEAD(bh, item_num); 1211 ih = B_N_PITEM_HEAD(bh, item_num);
1141 1212
1142 /* make sure, that item is directory, and there are enough records in it */ 1213 /* make sure, that item is directory, and there are enough records in it */
1143 RFALSE( !is_direntry_le_ih (ih), "10225: item is not directory item"); 1214 RFALSE(!is_direntry_le_ih(ih), "10225: item is not directory item");
1144 RFALSE( I_ENTRY_COUNT (ih) < before, 1215 RFALSE(I_ENTRY_COUNT(ih) < before,
1145 "10230: there are no entry we paste entries before. entry_count = %d, before = %d", 1216 "10230: there are no entry we paste entries before. entry_count = %d, before = %d",
1146 I_ENTRY_COUNT (ih), before); 1217 I_ENTRY_COUNT(ih), before);
1147 1218
1148 1219 /* first byte of dest item */
1149 /* first byte of dest item */ 1220 item = bh->b_data + ih_location(ih);
1150 item = bh->b_data + ih_location(ih); 1221
1151 1222 /* entry head array */
1152 /* entry head array */ 1223 deh = B_I_DEH(bh, ih);
1153 deh = B_I_DEH (bh, ih); 1224
1154 1225 /* new records will be pasted at this point */
1155 /* new records will be pasted at this point */ 1226 insert_point =
1156 insert_point = item + (before ? deh_location( &(deh[before - 1])) : (ih_item_len(ih) - paste_size)); 1227 item +
1157 1228 (before ? deh_location(&(deh[before - 1]))
1158 /* adjust locations of records that will be AFTER new records */ 1229 : (ih_item_len(ih) - paste_size));
1159 for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i --) 1230
1160 put_deh_location( &(deh[i]), 1231 /* adjust locations of records that will be AFTER new records */
1161 deh_location(&(deh[i])) + (DEH_SIZE * new_entry_count )); 1232 for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i--)
1162 1233 put_deh_location(&(deh[i]),
1163 /* adjust locations of records that will be BEFORE new records */ 1234 deh_location(&(deh[i])) +
1164 for (i = 0; i < before; i ++) 1235 (DEH_SIZE * new_entry_count));
1165 put_deh_location( &(deh[i]), deh_location(&(deh[i])) + paste_size ); 1236
1166 1237 /* adjust locations of records that will be BEFORE new records */
1167 old_entry_num = I_ENTRY_COUNT(ih); 1238 for (i = 0; i < before; i++)
1168 put_ih_entry_count( ih, ih_entry_count(ih) + new_entry_count ); 1239 put_deh_location(&(deh[i]),
1169 1240 deh_location(&(deh[i])) + paste_size);
1170 /* prepare space for pasted records */ 1241
1171 memmove (insert_point + paste_size, insert_point, item + (ih_item_len(ih) - paste_size) - insert_point); 1242 old_entry_num = I_ENTRY_COUNT(ih);
1172 1243 put_ih_entry_count(ih, ih_entry_count(ih) + new_entry_count);
1173 /* copy new records */ 1244
1174 memcpy (insert_point + DEH_SIZE * new_entry_count, records, 1245 /* prepare space for pasted records */
1175 paste_size - DEH_SIZE * new_entry_count); 1246 memmove(insert_point + paste_size, insert_point,
1176 1247 item + (ih_item_len(ih) - paste_size) - insert_point);
1177 /* prepare space for new entry heads */ 1248
1178 deh += before; 1249 /* copy new records */
1179 memmove ((char *)(deh + new_entry_count), deh, insert_point - (char *)deh); 1250 memcpy(insert_point + DEH_SIZE * new_entry_count, records,
1180 1251 paste_size - DEH_SIZE * new_entry_count);
1181 /* copy new entry heads */ 1252
1182 deh = (struct reiserfs_de_head *)((char *)deh); 1253 /* prepare space for new entry heads */
1183 memcpy (deh, new_dehs, DEH_SIZE * new_entry_count); 1254 deh += before;
1184 1255 memmove((char *)(deh + new_entry_count), deh,
1185 /* set locations of new records */ 1256 insert_point - (char *)deh);
1186 for (i = 0; i < new_entry_count; i ++) 1257
1187 { 1258 /* copy new entry heads */
1188 put_deh_location( &(deh[i]), 1259 deh = (struct reiserfs_de_head *)((char *)deh);
1189 deh_location( &(deh[i] )) + 1260 memcpy(deh, new_dehs, DEH_SIZE * new_entry_count);
1190 (- deh_location( &(new_dehs[new_entry_count - 1])) + 1261
1191 insert_point + DEH_SIZE * new_entry_count - item)); 1262 /* set locations of new records */
1192 } 1263 for (i = 0; i < new_entry_count; i++) {
1193 1264 put_deh_location(&(deh[i]),
1194 1265 deh_location(&(deh[i])) +
1195 /* change item key if necessary (when we paste before 0-th entry */ 1266 (-deh_location
1196 if (!before) 1267 (&(new_dehs[new_entry_count - 1])) +
1197 { 1268 insert_point + DEH_SIZE * new_entry_count -
1198 set_le_ih_k_offset (ih, deh_offset(new_dehs)); 1269 item));
1270 }
1271
1272 /* change item key if necessary (when we paste before 0-th entry */
1273 if (!before) {
1274 set_le_ih_k_offset(ih, deh_offset(new_dehs));
1199/* memcpy (&ih->ih_key.k_offset, 1275/* memcpy (&ih->ih_key.k_offset,
1200 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ 1276 &new_dehs->deh_offset, SHORT_KEY_SIZE);*/
1201 } 1277 }
1202
1203#ifdef CONFIG_REISERFS_CHECK 1278#ifdef CONFIG_REISERFS_CHECK
1204 { 1279 {
1205 int prev, next; 1280 int prev, next;
1206 /* check record locations */ 1281 /* check record locations */
1207 deh = B_I_DEH (bh, ih); 1282 deh = B_I_DEH(bh, ih);
1208 for (i = 0; i < I_ENTRY_COUNT(ih); i ++) { 1283 for (i = 0; i < I_ENTRY_COUNT(ih); i++) {
1209 next = (i < I_ENTRY_COUNT(ih) - 1) ? deh_location( &(deh[i + 1])) : 0; 1284 next =
1210 prev = (i != 0) ? deh_location( &(deh[i - 1]) ) : 0; 1285 (i <
1211 1286 I_ENTRY_COUNT(ih) -
1212 if (prev && prev <= deh_location( &(deh[i]))) 1287 1) ? deh_location(&(deh[i + 1])) : 0;
1213 reiserfs_warning (NULL, "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)", 1288 prev = (i != 0) ? deh_location(&(deh[i - 1])) : 0;
1214 ih, deh + i - 1, i, deh + i); 1289
1215 if (next && next >= deh_location( &(deh[i]))) 1290 if (prev && prev <= deh_location(&(deh[i])))
1216 reiserfs_warning (NULL, "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)", 1291 reiserfs_warning(NULL,
1217 ih, i, deh + i, deh + i + 1); 1292 "vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)",
1218 } 1293 ih, deh + i - 1, i, deh + i);
1219 } 1294 if (next && next >= deh_location(&(deh[i])))
1295 reiserfs_warning(NULL,
1296 "vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)",
1297 ih, i, deh + i, deh + i + 1);
1298 }
1299 }
1220#endif 1300#endif
1221 1301
1222} 1302}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 4a333255f27..a20bbc1642d 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -25,86 +25,85 @@
25 25
26// directory item contains array of entry headers. This performs 26// directory item contains array of entry headers. This performs
27// binary search through that array 27// binary search through that array
28static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off) 28static int bin_search_in_dir_item(struct reiserfs_dir_entry *de, loff_t off)
29{ 29{
30 struct item_head * ih = de->de_ih; 30 struct item_head *ih = de->de_ih;
31 struct reiserfs_de_head * deh = de->de_deh; 31 struct reiserfs_de_head *deh = de->de_deh;
32 int rbound, lbound, j; 32 int rbound, lbound, j;
33 33
34 lbound = 0; 34 lbound = 0;
35 rbound = I_ENTRY_COUNT (ih) - 1; 35 rbound = I_ENTRY_COUNT(ih) - 1;
36 36
37 for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { 37 for (j = (rbound + lbound) / 2; lbound <= rbound;
38 if (off < deh_offset (deh + j)) { 38 j = (rbound + lbound) / 2) {
39 rbound = j - 1; 39 if (off < deh_offset(deh + j)) {
40 continue; 40 rbound = j - 1;
41 continue;
42 }
43 if (off > deh_offset(deh + j)) {
44 lbound = j + 1;
45 continue;
46 }
47 // this is not name found, but matched third key component
48 de->de_entry_num = j;
49 return NAME_FOUND;
41 } 50 }
42 if (off > deh_offset (deh + j)) {
43 lbound = j + 1;
44 continue;
45 }
46 // this is not name found, but matched third key component
47 de->de_entry_num = j;
48 return NAME_FOUND;
49 }
50 51
51 de->de_entry_num = lbound; 52 de->de_entry_num = lbound;
52 return NAME_NOT_FOUND; 53 return NAME_NOT_FOUND;
53} 54}
54 55
55
56// comment? maybe something like set de to point to what the path points to? 56// comment? maybe something like set de to point to what the path points to?
57static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path) 57static inline void set_de_item_location(struct reiserfs_dir_entry *de,
58 struct path *path)
58{ 59{
59 de->de_bh = get_last_bh (path); 60 de->de_bh = get_last_bh(path);
60 de->de_ih = get_ih (path); 61 de->de_ih = get_ih(path);
61 de->de_deh = B_I_DEH (de->de_bh, de->de_ih); 62 de->de_deh = B_I_DEH(de->de_bh, de->de_ih);
62 de->de_item_num = PATH_LAST_POSITION (path); 63 de->de_item_num = PATH_LAST_POSITION(path);
63} 64}
64
65 65
66// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set 66// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set
67inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de) 67inline void set_de_name_and_namelen(struct reiserfs_dir_entry *de)
68{ 68{
69 struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; 69 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
70 70
71 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 71 if (de->de_entry_num >= ih_entry_count(de->de_ih))
72 BUG (); 72 BUG();
73 73
74 de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num); 74 de->de_entrylen = entry_length(de->de_bh, de->de_ih, de->de_entry_num);
75 de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? SD_SIZE : 0); 75 de->de_namelen = de->de_entrylen - (de_with_sd(deh) ? SD_SIZE : 0);
76 de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + deh_location(deh); 76 de->de_name = B_I_PITEM(de->de_bh, de->de_ih) + deh_location(deh);
77 if (de->de_name[de->de_namelen - 1] == 0) 77 if (de->de_name[de->de_namelen - 1] == 0)
78 de->de_namelen = strlen (de->de_name); 78 de->de_namelen = strlen(de->de_name);
79} 79}
80 80
81
82// what entry points to 81// what entry points to
83static inline void set_de_object_key (struct reiserfs_dir_entry * de) 82static inline void set_de_object_key(struct reiserfs_dir_entry *de)
84{ 83{
85 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 84 if (de->de_entry_num >= ih_entry_count(de->de_ih))
86 BUG (); 85 BUG();
87 de->de_dir_id = deh_dir_id( &(de->de_deh[de->de_entry_num])); 86 de->de_dir_id = deh_dir_id(&(de->de_deh[de->de_entry_num]));
88 de->de_objectid = deh_objectid( &(de->de_deh[de->de_entry_num])); 87 de->de_objectid = deh_objectid(&(de->de_deh[de->de_entry_num]));
89} 88}
90 89
91 90static inline void store_de_entry_key(struct reiserfs_dir_entry *de)
92static inline void store_de_entry_key (struct reiserfs_dir_entry * de)
93{ 91{
94 struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; 92 struct reiserfs_de_head *deh = de->de_deh + de->de_entry_num;
95 93
96 if (de->de_entry_num >= ih_entry_count (de->de_ih)) 94 if (de->de_entry_num >= ih_entry_count(de->de_ih))
97 BUG (); 95 BUG();
98 96
99 /* store key of the found entry */ 97 /* store key of the found entry */
100 de->de_entry_key.version = KEY_FORMAT_3_5; 98 de->de_entry_key.version = KEY_FORMAT_3_5;
101 de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id); 99 de->de_entry_key.on_disk_key.k_dir_id =
102 de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid); 100 le32_to_cpu(de->de_ih->ih_key.k_dir_id);
103 set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh)); 101 de->de_entry_key.on_disk_key.k_objectid =
104 set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY); 102 le32_to_cpu(de->de_ih->ih_key.k_objectid);
103 set_cpu_key_k_offset(&(de->de_entry_key), deh_offset(deh));
104 set_cpu_key_k_type(&(de->de_entry_key), TYPE_DIRENTRY);
105} 105}
106 106
107
108/* We assign a key to each directory item, and place multiple entries 107/* We assign a key to each directory item, and place multiple entries
109in a single directory item. A directory item has a key equal to the 108in a single directory item. A directory item has a key equal to the
110key of the first directory entry in it. 109key of the first directory entry in it.
@@ -117,58 +116,60 @@ entry position in the item
117*/ 116*/
118 117
119/* The function is NOT SCHEDULE-SAFE! */ 118/* The function is NOT SCHEDULE-SAFE! */
120int search_by_entry_key (struct super_block * sb, const struct cpu_key * key, 119int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
121 struct path * path, struct reiserfs_dir_entry * de) 120 struct path *path, struct reiserfs_dir_entry *de)
122{ 121{
123 int retval; 122 int retval;
124 123
125 retval = search_item (sb, key, path); 124 retval = search_item(sb, key, path);
126 switch (retval) { 125 switch (retval) {
127 case ITEM_NOT_FOUND: 126 case ITEM_NOT_FOUND:
128 if (!PATH_LAST_POSITION (path)) { 127 if (!PATH_LAST_POSITION(path)) {
129 reiserfs_warning (sb, "vs-7000: search_by_entry_key: search_by_key returned item position == 0"); 128 reiserfs_warning(sb,
130 pathrelse(path) ; 129 "vs-7000: search_by_entry_key: search_by_key returned item position == 0");
131 return IO_ERROR ; 130 pathrelse(path);
131 return IO_ERROR;
132 }
133 PATH_LAST_POSITION(path)--;
134
135 case ITEM_FOUND:
136 break;
137
138 case IO_ERROR:
139 return retval;
140
141 default:
142 pathrelse(path);
143 reiserfs_warning(sb,
144 "vs-7002: search_by_entry_key: no path to here");
145 return IO_ERROR;
132 } 146 }
133 PATH_LAST_POSITION (path) --;
134
135 case ITEM_FOUND:
136 break;
137
138 case IO_ERROR:
139 return retval;
140 147
141 default: 148 set_de_item_location(de, path);
142 pathrelse (path);
143 reiserfs_warning (sb, "vs-7002: search_by_entry_key: no path to here");
144 return IO_ERROR;
145 }
146
147 set_de_item_location (de, path);
148 149
149#ifdef CONFIG_REISERFS_CHECK 150#ifdef CONFIG_REISERFS_CHECK
150 if (!is_direntry_le_ih (de->de_ih) || 151 if (!is_direntry_le_ih(de->de_ih) ||
151 COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) { 152 COMP_SHORT_KEYS(&(de->de_ih->ih_key), key)) {
152 print_block (de->de_bh, 0, -1, -1); 153 print_block(de->de_bh, 0, -1, -1);
153 reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or " 154 reiserfs_panic(sb,
154 "does not belong to the same directory as key %K", de->de_ih, key); 155 "vs-7005: search_by_entry_key: found item %h is not directory item or "
155 } 156 "does not belong to the same directory as key %K",
156#endif /* CONFIG_REISERFS_CHECK */ 157 de->de_ih, key);
157 158 }
158 /* binary search in directory item by third componen t of the 159#endif /* CONFIG_REISERFS_CHECK */
159 key. sets de->de_entry_num of de */ 160
160 retval = bin_search_in_dir_item (de, cpu_key_k_offset (key)); 161 /* binary search in directory item by third componen t of the
161 path->pos_in_item = de->de_entry_num; 162 key. sets de->de_entry_num of de */
162 if (retval != NAME_NOT_FOUND) { 163 retval = bin_search_in_dir_item(de, cpu_key_k_offset(key));
163 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set 164 path->pos_in_item = de->de_entry_num;
164 set_de_name_and_namelen (de); 165 if (retval != NAME_NOT_FOUND) {
165 set_de_object_key (de); 166 // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set
166 } 167 set_de_name_and_namelen(de);
167 return retval; 168 set_de_object_key(de);
169 }
170 return retval;
168} 171}
169 172
170
171
172/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ 173/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */
173 174
174/* The third component is hashed, and you can choose from more than 175/* The third component is hashed, and you can choose from more than
@@ -176,197 +177,210 @@ int search_by_entry_key (struct super_block * sb, const struct cpu_key * key,
176 but are thought about. This function should be moved to hashes.c 177 but are thought about. This function should be moved to hashes.c
177 Jedi, please do so. -Hans */ 178 Jedi, please do so. -Hans */
178 179
179static __u32 get_third_component (struct super_block * s, 180static __u32 get_third_component(struct super_block *s,
180 const char * name, int len) 181 const char *name, int len)
181{ 182{
182 __u32 res; 183 __u32 res;
183 184
184 if (!len || (len == 1 && name[0] == '.')) 185 if (!len || (len == 1 && name[0] == '.'))
185 return DOT_OFFSET; 186 return DOT_OFFSET;
186 if (len == 2 && name[0] == '.' && name[1] == '.') 187 if (len == 2 && name[0] == '.' && name[1] == '.')
187 return DOT_DOT_OFFSET; 188 return DOT_DOT_OFFSET;
188 189
189 res = REISERFS_SB(s)->s_hash_function (name, len); 190 res = REISERFS_SB(s)->s_hash_function(name, len);
190 191
191 // take bits from 7-th to 30-th including both bounds 192 // take bits from 7-th to 30-th including both bounds
192 res = GET_HASH_VALUE(res); 193 res = GET_HASH_VALUE(res);
193 if (res == 0) 194 if (res == 0)
194 // needed to have no names before "." and ".." those have hash 195 // needed to have no names before "." and ".." those have hash
195 // value == 0 and generation conters 1 and 2 accordingly 196 // value == 0 and generation conters 1 and 2 accordingly
196 res = 128; 197 res = 128;
197 return res + MAX_GENERATION_NUMBER; 198 return res + MAX_GENERATION_NUMBER;
198} 199}
199 200
200 201static int reiserfs_match(struct reiserfs_dir_entry *de,
201static int reiserfs_match (struct reiserfs_dir_entry * de, 202 const char *name, int namelen)
202 const char * name, int namelen)
203{ 203{
204 int retval = NAME_NOT_FOUND; 204 int retval = NAME_NOT_FOUND;
205 205
206 if ((namelen == de->de_namelen) && 206 if ((namelen == de->de_namelen) &&
207 !memcmp(de->de_name, name, de->de_namelen)) 207 !memcmp(de->de_name, name, de->de_namelen))
208 retval = (de_visible (de->de_deh + de->de_entry_num) ? NAME_FOUND : NAME_FOUND_INVISIBLE); 208 retval =
209 (de_visible(de->de_deh + de->de_entry_num) ? NAME_FOUND :
210 NAME_FOUND_INVISIBLE);
209 211
210 return retval; 212 return retval;
211} 213}
212 214
213
214/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ 215/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */
215 216
216 /* used when hash collisions exist */ 217 /* used when hash collisions exist */
217 218
218 219static int linear_search_in_dir_item(struct cpu_key *key,
219static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de, 220 struct reiserfs_dir_entry *de,
220 const char * name, int namelen) 221 const char *name, int namelen)
221{ 222{
222 struct reiserfs_de_head * deh = de->de_deh; 223 struct reiserfs_de_head *deh = de->de_deh;
223 int retval; 224 int retval;
224 int i; 225 int i;
225 226
226 i = de->de_entry_num; 227 i = de->de_entry_num;
227 228
228 if (i == I_ENTRY_COUNT (de->de_ih) || 229 if (i == I_ENTRY_COUNT(de->de_ih) ||
229 GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) { 230 GET_HASH_VALUE(deh_offset(deh + i)) !=
230 i --; 231 GET_HASH_VALUE(cpu_key_k_offset(key))) {
231 } 232 i--;
233 }
232 234
233 RFALSE( de->de_deh != B_I_DEH (de->de_bh, de->de_ih), 235 RFALSE(de->de_deh != B_I_DEH(de->de_bh, de->de_ih),
234 "vs-7010: array of entry headers not found"); 236 "vs-7010: array of entry headers not found");
235 237
236 deh += i; 238 deh += i;
237 239
238 for (; i >= 0; i --, deh --) { 240 for (; i >= 0; i--, deh--) {
239 if (GET_HASH_VALUE (deh_offset (deh)) != 241 if (GET_HASH_VALUE(deh_offset(deh)) !=
240 GET_HASH_VALUE (cpu_key_k_offset (key))) { 242 GET_HASH_VALUE(cpu_key_k_offset(key))) {
241 // hash value does not match, no need to check whole name 243 // hash value does not match, no need to check whole name
242 return NAME_NOT_FOUND; 244 return NAME_NOT_FOUND;
243 } 245 }
244 246
245 /* mark, that this generation number is used */ 247 /* mark, that this generation number is used */
246 if (de->de_gen_number_bit_string) 248 if (de->de_gen_number_bit_string)
247 set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), (unsigned long *)de->de_gen_number_bit_string); 249 set_bit(GET_GENERATION_NUMBER(deh_offset(deh)),
250 (unsigned long *)de->de_gen_number_bit_string);
248 251
249 // calculate pointer to name and namelen 252 // calculate pointer to name and namelen
250 de->de_entry_num = i; 253 de->de_entry_num = i;
251 set_de_name_and_namelen (de); 254 set_de_name_and_namelen(de);
252 255
253 if ((retval = reiserfs_match (de, name, namelen)) != NAME_NOT_FOUND) { 256 if ((retval =
254 // de's de_name, de_namelen, de_recordlen are set. Fill the rest: 257 reiserfs_match(de, name, namelen)) != NAME_NOT_FOUND) {
258 // de's de_name, de_namelen, de_recordlen are set. Fill the rest:
255 259
256 // key of pointed object 260 // key of pointed object
257 set_de_object_key (de); 261 set_de_object_key(de);
258 262
259 store_de_entry_key (de); 263 store_de_entry_key(de);
260 264
261 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE 265 // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE
262 return retval; 266 return retval;
267 }
263 } 268 }
264 }
265
266 if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0)
267 /* we have reached left most entry in the node. In common we
268 have to go to the left neighbor, but if generation counter
269 is 0 already, we know for sure, that there is no name with
270 the same hash value */
271 // FIXME: this work correctly only because hash value can not
272 // be 0. Btw, in case of Yura's hash it is probably possible,
273 // so, this is a bug
274 return NAME_NOT_FOUND;
275 269
276 RFALSE( de->de_item_num, 270 if (GET_GENERATION_NUMBER(le_ih_k_offset(de->de_ih)) == 0)
277 "vs-7015: two diritems of the same directory in one node?"); 271 /* we have reached left most entry in the node. In common we
272 have to go to the left neighbor, but if generation counter
273 is 0 already, we know for sure, that there is no name with
274 the same hash value */
275 // FIXME: this work correctly only because hash value can not
276 // be 0. Btw, in case of Yura's hash it is probably possible,
277 // so, this is a bug
278 return NAME_NOT_FOUND;
278 279
279 return GOTO_PREVIOUS_ITEM; 280 RFALSE(de->de_item_num,
280} 281 "vs-7015: two diritems of the same directory in one node?");
281 282
283 return GOTO_PREVIOUS_ITEM;
284}
282 285
283// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND 286// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND
284// FIXME: should add something like IOERROR 287// FIXME: should add something like IOERROR
285static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, 288static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
286 struct path * path_to_entry, struct reiserfs_dir_entry * de) 289 struct path *path_to_entry,
290 struct reiserfs_dir_entry *de)
287{ 291{
288 struct cpu_key key_to_search; 292 struct cpu_key key_to_search;
289 int retval; 293 int retval;
290 294
291 295 if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
292 if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) 296 return NAME_NOT_FOUND;
293 return NAME_NOT_FOUND; 297
294 298 /* we will search for this key in the tree */
295 /* we will search for this key in the tree */ 299 make_cpu_key(&key_to_search, dir,
296 make_cpu_key (&key_to_search, dir, 300 get_third_component(dir->i_sb, name, namelen),
297 get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); 301 TYPE_DIRENTRY, 3);
298 302
299 while (1) { 303 while (1) {
300 retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de); 304 retval =
301 if (retval == IO_ERROR) { 305 search_by_entry_key(dir->i_sb, &key_to_search,
302 reiserfs_warning (dir->i_sb, "zam-7001: io error in %s", 306 path_to_entry, de);
303 __FUNCTION__); 307 if (retval == IO_ERROR) {
304 return IO_ERROR; 308 reiserfs_warning(dir->i_sb, "zam-7001: io error in %s",
305 } 309 __FUNCTION__);
306 310 return IO_ERROR;
307 /* compare names for all entries having given hash value */ 311 }
308 retval = linear_search_in_dir_item (&key_to_search, de, name, namelen); 312
309 if (retval != GOTO_PREVIOUS_ITEM) { 313 /* compare names for all entries having given hash value */
310 /* there is no need to scan directory anymore. Given entry found or does not exist */ 314 retval =
311 path_to_entry->pos_in_item = de->de_entry_num; 315 linear_search_in_dir_item(&key_to_search, de, name,
312 return retval; 316 namelen);
313 } 317 if (retval != GOTO_PREVIOUS_ITEM) {
314 318 /* there is no need to scan directory anymore. Given entry found or does not exist */
315 /* there is left neighboring item of this directory and given entry can be there */ 319 path_to_entry->pos_in_item = de->de_entry_num;
316 set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1); 320 return retval;
317 pathrelse (path_to_entry); 321 }
318 322
319 } /* while (1) */ 323 /* there is left neighboring item of this directory and given entry can be there */
324 set_cpu_key_k_offset(&key_to_search,
325 le_ih_k_offset(de->de_ih) - 1);
326 pathrelse(path_to_entry);
327
328 } /* while (1) */
320} 329}
321 330
322 331static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
323static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry, struct nameidata *nd) 332 struct nameidata *nd)
324{ 333{
325 int retval; 334 int retval;
326 struct inode * inode = NULL; 335 struct inode *inode = NULL;
327 struct reiserfs_dir_entry de; 336 struct reiserfs_dir_entry de;
328 INITIALIZE_PATH (path_to_entry); 337 INITIALIZE_PATH(path_to_entry);
329 338
330 if (REISERFS_MAX_NAME (dir->i_sb->s_blocksize) < dentry->d_name.len) 339 if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
331 return ERR_PTR(-ENAMETOOLONG); 340 return ERR_PTR(-ENAMETOOLONG);
332 341
333 reiserfs_write_lock(dir->i_sb); 342 reiserfs_write_lock(dir->i_sb);
334 de.de_gen_number_bit_string = NULL; 343 de.de_gen_number_bit_string = NULL;
335 retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de); 344 retval =
336 pathrelse (&path_to_entry); 345 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
337 if (retval == NAME_FOUND) { 346 &path_to_entry, &de);
338 /* Hide the .reiserfs_priv directory */ 347 pathrelse(&path_to_entry);
339 if (reiserfs_xattrs (dir->i_sb) && 348 if (retval == NAME_FOUND) {
340 !old_format_only(dir->i_sb) && 349 /* Hide the .reiserfs_priv directory */
341 REISERFS_SB(dir->i_sb)->priv_root && 350 if (reiserfs_xattrs(dir->i_sb) &&
342 REISERFS_SB(dir->i_sb)->priv_root->d_inode && 351 !old_format_only(dir->i_sb) &&
343 de.de_objectid == le32_to_cpu (INODE_PKEY(REISERFS_SB(dir->i_sb)->priv_root->d_inode)->k_objectid)) { 352 REISERFS_SB(dir->i_sb)->priv_root &&
344 reiserfs_write_unlock (dir->i_sb); 353 REISERFS_SB(dir->i_sb)->priv_root->d_inode &&
345 return ERR_PTR (-EACCES); 354 de.de_objectid ==
355 le32_to_cpu(INODE_PKEY
356 (REISERFS_SB(dir->i_sb)->priv_root->d_inode)->
357 k_objectid)) {
358 reiserfs_write_unlock(dir->i_sb);
359 return ERR_PTR(-EACCES);
360 }
361
362 inode =
363 reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
364 if (!inode || IS_ERR(inode)) {
365 reiserfs_write_unlock(dir->i_sb);
366 return ERR_PTR(-EACCES);
367 }
368
369 /* Propogate the priv_object flag so we know we're in the priv tree */
370 if (is_reiserfs_priv_object(dir))
371 reiserfs_mark_inode_private(inode);
372 }
373 reiserfs_write_unlock(dir->i_sb);
374 if (retval == IO_ERROR) {
375 return ERR_PTR(-EIO);
346 } 376 }
347 377
348 inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); 378 if (inode)
349 if (!inode || IS_ERR(inode)) { 379 return d_splice_alias(inode, dentry);
350 reiserfs_write_unlock(dir->i_sb);
351 return ERR_PTR(-EACCES);
352 }
353
354 /* Propogate the priv_object flag so we know we're in the priv tree */
355 if (is_reiserfs_priv_object (dir))
356 reiserfs_mark_inode_private (inode);
357 }
358 reiserfs_write_unlock(dir->i_sb);
359 if ( retval == IO_ERROR ) {
360 return ERR_PTR(-EIO);
361 }
362
363 if (inode)
364 return d_splice_alias(inode, dentry);
365
366 d_add(dentry, inode);
367 return NULL;
368}
369 380
381 d_add(dentry, inode);
382 return NULL;
383}
370 384
371/* 385/*
372** looks up the dentry of the parent directory for child. 386** looks up the dentry of the parent directory for child.
@@ -374,40 +388,38 @@ static struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dent
374*/ 388*/
375struct dentry *reiserfs_get_parent(struct dentry *child) 389struct dentry *reiserfs_get_parent(struct dentry *child)
376{ 390{
377 int retval; 391 int retval;
378 struct inode * inode = NULL; 392 struct inode *inode = NULL;
379 struct reiserfs_dir_entry de; 393 struct reiserfs_dir_entry de;
380 INITIALIZE_PATH (path_to_entry); 394 INITIALIZE_PATH(path_to_entry);
381 struct dentry *parent; 395 struct dentry *parent;
382 struct inode *dir = child->d_inode ; 396 struct inode *dir = child->d_inode;
383 397
384 398 if (dir->i_nlink == 0) {
385 if (dir->i_nlink == 0) { 399 return ERR_PTR(-ENOENT);
386 return ERR_PTR(-ENOENT); 400 }
387 } 401 de.de_gen_number_bit_string = NULL;
388 de.de_gen_number_bit_string = NULL; 402
389 403 reiserfs_write_lock(dir->i_sb);
390 reiserfs_write_lock(dir->i_sb); 404 retval = reiserfs_find_entry(dir, "..", 2, &path_to_entry, &de);
391 retval = reiserfs_find_entry (dir, "..", 2, &path_to_entry, &de); 405 pathrelse(&path_to_entry);
392 pathrelse (&path_to_entry); 406 if (retval != NAME_FOUND) {
393 if (retval != NAME_FOUND) { 407 reiserfs_write_unlock(dir->i_sb);
408 return ERR_PTR(-ENOENT);
409 }
410 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
394 reiserfs_write_unlock(dir->i_sb); 411 reiserfs_write_unlock(dir->i_sb);
395 return ERR_PTR(-ENOENT);
396 }
397 inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
398 reiserfs_write_unlock(dir->i_sb);
399
400 if (!inode || IS_ERR(inode)) {
401 return ERR_PTR(-EACCES);
402 }
403 parent = d_alloc_anon(inode);
404 if (!parent) {
405 iput(inode);
406 parent = ERR_PTR(-ENOMEM);
407 }
408 return parent;
409}
410 412
413 if (!inode || IS_ERR(inode)) {
414 return ERR_PTR(-EACCES);
415 }
416 parent = d_alloc_anon(inode);
417 if (!parent) {
418 iput(inode);
419 parent = ERR_PTR(-ENOMEM);
420 }
421 return parent;
422}
411 423
412/* add entry to the directory (entry can be hidden). 424/* add entry to the directory (entry can be hidden).
413 425
@@ -415,132 +427,143 @@ insert definition of when hidden directories are used here -Hans
415 427
416 Does not mark dir inode dirty, do it after successesfull call to it */ 428 Does not mark dir inode dirty, do it after successesfull call to it */
417 429
418static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir, 430static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
419 const char * name, int namelen, struct inode * inode, 431 struct inode *dir, const char *name, int namelen,
420 int visible) 432 struct inode *inode, int visible)
421{ 433{
422 struct cpu_key entry_key; 434 struct cpu_key entry_key;
423 struct reiserfs_de_head * deh; 435 struct reiserfs_de_head *deh;
424 INITIALIZE_PATH (path); 436 INITIALIZE_PATH(path);
425 struct reiserfs_dir_entry de; 437 struct reiserfs_dir_entry de;
426 int bit_string [MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; 438 int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1];
427 int gen_number; 439 int gen_number;
428 char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc 440 char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc
429 if we create file with short name */ 441 if we create file with short name */
430 char * buffer; 442 char *buffer;
431 int buflen, paste_size; 443 int buflen, paste_size;
432 int retval; 444 int retval;
433 445
434 BUG_ON (!th->t_trans_id); 446 BUG_ON(!th->t_trans_id);
435 447
436 /* cannot allow items to be added into a busy deleted directory */ 448 /* cannot allow items to be added into a busy deleted directory */
437 if (!namelen) 449 if (!namelen)
438 return -EINVAL; 450 return -EINVAL;
439 451
440 if (namelen > REISERFS_MAX_NAME (dir->i_sb->s_blocksize)) 452 if (namelen > REISERFS_MAX_NAME(dir->i_sb->s_blocksize))
441 return -ENAMETOOLONG; 453 return -ENAMETOOLONG;
442 454
443 /* each entry has unique key. compose it */ 455 /* each entry has unique key. compose it */
444 make_cpu_key (&entry_key, dir, 456 make_cpu_key(&entry_key, dir,
445 get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); 457 get_third_component(dir->i_sb, name, namelen),
446 458 TYPE_DIRENTRY, 3);
447 /* get memory for composing the entry */ 459
448 buflen = DEH_SIZE + ROUND_UP (namelen); 460 /* get memory for composing the entry */
449 if (buflen > sizeof (small_buf)) { 461 buflen = DEH_SIZE + ROUND_UP(namelen);
450 buffer = reiserfs_kmalloc (buflen, GFP_NOFS, dir->i_sb); 462 if (buflen > sizeof(small_buf)) {
451 if (buffer == 0) 463 buffer = reiserfs_kmalloc(buflen, GFP_NOFS, dir->i_sb);
452 return -ENOMEM; 464 if (buffer == 0)
453 } else 465 return -ENOMEM;
454 buffer = small_buf; 466 } else
455 467 buffer = small_buf;
456 paste_size = (get_inode_sd_version (dir) == STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen; 468
457 469 paste_size =
458 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ 470 (get_inode_sd_version(dir) ==
459 deh = (struct reiserfs_de_head *)buffer; 471 STAT_DATA_V1) ? (DEH_SIZE + namelen) : buflen;
460 deh->deh_location = 0; /* JDM Endian safe if 0 */ 472
461 put_deh_offset( deh, cpu_key_k_offset( &entry_key ) ); 473 /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */
462 deh->deh_state = 0; /* JDM Endian safe if 0 */ 474 deh = (struct reiserfs_de_head *)buffer;
463 /* put key (ino analog) to de */ 475 deh->deh_location = 0; /* JDM Endian safe if 0 */
464 deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id; /* safe: k_dir_id is le */ 476 put_deh_offset(deh, cpu_key_k_offset(&entry_key));
465 deh->deh_objectid = INODE_PKEY (inode)->k_objectid; /* safe: k_objectid is le */ 477 deh->deh_state = 0; /* JDM Endian safe if 0 */
466 478 /* put key (ino analog) to de */
467 /* copy name */ 479 deh->deh_dir_id = INODE_PKEY(inode)->k_dir_id; /* safe: k_dir_id is le */
468 memcpy ((char *)(deh + 1), name, namelen); 480 deh->deh_objectid = INODE_PKEY(inode)->k_objectid; /* safe: k_objectid is le */
469 /* padd by 0s to the 4 byte boundary */ 481
470 padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen); 482 /* copy name */
471 483 memcpy((char *)(deh + 1), name, namelen);
472 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ 484 /* padd by 0s to the 4 byte boundary */
473 mark_de_without_sd (deh); 485 padd_item((char *)(deh + 1), ROUND_UP(namelen), namelen);
474 visible ? mark_de_visible (deh) : mark_de_hidden (deh); 486
475 487 /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */
476 /* find the proper place for the new entry */ 488 mark_de_without_sd(deh);
477 memset (bit_string, 0, sizeof (bit_string)); 489 visible ? mark_de_visible(deh) : mark_de_hidden(deh);
478 de.de_gen_number_bit_string = (char *)bit_string; 490
479 retval = reiserfs_find_entry (dir, name, namelen, &path, &de); 491 /* find the proper place for the new entry */
480 if( retval != NAME_NOT_FOUND ) { 492 memset(bit_string, 0, sizeof(bit_string));
481 if (buffer != small_buf) 493 de.de_gen_number_bit_string = (char *)bit_string;
482 reiserfs_kfree (buffer, buflen, dir->i_sb); 494 retval = reiserfs_find_entry(dir, name, namelen, &path, &de);
483 pathrelse (&path); 495 if (retval != NAME_NOT_FOUND) {
496 if (buffer != small_buf)
497 reiserfs_kfree(buffer, buflen, dir->i_sb);
498 pathrelse(&path);
499
500 if (retval == IO_ERROR) {
501 return -EIO;
502 }
503
504 if (retval != NAME_FOUND) {
505 reiserfs_warning(dir->i_sb,
506 "zam-7002:%s: \"reiserfs_find_entry\" "
507 "has returned unexpected value (%d)",
508 __FUNCTION__, retval);
509 }
510
511 return -EEXIST;
512 }
484 513
485 if ( retval == IO_ERROR ) { 514 gen_number =
486 return -EIO; 515 find_first_zero_bit((unsigned long *)bit_string,
516 MAX_GENERATION_NUMBER + 1);
517 if (gen_number > MAX_GENERATION_NUMBER) {
518 /* there is no free generation number */
519 reiserfs_warning(dir->i_sb,
520 "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
521 if (buffer != small_buf)
522 reiserfs_kfree(buffer, buflen, dir->i_sb);
523 pathrelse(&path);
524 return -EBUSY;
525 }
526 /* adjust offset of directory enrty */
527 put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number));
528 set_cpu_key_k_offset(&entry_key, deh_offset(deh));
529
530 /* update max-hash-collisions counter in reiserfs_sb_info */
531 PROC_INFO_MAX(th->t_super, max_hash_collisions, gen_number);
532
533 if (gen_number != 0) { /* we need to re-search for the insertion point */
534 if (search_by_entry_key(dir->i_sb, &entry_key, &path, &de) !=
535 NAME_NOT_FOUND) {
536 reiserfs_warning(dir->i_sb,
537 "vs-7032: reiserfs_add_entry: "
538 "entry with this key (%K) already exists",
539 &entry_key);
540
541 if (buffer != small_buf)
542 reiserfs_kfree(buffer, buflen, dir->i_sb);
543 pathrelse(&path);
544 return -EBUSY;
545 }
487 } 546 }
488 547
489 if (retval != NAME_FOUND) { 548 /* perform the insertion of the entry that we have prepared */
490 reiserfs_warning (dir->i_sb, "zam-7002:%s: \"reiserfs_find_entry\" " 549 retval =
491 "has returned unexpected value (%d)", 550 reiserfs_paste_into_item(th, &path, &entry_key, dir, buffer,
492 __FUNCTION__, retval); 551 paste_size);
493 } 552 if (buffer != small_buf)
494 553 reiserfs_kfree(buffer, buflen, dir->i_sb);
495 return -EEXIST; 554 if (retval) {
496 } 555 reiserfs_check_path(&path);
497 556 return retval;
498 gen_number = find_first_zero_bit ((unsigned long *)bit_string, MAX_GENERATION_NUMBER + 1);
499 if (gen_number > MAX_GENERATION_NUMBER) {
500 /* there is no free generation number */
501 reiserfs_warning (dir->i_sb, "reiserfs_add_entry: Congratulations! we have got hash function screwed up");
502 if (buffer != small_buf)
503 reiserfs_kfree (buffer, buflen, dir->i_sb);
504 pathrelse (&path);
505 return -EBUSY;
506 }
507 /* adjust offset of directory enrty */
508 put_deh_offset(deh, SET_GENERATION_NUMBER(deh_offset(deh), gen_number));
509 set_cpu_key_k_offset (&entry_key, deh_offset(deh));
510
511 /* update max-hash-collisions counter in reiserfs_sb_info */
512 PROC_INFO_MAX( th -> t_super, max_hash_collisions, gen_number );
513
514 if (gen_number != 0) { /* we need to re-search for the insertion point */
515 if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) != NAME_NOT_FOUND) {
516 reiserfs_warning (dir->i_sb, "vs-7032: reiserfs_add_entry: "
517 "entry with this key (%K) already exists",
518 &entry_key);
519
520 if (buffer != small_buf)
521 reiserfs_kfree (buffer, buflen, dir->i_sb);
522 pathrelse (&path);
523 return -EBUSY;
524 } 557 }
525 }
526
527 /* perform the insertion of the entry that we have prepared */
528 retval = reiserfs_paste_into_item (th, &path, &entry_key, dir, buffer, paste_size);
529 if (buffer != small_buf)
530 reiserfs_kfree (buffer, buflen, dir->i_sb);
531 if (retval) {
532 reiserfs_check_path(&path) ;
533 return retval;
534 }
535 558
536 dir->i_size += paste_size; 559 dir->i_size += paste_size;
537 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; 560 dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
538 if (!S_ISDIR (inode->i_mode) && visible) 561 if (!S_ISDIR(inode->i_mode) && visible)
539 // reiserfs_mkdir or reiserfs_rename will do that by itself 562 // reiserfs_mkdir or reiserfs_rename will do that by itself
540 reiserfs_update_sd (th, dir); 563 reiserfs_update_sd(th, dir);
541 564
542 reiserfs_check_path(&path) ; 565 reiserfs_check_path(&path);
543 return 0; 566 return 0;
544} 567}
545 568
546/* quota utility function, call if you've had to abort after calling 569/* quota utility function, call if you've had to abort after calling
@@ -548,12 +571,13 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in
548** This should only be called on inodes that do not have stat data 571** This should only be called on inodes that do not have stat data
549** inserted into the tree yet. 572** inserted into the tree yet.
550*/ 573*/
551static int drop_new_inode(struct inode *inode) { 574static int drop_new_inode(struct inode *inode)
552 DQUOT_DROP(inode); 575{
553 make_bad_inode(inode) ; 576 DQUOT_DROP(inode);
554 inode->i_flags |= S_NOQUOTA; 577 make_bad_inode(inode);
555 iput(inode) ; 578 inode->i_flags |= S_NOQUOTA;
556 return 0 ; 579 iput(inode);
580 return 0;
557} 581}
558 582
559/* utility function that does setup for reiserfs_new_inode. 583/* utility function that does setup for reiserfs_new_inode.
@@ -561,905 +585,968 @@ static int drop_new_inode(struct inode *inode) {
561** outside of a transaction, so we had to pull some bits of 585** outside of a transaction, so we had to pull some bits of
562** reiserfs_new_inode out into this func. 586** reiserfs_new_inode out into this func.
563*/ 587*/
564static int new_inode_init(struct inode *inode, struct inode *dir, int mode) { 588static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
565 589{
566 /* the quota init calls have to know who to charge the quota to, so 590
567 ** we have to set uid and gid here 591 /* the quota init calls have to know who to charge the quota to, so
568 */ 592 ** we have to set uid and gid here
569 inode->i_uid = current->fsuid; 593 */
570 inode->i_mode = mode; 594 inode->i_uid = current->fsuid;
571 595 inode->i_mode = mode;
572 if (dir->i_mode & S_ISGID) { 596
573 inode->i_gid = dir->i_gid; 597 if (dir->i_mode & S_ISGID) {
574 if (S_ISDIR(mode)) 598 inode->i_gid = dir->i_gid;
575 inode->i_mode |= S_ISGID; 599 if (S_ISDIR(mode))
576 } else { 600 inode->i_mode |= S_ISGID;
577 inode->i_gid = current->fsgid; 601 } else {
578 } 602 inode->i_gid = current->fsgid;
579 DQUOT_INIT(inode); 603 }
580 return 0 ; 604 DQUOT_INIT(inode);
605 return 0;
581} 606}
582 607
583static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode, 608static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
584 struct nameidata *nd) 609 struct nameidata *nd)
585{ 610{
586 int retval; 611 int retval;
587 struct inode * inode; 612 struct inode *inode;
588 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 613 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
589 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); 614 int jbegin_count =
590 struct reiserfs_transaction_handle th ; 615 JOURNAL_PER_BALANCE_CNT * 2 +
591 int locked; 616 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
592 617 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
593 if (!(inode = new_inode(dir->i_sb))) { 618 struct reiserfs_transaction_handle th;
594 return -ENOMEM ; 619 int locked;
595 } 620
596 new_inode_init(inode, dir, mode); 621 if (!(inode = new_inode(dir->i_sb))) {
597 622 return -ENOMEM;
598 locked = reiserfs_cache_default_acl (dir); 623 }
599 624 new_inode_init(inode, dir, mode);
600 reiserfs_write_lock(dir->i_sb);
601
602 if (locked)
603 reiserfs_write_lock_xattrs (dir->i_sb);
604
605 retval = journal_begin(&th, dir->i_sb, jbegin_count);
606 if (retval) {
607 drop_new_inode (inode);
608 goto out_failed;
609 }
610
611 retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode);
612 if (retval)
613 goto out_failed;
614
615 if (locked) {
616 reiserfs_write_unlock_xattrs (dir->i_sb);
617 locked = 0;
618 }
619
620 inode->i_op = &reiserfs_file_inode_operations;
621 inode->i_fop = &reiserfs_file_operations;
622 inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
623
624 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len,
625 inode, 1/*visible*/);
626 if (retval) {
627 int err;
628 inode->i_nlink--;
629 reiserfs_update_sd (&th, inode);
630 err = journal_end(&th, dir->i_sb, jbegin_count) ;
631 if (err)
632 retval = err;
633 iput (inode);
634 goto out_failed;
635 }
636 reiserfs_update_inode_transaction(inode) ;
637 reiserfs_update_inode_transaction(dir) ;
638
639 d_instantiate(dentry, inode);
640 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
641
642out_failed:
643 if (locked)
644 reiserfs_write_unlock_xattrs (dir->i_sb);
645 reiserfs_write_unlock(dir->i_sb);
646 return retval;
647}
648 625
626 locked = reiserfs_cache_default_acl(dir);
649 627
650static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 628 reiserfs_write_lock(dir->i_sb);
651{
652 int retval;
653 struct inode * inode;
654 struct reiserfs_transaction_handle th ;
655 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
656 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
657 int locked;
658 629
659 if (!new_valid_dev(rdev)) 630 if (locked)
660 return -EINVAL; 631 reiserfs_write_lock_xattrs(dir->i_sb);
632
633 retval = journal_begin(&th, dir->i_sb, jbegin_count);
634 if (retval) {
635 drop_new_inode(inode);
636 goto out_failed;
637 }
638
639 retval =
640 reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
641 inode);
642 if (retval)
643 goto out_failed;
644
645 if (locked) {
646 reiserfs_write_unlock_xattrs(dir->i_sb);
647 locked = 0;
648 }
649
650 inode->i_op = &reiserfs_file_inode_operations;
651 inode->i_fop = &reiserfs_file_operations;
652 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
653
654 retval =
655 reiserfs_add_entry(&th, dir, dentry->d_name.name,
656 dentry->d_name.len, inode, 1 /*visible */ );
657 if (retval) {
658 int err;
659 inode->i_nlink--;
660 reiserfs_update_sd(&th, inode);
661 err = journal_end(&th, dir->i_sb, jbegin_count);
662 if (err)
663 retval = err;
664 iput(inode);
665 goto out_failed;
666 }
667 reiserfs_update_inode_transaction(inode);
668 reiserfs_update_inode_transaction(dir);
661 669
662 if (!(inode = new_inode(dir->i_sb))) { 670 d_instantiate(dentry, inode);
663 return -ENOMEM ; 671 retval = journal_end(&th, dir->i_sb, jbegin_count);
664 }
665 new_inode_init(inode, dir, mode);
666 672
667 locked = reiserfs_cache_default_acl (dir); 673 out_failed:
674 if (locked)
675 reiserfs_write_unlock_xattrs(dir->i_sb);
676 reiserfs_write_unlock(dir->i_sb);
677 return retval;
678}
668 679
669 reiserfs_write_lock(dir->i_sb); 680static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
681 dev_t rdev)
682{
683 int retval;
684 struct inode *inode;
685 struct reiserfs_transaction_handle th;
686 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
687 int jbegin_count =
688 JOURNAL_PER_BALANCE_CNT * 3 +
689 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
690 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
691 int locked;
692
693 if (!new_valid_dev(rdev))
694 return -EINVAL;
695
696 if (!(inode = new_inode(dir->i_sb))) {
697 return -ENOMEM;
698 }
699 new_inode_init(inode, dir, mode);
670 700
671 if (locked) 701 locked = reiserfs_cache_default_acl(dir);
672 reiserfs_write_lock_xattrs (dir->i_sb);
673 702
674 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 703 reiserfs_write_lock(dir->i_sb);
675 if (retval) {
676 drop_new_inode (inode);
677 goto out_failed;
678 }
679 704
680 retval = reiserfs_new_inode (&th, dir, mode, NULL, 0/*i_size*/, dentry, inode); 705 if (locked)
681 if (retval) { 706 reiserfs_write_lock_xattrs(dir->i_sb);
682 goto out_failed;
683 }
684 707
685 if (locked) { 708 retval = journal_begin(&th, dir->i_sb, jbegin_count);
686 reiserfs_write_unlock_xattrs (dir->i_sb); 709 if (retval) {
687 locked = 0; 710 drop_new_inode(inode);
688 } 711 goto out_failed;
712 }
689 713
714 retval =
715 reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry,
716 inode);
717 if (retval) {
718 goto out_failed;
719 }
690 720
691 inode->i_op = &reiserfs_special_inode_operations; 721 if (locked) {
692 init_special_inode(inode, inode->i_mode, rdev) ; 722 reiserfs_write_unlock_xattrs(dir->i_sb);
723 locked = 0;
724 }
693 725
694 //FIXME: needed for block and char devices only 726 inode->i_op = &reiserfs_special_inode_operations;
695 reiserfs_update_sd (&th, inode); 727 init_special_inode(inode, inode->i_mode, rdev);
728
729 //FIXME: needed for block and char devices only
730 reiserfs_update_sd(&th, inode);
731
732 reiserfs_update_inode_transaction(inode);
733 reiserfs_update_inode_transaction(dir);
734
735 retval =
736 reiserfs_add_entry(&th, dir, dentry->d_name.name,
737 dentry->d_name.len, inode, 1 /*visible */ );
738 if (retval) {
739 int err;
740 inode->i_nlink--;
741 reiserfs_update_sd(&th, inode);
742 err = journal_end(&th, dir->i_sb, jbegin_count);
743 if (err)
744 retval = err;
745 iput(inode);
746 goto out_failed;
747 }
696 748
697 reiserfs_update_inode_transaction(inode) ; 749 d_instantiate(dentry, inode);
698 reiserfs_update_inode_transaction(dir) ; 750 retval = journal_end(&th, dir->i_sb, jbegin_count);
699 751
700 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 752 out_failed:
701 inode, 1/*visible*/); 753 if (locked)
702 if (retval) { 754 reiserfs_write_unlock_xattrs(dir->i_sb);
703 int err; 755 reiserfs_write_unlock(dir->i_sb);
704 inode->i_nlink--; 756 return retval;
705 reiserfs_update_sd (&th, inode);
706 err = journal_end(&th, dir->i_sb, jbegin_count) ;
707 if (err)
708 retval = err;
709 iput (inode);
710 goto out_failed;
711 }
712
713 d_instantiate(dentry, inode);
714 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
715
716out_failed:
717 if (locked)
718 reiserfs_write_unlock_xattrs (dir->i_sb);
719 reiserfs_write_unlock(dir->i_sb);
720 return retval;
721} 757}
722 758
723 759static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
724static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
725{ 760{
726 int retval; 761 int retval;
727 struct inode * inode; 762 struct inode *inode;
728 struct reiserfs_transaction_handle th ; 763 struct reiserfs_transaction_handle th;
729 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 764 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
730 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb)); 765 int jbegin_count =
731 int locked; 766 JOURNAL_PER_BALANCE_CNT * 3 +
767 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb) +
768 REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
769 int locked;
732 770
733#ifdef DISPLACE_NEW_PACKING_LOCALITIES 771#ifdef DISPLACE_NEW_PACKING_LOCALITIES
734 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */ 772 /* set flag that new packing locality created and new blocks for the content * of that directory are not displaced yet */
735 REISERFS_I(dir)->new_packing_locality = 1; 773 REISERFS_I(dir)->new_packing_locality = 1;
736#endif 774#endif
737 mode = S_IFDIR | mode; 775 mode = S_IFDIR | mode;
738 if (!(inode = new_inode(dir->i_sb))) { 776 if (!(inode = new_inode(dir->i_sb))) {
739 return -ENOMEM ; 777 return -ENOMEM;
740 } 778 }
741 new_inode_init(inode, dir, mode); 779 new_inode_init(inode, dir, mode);
742 780
743 locked = reiserfs_cache_default_acl (dir); 781 locked = reiserfs_cache_default_acl(dir);
744 782
745 reiserfs_write_lock(dir->i_sb); 783 reiserfs_write_lock(dir->i_sb);
746 if (locked) 784 if (locked)
747 reiserfs_write_lock_xattrs (dir->i_sb); 785 reiserfs_write_lock_xattrs(dir->i_sb);
748 786
749 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 787 retval = journal_begin(&th, dir->i_sb, jbegin_count);
750 if (retval) { 788 if (retval) {
751 drop_new_inode (inode); 789 drop_new_inode(inode);
752 goto out_failed; 790 goto out_failed;
753 } 791 }
754
755
756 /* inc the link count now, so another writer doesn't overflow it while
757 ** we sleep later on.
758 */
759 INC_DIR_INODE_NLINK(dir)
760
761 retval = reiserfs_new_inode (&th, dir, mode, NULL/*symlink*/,
762 old_format_only (dir->i_sb) ?
763 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
764 dentry, inode);
765 if (retval) {
766 dir->i_nlink-- ;
767 goto out_failed;
768 }
769
770 if (locked) {
771 reiserfs_write_unlock_xattrs (dir->i_sb);
772 locked = 0;
773 }
774
775 reiserfs_update_inode_transaction(inode) ;
776 reiserfs_update_inode_transaction(dir) ;
777
778 inode->i_op = &reiserfs_dir_inode_operations;
779 inode->i_fop = &reiserfs_dir_operations;
780
781 // note, _this_ add_entry will not update dir's stat data
782 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len,
783 inode, 1/*visible*/);
784 if (retval) {
785 int err;
786 inode->i_nlink = 0;
787 DEC_DIR_INODE_NLINK(dir);
788 reiserfs_update_sd (&th, inode);
789 err = journal_end(&th, dir->i_sb, jbegin_count) ;
790 if (err)
791 retval = err;
792 iput (inode);
793 goto out_failed;
794 }
795
796 // the above add_entry did not update dir's stat data
797 reiserfs_update_sd (&th, dir);
798
799 d_instantiate(dentry, inode);
800 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
801out_failed:
802 if (locked)
803 reiserfs_write_unlock_xattrs (dir->i_sb);
804 reiserfs_write_unlock(dir->i_sb);
805 return retval;
806}
807 792
808static inline int reiserfs_empty_dir(struct inode *inode) { 793 /* inc the link count now, so another writer doesn't overflow it while
809 /* we can cheat because an old format dir cannot have 794 ** we sleep later on.
810 ** EMPTY_DIR_SIZE, and a new format dir cannot have 795 */
811 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, 796 INC_DIR_INODE_NLINK(dir)
812 ** regardless of disk format version, the directory is empty. 797
813 */ 798 retval = reiserfs_new_inode(&th, dir, mode, NULL /*symlink */ ,
814 if (inode->i_size != EMPTY_DIR_SIZE && 799 old_format_only(dir->i_sb) ?
815 inode->i_size != EMPTY_DIR_SIZE_V1) { 800 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
816 return 0 ; 801 dentry, inode);
817 } 802 if (retval) {
818 return 1 ; 803 dir->i_nlink--;
804 goto out_failed;
805 }
806
807 if (locked) {
808 reiserfs_write_unlock_xattrs(dir->i_sb);
809 locked = 0;
810 }
811
812 reiserfs_update_inode_transaction(inode);
813 reiserfs_update_inode_transaction(dir);
814
815 inode->i_op = &reiserfs_dir_inode_operations;
816 inode->i_fop = &reiserfs_dir_operations;
817
818 // note, _this_ add_entry will not update dir's stat data
819 retval =
820 reiserfs_add_entry(&th, dir, dentry->d_name.name,
821 dentry->d_name.len, inode, 1 /*visible */ );
822 if (retval) {
823 int err;
824 inode->i_nlink = 0;
825 DEC_DIR_INODE_NLINK(dir);
826 reiserfs_update_sd(&th, inode);
827 err = journal_end(&th, dir->i_sb, jbegin_count);
828 if (err)
829 retval = err;
830 iput(inode);
831 goto out_failed;
832 }
833 // the above add_entry did not update dir's stat data
834 reiserfs_update_sd(&th, dir);
835
836 d_instantiate(dentry, inode);
837 retval = journal_end(&th, dir->i_sb, jbegin_count);
838 out_failed:
839 if (locked)
840 reiserfs_write_unlock_xattrs(dir->i_sb);
841 reiserfs_write_unlock(dir->i_sb);
842 return retval;
819} 843}
820 844
821static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) 845static inline int reiserfs_empty_dir(struct inode *inode)
822{ 846{
823 int retval, err; 847 /* we can cheat because an old format dir cannot have
824 struct inode * inode; 848 ** EMPTY_DIR_SIZE, and a new format dir cannot have
825 struct reiserfs_transaction_handle th ; 849 ** EMPTY_DIR_SIZE_V1. So, if the inode is either size,
826 int jbegin_count; 850 ** regardless of disk format version, the directory is empty.
827 INITIALIZE_PATH (path); 851 */
828 struct reiserfs_dir_entry de; 852 if (inode->i_size != EMPTY_DIR_SIZE &&
829 853 inode->i_size != EMPTY_DIR_SIZE_V1) {
830 854 return 0;
831 /* we will be doing 2 balancings and update 2 stat data, we change quotas 855 }
832 * of the owner of the directory and of the owner of the parent directory. 856 return 1;
833 * The quota structure is possibly deleted only on last iput => outside
834 * of this transaction */
835 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
836
837 reiserfs_write_lock(dir->i_sb);
838 retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
839 if (retval)
840 goto out_rmdir;
841
842 de.de_gen_number_bit_string = NULL;
843 if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) {
844 retval = -ENOENT;
845 goto end_rmdir;
846 } else if ( retval == IO_ERROR) {
847 retval = -EIO;
848 goto end_rmdir;
849 }
850
851 inode = dentry->d_inode;
852
853 reiserfs_update_inode_transaction(inode) ;
854 reiserfs_update_inode_transaction(dir) ;
855
856 if (de.de_objectid != inode->i_ino) {
857 // FIXME: compare key of an object and a key found in the
858 // entry
859 retval = -EIO;
860 goto end_rmdir;
861 }
862 if (!reiserfs_empty_dir(inode)) {
863 retval = -ENOTEMPTY;
864 goto end_rmdir;
865 }
866
867 /* cut entry from dir directory */
868 retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir,
869 NULL, /* page */
870 0/*new file size - not used here*/);
871 if (retval < 0)
872 goto end_rmdir;
873
874 if ( inode->i_nlink != 2 && inode->i_nlink != 1 )
875 reiserfs_warning (inode->i_sb, "%s: empty directory has nlink "
876 "!= 2 (%d)", __FUNCTION__, inode->i_nlink);
877
878 inode->i_nlink = 0;
879 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
880 reiserfs_update_sd (&th, inode);
881
882 DEC_DIR_INODE_NLINK(dir)
883 dir->i_size -= (DEH_SIZE + de.de_entrylen);
884 reiserfs_update_sd (&th, dir);
885
886 /* prevent empty directory from getting lost */
887 add_save_link (&th, inode, 0/* not truncate */);
888
889 retval = journal_end(&th, dir->i_sb, jbegin_count) ;
890 reiserfs_check_path(&path) ;
891out_rmdir:
892 reiserfs_write_unlock(dir->i_sb);
893 return retval;
894
895 end_rmdir:
896 /* we must release path, because we did not call
897 reiserfs_cut_from_item, or reiserfs_cut_from_item does not
898 release path if operation was not complete */
899 pathrelse (&path);
900 err = journal_end(&th, dir->i_sb, jbegin_count) ;
901 reiserfs_write_unlock(dir->i_sb);
902 return err ? err : retval;
903} 857}
904 858
905static int reiserfs_unlink (struct inode * dir, struct dentry *dentry) 859static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
906{ 860{
907 int retval, err; 861 int retval, err;
908 struct inode * inode; 862 struct inode *inode;
909 struct reiserfs_dir_entry de; 863 struct reiserfs_transaction_handle th;
910 INITIALIZE_PATH (path); 864 int jbegin_count;
911 struct reiserfs_transaction_handle th ; 865 INITIALIZE_PATH(path);
912 int jbegin_count; 866 struct reiserfs_dir_entry de;
913 unsigned long savelink; 867
914 868 /* we will be doing 2 balancings and update 2 stat data, we change quotas
915 inode = dentry->d_inode; 869 * of the owner of the directory and of the owner of the parent directory.
916 870 * The quota structure is possibly deleted only on last iput => outside
917 /* in this transaction we can be doing at max two balancings and update 871 * of this transaction */
918 * two stat datas, we change quotas of the owner of the directory and of 872 jbegin_count =
919 * the owner of the parent directory. The quota structure is possibly 873 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
920 * deleted only on iput => outside of this transaction */ 874 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
921 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 875
922 876 reiserfs_write_lock(dir->i_sb);
923 reiserfs_write_lock(dir->i_sb); 877 retval = journal_begin(&th, dir->i_sb, jbegin_count);
924 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 878 if (retval)
925 if (retval) 879 goto out_rmdir;
926 goto out_unlink; 880
927 881 de.de_gen_number_bit_string = NULL;
928 de.de_gen_number_bit_string = NULL; 882 if ((retval =
929 if ( (retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de)) == NAME_NOT_FOUND) { 883 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
930 retval = -ENOENT; 884 &path, &de)) == NAME_NOT_FOUND) {
931 goto end_unlink; 885 retval = -ENOENT;
932 } else if (retval == IO_ERROR) { 886 goto end_rmdir;
933 retval = -EIO; 887 } else if (retval == IO_ERROR) {
934 goto end_unlink; 888 retval = -EIO;
935 } 889 goto end_rmdir;
936 890 }
937 reiserfs_update_inode_transaction(inode) ; 891
938 reiserfs_update_inode_transaction(dir) ; 892 inode = dentry->d_inode;
939 893
940 if (de.de_objectid != inode->i_ino) { 894 reiserfs_update_inode_transaction(inode);
941 // FIXME: compare key of an object and a key found in the 895 reiserfs_update_inode_transaction(dir);
942 // entry 896
943 retval = -EIO; 897 if (de.de_objectid != inode->i_ino) {
944 goto end_unlink; 898 // FIXME: compare key of an object and a key found in the
945 } 899 // entry
946 900 retval = -EIO;
947 if (!inode->i_nlink) { 901 goto end_rmdir;
948 reiserfs_warning (inode->i_sb, "%s: deleting nonexistent file " 902 }
949 "(%s:%lu), %d", __FUNCTION__, 903 if (!reiserfs_empty_dir(inode)) {
950 reiserfs_bdevname (inode->i_sb), inode->i_ino, 904 retval = -ENOTEMPTY;
951 inode->i_nlink); 905 goto end_rmdir;
952 inode->i_nlink = 1; 906 }
953 } 907
954 908 /* cut entry from dir directory */
955 inode->i_nlink--; 909 retval = reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL, /* page */
956 910 0 /*new file size - not used here */ );
957 /* 911 if (retval < 0)
958 * we schedule before doing the add_save_link call, save the link 912 goto end_rmdir;
959 * count so we don't race 913
960 */ 914 if (inode->i_nlink != 2 && inode->i_nlink != 1)
961 savelink = inode->i_nlink; 915 reiserfs_warning(inode->i_sb, "%s: empty directory has nlink "
962 916 "!= 2 (%d)", __FUNCTION__, inode->i_nlink);
963 917
964 retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0); 918 inode->i_nlink = 0;
965 if (retval < 0) { 919 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
966 inode->i_nlink++; 920 reiserfs_update_sd(&th, inode);
967 goto end_unlink; 921
968 } 922 DEC_DIR_INODE_NLINK(dir)
969 inode->i_ctime = CURRENT_TIME_SEC; 923 dir->i_size -= (DEH_SIZE + de.de_entrylen);
970 reiserfs_update_sd (&th, inode); 924 reiserfs_update_sd(&th, dir);
971 925
972 dir->i_size -= (de.de_entrylen + DEH_SIZE); 926 /* prevent empty directory from getting lost */
973 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; 927 add_save_link(&th, inode, 0 /* not truncate */ );
974 reiserfs_update_sd (&th, dir); 928
975 929 retval = journal_end(&th, dir->i_sb, jbegin_count);
976 if (!savelink) 930 reiserfs_check_path(&path);
977 /* prevent file from getting lost */ 931 out_rmdir:
978 add_save_link (&th, inode, 0/* not truncate */); 932 reiserfs_write_unlock(dir->i_sb);
979 933 return retval;
980 retval = journal_end(&th, dir->i_sb, jbegin_count) ; 934
981 reiserfs_check_path(&path) ; 935 end_rmdir:
982 reiserfs_write_unlock(dir->i_sb); 936 /* we must release path, because we did not call
983 return retval; 937 reiserfs_cut_from_item, or reiserfs_cut_from_item does not
984 938 release path if operation was not complete */
985 end_unlink: 939 pathrelse(&path);
986 pathrelse (&path); 940 err = journal_end(&th, dir->i_sb, jbegin_count);
987 err = journal_end(&th, dir->i_sb, jbegin_count) ; 941 reiserfs_write_unlock(dir->i_sb);
988 reiserfs_check_path(&path) ; 942 return err ? err : retval;
989 if (err)
990 retval = err;
991out_unlink:
992 reiserfs_write_unlock(dir->i_sb);
993 return retval;
994} 943}
995 944
996static int reiserfs_symlink (struct inode * parent_dir, 945static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
997 struct dentry * dentry, const char * symname)
998{ 946{
999 int retval; 947 int retval, err;
1000 struct inode * inode; 948 struct inode *inode;
1001 char * name; 949 struct reiserfs_dir_entry de;
1002 int item_len; 950 INITIALIZE_PATH(path);
1003 struct reiserfs_transaction_handle th ; 951 struct reiserfs_transaction_handle th;
1004 int mode = S_IFLNK | S_IRWXUGO; 952 int jbegin_count;
1005 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 953 unsigned long savelink;
1006 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb)); 954
1007 955 inode = dentry->d_inode;
1008 if (!(inode = new_inode(parent_dir->i_sb))) { 956
1009 return -ENOMEM ; 957 /* in this transaction we can be doing at max two balancings and update
1010 } 958 * two stat datas, we change quotas of the owner of the directory and of
1011 new_inode_init(inode, parent_dir, mode); 959 * the owner of the parent directory. The quota structure is possibly
1012 960 * deleted only on iput => outside of this transaction */
1013 reiserfs_write_lock(parent_dir->i_sb); 961 jbegin_count =
1014 item_len = ROUND_UP (strlen (symname)); 962 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
1015 if (item_len > MAX_DIRECT_ITEM_LEN (parent_dir->i_sb->s_blocksize)) { 963 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1016 retval = -ENAMETOOLONG; 964
1017 drop_new_inode(inode); 965 reiserfs_write_lock(dir->i_sb);
1018 goto out_failed; 966 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1019 } 967 if (retval)
1020 968 goto out_unlink;
1021 name = reiserfs_kmalloc (item_len, GFP_NOFS, parent_dir->i_sb); 969
1022 if (!name) { 970 de.de_gen_number_bit_string = NULL;
1023 drop_new_inode(inode); 971 if ((retval =
1024 retval = -ENOMEM; 972 reiserfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len,
1025 goto out_failed; 973 &path, &de)) == NAME_NOT_FOUND) {
1026 } 974 retval = -ENOENT;
1027 memcpy (name, symname, strlen (symname)); 975 goto end_unlink;
1028 padd_item (name, item_len, strlen (symname)); 976 } else if (retval == IO_ERROR) {
1029 977 retval = -EIO;
1030 /* We would inherit the default ACL here, but symlinks don't get ACLs */ 978 goto end_unlink;
1031 979 }
1032 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count) ; 980
1033 if (retval) { 981 reiserfs_update_inode_transaction(inode);
1034 drop_new_inode (inode); 982 reiserfs_update_inode_transaction(dir);
1035 reiserfs_kfree (name, item_len, parent_dir->i_sb); 983
1036 goto out_failed; 984 if (de.de_objectid != inode->i_ino) {
1037 } 985 // FIXME: compare key of an object and a key found in the
1038 986 // entry
1039 retval = reiserfs_new_inode (&th, parent_dir, mode, name, strlen (symname), 987 retval = -EIO;
1040 dentry, inode); 988 goto end_unlink;
1041 reiserfs_kfree (name, item_len, parent_dir->i_sb); 989 }
1042 if (retval) { /* reiserfs_new_inode iputs for us */ 990
1043 goto out_failed; 991 if (!inode->i_nlink) {
1044 } 992 reiserfs_warning(inode->i_sb, "%s: deleting nonexistent file "
1045 993 "(%s:%lu), %d", __FUNCTION__,
1046 reiserfs_update_inode_transaction(inode) ; 994 reiserfs_bdevname(inode->i_sb), inode->i_ino,
1047 reiserfs_update_inode_transaction(parent_dir) ; 995 inode->i_nlink);
1048 996 inode->i_nlink = 1;
1049 inode->i_op = &reiserfs_symlink_inode_operations; 997 }
1050 inode->i_mapping->a_ops = &reiserfs_address_space_operations; 998
1051
1052 // must be sure this inode is written with this transaction
1053 //
1054 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1055
1056 retval = reiserfs_add_entry (&th, parent_dir, dentry->d_name.name,
1057 dentry->d_name.len, inode, 1/*visible*/);
1058 if (retval) {
1059 int err;
1060 inode->i_nlink--; 999 inode->i_nlink--;
1061 reiserfs_update_sd (&th, inode); 1000
1062 err = journal_end(&th, parent_dir->i_sb, jbegin_count) ; 1001 /*
1002 * we schedule before doing the add_save_link call, save the link
1003 * count so we don't race
1004 */
1005 savelink = inode->i_nlink;
1006
1007 retval =
1008 reiserfs_cut_from_item(&th, &path, &(de.de_entry_key), dir, NULL,
1009 0);
1010 if (retval < 0) {
1011 inode->i_nlink++;
1012 goto end_unlink;
1013 }
1014 inode->i_ctime = CURRENT_TIME_SEC;
1015 reiserfs_update_sd(&th, inode);
1016
1017 dir->i_size -= (de.de_entrylen + DEH_SIZE);
1018 dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
1019 reiserfs_update_sd(&th, dir);
1020
1021 if (!savelink)
1022 /* prevent file from getting lost */
1023 add_save_link(&th, inode, 0 /* not truncate */ );
1024
1025 retval = journal_end(&th, dir->i_sb, jbegin_count);
1026 reiserfs_check_path(&path);
1027 reiserfs_write_unlock(dir->i_sb);
1028 return retval;
1029
1030 end_unlink:
1031 pathrelse(&path);
1032 err = journal_end(&th, dir->i_sb, jbegin_count);
1033 reiserfs_check_path(&path);
1063 if (err) 1034 if (err)
1064 retval = err; 1035 retval = err;
1065 iput (inode); 1036 out_unlink:
1066 goto out_failed; 1037 reiserfs_write_unlock(dir->i_sb);
1067 } 1038 return retval;
1068
1069 d_instantiate(dentry, inode);
1070 retval = journal_end(&th, parent_dir->i_sb, jbegin_count) ;
1071out_failed:
1072 reiserfs_write_unlock(parent_dir->i_sb);
1073 return retval;
1074} 1039}
1075 1040
1076static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry) 1041static int reiserfs_symlink(struct inode *parent_dir,
1042 struct dentry *dentry, const char *symname)
1077{ 1043{
1078 int retval; 1044 int retval;
1079 struct inode *inode = old_dentry->d_inode; 1045 struct inode *inode;
1080 struct reiserfs_transaction_handle th ; 1046 char *name;
1081 /* We need blocks for transaction + update of quotas for the owners of the directory */ 1047 int item_len;
1082 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 1048 struct reiserfs_transaction_handle th;
1083 1049 int mode = S_IFLNK | S_IRWXUGO;
1084 reiserfs_write_lock(dir->i_sb); 1050 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
1085 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1051 int jbegin_count =
1086 //FIXME: sd_nlink is 32 bit for new files 1052 JOURNAL_PER_BALANCE_CNT * 3 +
1087 reiserfs_write_unlock(dir->i_sb); 1053 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb) +
1088 return -EMLINK; 1054 REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
1089 } 1055
1090 if (inode->i_nlink == 0) { 1056 if (!(inode = new_inode(parent_dir->i_sb))) {
1091 reiserfs_write_unlock(dir->i_sb); 1057 return -ENOMEM;
1092 return -ENOENT; 1058 }
1093 } 1059 new_inode_init(inode, parent_dir, mode);
1094 1060
1095 /* inc before scheduling so reiserfs_unlink knows we are here */ 1061 reiserfs_write_lock(parent_dir->i_sb);
1096 inode->i_nlink++; 1062 item_len = ROUND_UP(strlen(symname));
1097 1063 if (item_len > MAX_DIRECT_ITEM_LEN(parent_dir->i_sb->s_blocksize)) {
1098 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 1064 retval = -ENAMETOOLONG;
1099 if (retval) { 1065 drop_new_inode(inode);
1100 inode->i_nlink--; 1066 goto out_failed;
1101 reiserfs_write_unlock (dir->i_sb); 1067 }
1102 return retval; 1068
1103 } 1069 name = reiserfs_kmalloc(item_len, GFP_NOFS, parent_dir->i_sb);
1104 1070 if (!name) {
1105 /* create new entry */ 1071 drop_new_inode(inode);
1106 retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, 1072 retval = -ENOMEM;
1107 inode, 1/*visible*/); 1073 goto out_failed;
1108 1074 }
1109 reiserfs_update_inode_transaction(inode) ; 1075 memcpy(name, symname, strlen(symname));
1110 reiserfs_update_inode_transaction(dir) ; 1076 padd_item(name, item_len, strlen(symname));
1111 1077
1112 if (retval) { 1078 /* We would inherit the default ACL here, but symlinks don't get ACLs */
1113 int err; 1079
1114 inode->i_nlink--; 1080 retval = journal_begin(&th, parent_dir->i_sb, jbegin_count);
1115 err = journal_end(&th, dir->i_sb, jbegin_count) ; 1081 if (retval) {
1116 reiserfs_write_unlock(dir->i_sb); 1082 drop_new_inode(inode);
1117 return err ? err : retval; 1083 reiserfs_kfree(name, item_len, parent_dir->i_sb);
1118 } 1084 goto out_failed;
1085 }
1086
1087 retval =
1088 reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname),
1089 dentry, inode);
1090 reiserfs_kfree(name, item_len, parent_dir->i_sb);
1091 if (retval) { /* reiserfs_new_inode iputs for us */
1092 goto out_failed;
1093 }
1119 1094
1120 inode->i_ctime = CURRENT_TIME_SEC; 1095 reiserfs_update_inode_transaction(inode);
1121 reiserfs_update_sd (&th, inode); 1096 reiserfs_update_inode_transaction(parent_dir);
1097
1098 inode->i_op = &reiserfs_symlink_inode_operations;
1099 inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1100
1101 // must be sure this inode is written with this transaction
1102 //
1103 //reiserfs_update_sd (&th, inode, READ_BLOCKS);
1104
1105 retval = reiserfs_add_entry(&th, parent_dir, dentry->d_name.name,
1106 dentry->d_name.len, inode, 1 /*visible */ );
1107 if (retval) {
1108 int err;
1109 inode->i_nlink--;
1110 reiserfs_update_sd(&th, inode);
1111 err = journal_end(&th, parent_dir->i_sb, jbegin_count);
1112 if (err)
1113 retval = err;
1114 iput(inode);
1115 goto out_failed;
1116 }
1122 1117
1123 atomic_inc(&inode->i_count) ; 1118 d_instantiate(dentry, inode);
1124 d_instantiate(dentry, inode); 1119 retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
1125 retval = journal_end(&th, dir->i_sb, jbegin_count) ; 1120 out_failed:
1126 reiserfs_write_unlock(dir->i_sb); 1121 reiserfs_write_unlock(parent_dir->i_sb);
1127 return retval; 1122 return retval;
1128} 1123}
1129 1124
1125static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1126 struct dentry *dentry)
1127{
1128 int retval;
1129 struct inode *inode = old_dentry->d_inode;
1130 struct reiserfs_transaction_handle th;
1131 /* We need blocks for transaction + update of quotas for the owners of the directory */
1132 int jbegin_count =
1133 JOURNAL_PER_BALANCE_CNT * 3 +
1134 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1135
1136 reiserfs_write_lock(dir->i_sb);
1137 if (inode->i_nlink >= REISERFS_LINK_MAX) {
1138 //FIXME: sd_nlink is 32 bit for new files
1139 reiserfs_write_unlock(dir->i_sb);
1140 return -EMLINK;
1141 }
1142 if (inode->i_nlink == 0) {
1143 reiserfs_write_unlock(dir->i_sb);
1144 return -ENOENT;
1145 }
1146
1147 /* inc before scheduling so reiserfs_unlink knows we are here */
1148 inode->i_nlink++;
1149
1150 retval = journal_begin(&th, dir->i_sb, jbegin_count);
1151 if (retval) {
1152 inode->i_nlink--;
1153 reiserfs_write_unlock(dir->i_sb);
1154 return retval;
1155 }
1156
1157 /* create new entry */
1158 retval =
1159 reiserfs_add_entry(&th, dir, dentry->d_name.name,
1160 dentry->d_name.len, inode, 1 /*visible */ );
1161
1162 reiserfs_update_inode_transaction(inode);
1163 reiserfs_update_inode_transaction(dir);
1164
1165 if (retval) {
1166 int err;
1167 inode->i_nlink--;
1168 err = journal_end(&th, dir->i_sb, jbegin_count);
1169 reiserfs_write_unlock(dir->i_sb);
1170 return err ? err : retval;
1171 }
1172
1173 inode->i_ctime = CURRENT_TIME_SEC;
1174 reiserfs_update_sd(&th, inode);
1175
1176 atomic_inc(&inode->i_count);
1177 d_instantiate(dentry, inode);
1178 retval = journal_end(&th, dir->i_sb, jbegin_count);
1179 reiserfs_write_unlock(dir->i_sb);
1180 return retval;
1181}
1130 1182
1131// de contains information pointing to an entry which 1183// de contains information pointing to an entry which
1132static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de) 1184static int de_still_valid(const char *name, int len,
1185 struct reiserfs_dir_entry *de)
1133{ 1186{
1134 struct reiserfs_dir_entry tmp = *de; 1187 struct reiserfs_dir_entry tmp = *de;
1135 1188
1136 // recalculate pointer to name and name length 1189 // recalculate pointer to name and name length
1137 set_de_name_and_namelen (&tmp); 1190 set_de_name_and_namelen(&tmp);
1138 // FIXME: could check more 1191 // FIXME: could check more
1139 if (tmp.de_namelen != len || memcmp (name, de->de_name, len)) 1192 if (tmp.de_namelen != len || memcmp(name, de->de_name, len))
1140 return 0; 1193 return 0;
1141 return 1; 1194 return 1;
1142} 1195}
1143 1196
1144 1197static int entry_points_to_object(const char *name, int len,
1145static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode) 1198 struct reiserfs_dir_entry *de,
1199 struct inode *inode)
1146{ 1200{
1147 if (!de_still_valid (name, len, de)) 1201 if (!de_still_valid(name, len, de))
1148 return 0; 1202 return 0;
1149 1203
1150 if (inode) { 1204 if (inode) {
1151 if (!de_visible (de->de_deh + de->de_entry_num)) 1205 if (!de_visible(de->de_deh + de->de_entry_num))
1152 reiserfs_panic (NULL, "vs-7042: entry_points_to_object: entry must be visible"); 1206 reiserfs_panic(NULL,
1153 return (de->de_objectid == inode->i_ino) ? 1 : 0; 1207 "vs-7042: entry_points_to_object: entry must be visible");
1154 } 1208 return (de->de_objectid == inode->i_ino) ? 1 : 0;
1209 }
1155 1210
1156 /* this must be added hidden entry */ 1211 /* this must be added hidden entry */
1157 if (de_visible (de->de_deh + de->de_entry_num)) 1212 if (de_visible(de->de_deh + de->de_entry_num))
1158 reiserfs_panic (NULL, "vs-7043: entry_points_to_object: entry must be visible"); 1213 reiserfs_panic(NULL,
1214 "vs-7043: entry_points_to_object: entry must be visible");
1159 1215
1160 return 1; 1216 return 1;
1161} 1217}
1162 1218
1163
1164/* sets key of objectid the entry has to point to */ 1219/* sets key of objectid the entry has to point to */
1165static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct reiserfs_key * key) 1220static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
1221 struct reiserfs_key *key)
1166{ 1222{
1167 /* JDM These operations are endian safe - both are le */ 1223 /* JDM These operations are endian safe - both are le */
1168 de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; 1224 de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id;
1169 de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; 1225 de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid;
1170} 1226}
1171 1227
1172
1173/* 1228/*
1174 * process, that is going to call fix_nodes/do_balance must hold only 1229 * process, that is going to call fix_nodes/do_balance must hold only
1175 * one path. If it holds 2 or more, it can get into endless waiting in 1230 * one path. If it holds 2 or more, it can get into endless waiting in
1176 * get_empty_nodes or its clones 1231 * get_empty_nodes or its clones
1177 */ 1232 */
1178static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, 1233static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1179 struct inode * new_dir, struct dentry *new_dentry) 1234 struct inode *new_dir, struct dentry *new_dentry)
1180{ 1235{
1181 int retval; 1236 int retval;
1182 INITIALIZE_PATH (old_entry_path); 1237 INITIALIZE_PATH(old_entry_path);
1183 INITIALIZE_PATH (new_entry_path); 1238 INITIALIZE_PATH(new_entry_path);
1184 INITIALIZE_PATH (dot_dot_entry_path); 1239 INITIALIZE_PATH(dot_dot_entry_path);
1185 struct item_head new_entry_ih, old_entry_ih, dot_dot_ih ; 1240 struct item_head new_entry_ih, old_entry_ih, dot_dot_ih;
1186 struct reiserfs_dir_entry old_de, new_de, dot_dot_de; 1241 struct reiserfs_dir_entry old_de, new_de, dot_dot_de;
1187 struct inode * old_inode, * new_dentry_inode; 1242 struct inode *old_inode, *new_dentry_inode;
1188 struct reiserfs_transaction_handle th ; 1243 struct reiserfs_transaction_handle th;
1189 int jbegin_count ; 1244 int jbegin_count;
1190 umode_t old_inode_mode; 1245 umode_t old_inode_mode;
1191 unsigned long savelink = 1; 1246 unsigned long savelink = 1;
1192 struct timespec ctime; 1247 struct timespec ctime;
1193 1248
1194 /* three balancings: (1) old name removal, (2) new name insertion 1249 /* three balancings: (1) old name removal, (2) new name insertion
1195 and (3) maybe "save" link insertion 1250 and (3) maybe "save" link insertion
1196 stat data updates: (1) old directory, 1251 stat data updates: (1) old directory,
1197 (2) new directory and (3) maybe old object stat data (when it is 1252 (2) new directory and (3) maybe old object stat data (when it is
1198 directory) and (4) maybe stat data of object to which new entry 1253 directory) and (4) maybe stat data of object to which new entry
1199 pointed initially and (5) maybe block containing ".." of 1254 pointed initially and (5) maybe block containing ".." of
1200 renamed directory 1255 renamed directory
1201 quota updates: two parent directories */ 1256 quota updates: two parent directories */
1202 jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb); 1257 jbegin_count =
1203 1258 JOURNAL_PER_BALANCE_CNT * 3 + 5 +
1204 old_inode = old_dentry->d_inode; 1259 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
1205 new_dentry_inode = new_dentry->d_inode; 1260
1206 1261 old_inode = old_dentry->d_inode;
1207 // make sure, that oldname still exists and points to an object we 1262 new_dentry_inode = new_dentry->d_inode;
1208 // are going to rename 1263
1209 old_de.de_gen_number_bit_string = NULL; 1264 // make sure, that oldname still exists and points to an object we
1210 reiserfs_write_lock(old_dir->i_sb); 1265 // are going to rename
1211 retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, 1266 old_de.de_gen_number_bit_string = NULL;
1212 &old_entry_path, &old_de); 1267 reiserfs_write_lock(old_dir->i_sb);
1213 pathrelse (&old_entry_path); 1268 retval =
1214 if (retval == IO_ERROR) { 1269 reiserfs_find_entry(old_dir, old_dentry->d_name.name,
1215 reiserfs_write_unlock(old_dir->i_sb); 1270 old_dentry->d_name.len, &old_entry_path,
1216 return -EIO; 1271 &old_de);
1217 } 1272 pathrelse(&old_entry_path);
1218 1273 if (retval == IO_ERROR) {
1219 if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
1220 reiserfs_write_unlock(old_dir->i_sb);
1221 return -ENOENT;
1222 }
1223
1224 old_inode_mode = old_inode->i_mode;
1225 if (S_ISDIR(old_inode_mode)) {
1226 // make sure, that directory being renamed has correct ".."
1227 // and that its new parent directory has not too many links
1228 // already
1229
1230 if (new_dentry_inode) {
1231 if (!reiserfs_empty_dir(new_dentry_inode)) {
1232 reiserfs_write_unlock(old_dir->i_sb); 1274 reiserfs_write_unlock(old_dir->i_sb);
1233 return -ENOTEMPTY; 1275 return -EIO;
1234 }
1235 } 1276 }
1236 1277
1237 /* directory is renamed, its parent directory will be changed, 1278 if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) {
1238 ** so find ".." entry 1279 reiserfs_write_unlock(old_dir->i_sb);
1239 */ 1280 return -ENOENT;
1240 dot_dot_de.de_gen_number_bit_string = NULL;
1241 retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de);
1242 pathrelse (&dot_dot_entry_path);
1243 if (retval != NAME_FOUND) {
1244 reiserfs_write_unlock(old_dir->i_sb);
1245 return -EIO;
1246 } 1281 }
1247 1282
1248 /* inode number of .. must equal old_dir->i_ino */ 1283 old_inode_mode = old_inode->i_mode;
1249 if (dot_dot_de.de_objectid != old_dir->i_ino) { 1284 if (S_ISDIR(old_inode_mode)) {
1250 reiserfs_write_unlock(old_dir->i_sb); 1285 // make sure, that directory being renamed has correct ".."
1251 return -EIO; 1286 // and that its new parent directory has not too many links
1287 // already
1288
1289 if (new_dentry_inode) {
1290 if (!reiserfs_empty_dir(new_dentry_inode)) {
1291 reiserfs_write_unlock(old_dir->i_sb);
1292 return -ENOTEMPTY;
1293 }
1294 }
1295
1296 /* directory is renamed, its parent directory will be changed,
1297 ** so find ".." entry
1298 */
1299 dot_dot_de.de_gen_number_bit_string = NULL;
1300 retval =
1301 reiserfs_find_entry(old_inode, "..", 2, &dot_dot_entry_path,
1302 &dot_dot_de);
1303 pathrelse(&dot_dot_entry_path);
1304 if (retval != NAME_FOUND) {
1305 reiserfs_write_unlock(old_dir->i_sb);
1306 return -EIO;
1307 }
1308
1309 /* inode number of .. must equal old_dir->i_ino */
1310 if (dot_dot_de.de_objectid != old_dir->i_ino) {
1311 reiserfs_write_unlock(old_dir->i_sb);
1312 return -EIO;
1313 }
1252 } 1314 }
1253 } 1315
1254 1316 retval = journal_begin(&th, old_dir->i_sb, jbegin_count);
1255 retval = journal_begin(&th, old_dir->i_sb, jbegin_count) ; 1317 if (retval) {
1256 if (retval) { 1318 reiserfs_write_unlock(old_dir->i_sb);
1257 reiserfs_write_unlock (old_dir->i_sb); 1319 return retval;
1258 return retval;
1259 }
1260
1261 /* add new entry (or find the existing one) */
1262 retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len,
1263 old_inode, 0);
1264 if (retval == -EEXIST) {
1265 if (!new_dentry_inode) {
1266 reiserfs_panic (old_dir->i_sb,
1267 "vs-7050: new entry is found, new inode == 0\n");
1268 } 1320 }
1269 } else if (retval) { 1321
1270 int err = journal_end(&th, old_dir->i_sb, jbegin_count) ; 1322 /* add new entry (or find the existing one) */
1271 reiserfs_write_unlock(old_dir->i_sb); 1323 retval =
1272 return err ? err : retval; 1324 reiserfs_add_entry(&th, new_dir, new_dentry->d_name.name,
1273 } 1325 new_dentry->d_name.len, old_inode, 0);
1274 1326 if (retval == -EEXIST) {
1275 reiserfs_update_inode_transaction(old_dir) ; 1327 if (!new_dentry_inode) {
1276 reiserfs_update_inode_transaction(new_dir) ; 1328 reiserfs_panic(old_dir->i_sb,
1277 1329 "vs-7050: new entry is found, new inode == 0\n");
1278 /* this makes it so an fsync on an open fd for the old name will 1330 }
1279 ** commit the rename operation 1331 } else if (retval) {
1280 */ 1332 int err = journal_end(&th, old_dir->i_sb, jbegin_count);
1281 reiserfs_update_inode_transaction(old_inode) ; 1333 reiserfs_write_unlock(old_dir->i_sb);
1282 1334 return err ? err : retval;
1283 if (new_dentry_inode)
1284 reiserfs_update_inode_transaction(new_dentry_inode) ;
1285
1286 while (1) {
1287 // look for old name using corresponding entry key (found by reiserfs_find_entry)
1288 if ((retval = search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key,
1289 &old_entry_path, &old_de)) != NAME_FOUND) {
1290 pathrelse(&old_entry_path);
1291 journal_end(&th, old_dir->i_sb, jbegin_count);
1292 reiserfs_write_unlock(old_dir->i_sb);
1293 return -EIO;
1294 } 1335 }
1295 1336
1296 copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ; 1337 reiserfs_update_inode_transaction(old_dir);
1297 1338 reiserfs_update_inode_transaction(new_dir);
1298 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1) ; 1339
1299 1340 /* this makes it so an fsync on an open fd for the old name will
1300 // look for new name by reiserfs_find_entry 1341 ** commit the rename operation
1301 new_de.de_gen_number_bit_string = NULL; 1342 */
1302 retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, 1343 reiserfs_update_inode_transaction(old_inode);
1303 &new_entry_path, &new_de); 1344
1304 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from 1345 if (new_dentry_inode)
1305 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here. 1346 reiserfs_update_inode_transaction(new_dentry_inode);
1306 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) { 1347
1307 pathrelse(&new_entry_path); 1348 while (1) {
1308 pathrelse(&old_entry_path); 1349 // look for old name using corresponding entry key (found by reiserfs_find_entry)
1309 journal_end(&th, old_dir->i_sb, jbegin_count); 1350 if ((retval =
1310 reiserfs_write_unlock(old_dir->i_sb); 1351 search_by_entry_key(new_dir->i_sb, &old_de.de_entry_key,
1311 return -EIO; 1352 &old_entry_path,
1353 &old_de)) != NAME_FOUND) {
1354 pathrelse(&old_entry_path);
1355 journal_end(&th, old_dir->i_sb, jbegin_count);
1356 reiserfs_write_unlock(old_dir->i_sb);
1357 return -EIO;
1358 }
1359
1360 copy_item_head(&old_entry_ih, get_ih(&old_entry_path));
1361
1362 reiserfs_prepare_for_journal(old_inode->i_sb, old_de.de_bh, 1);
1363
1364 // look for new name by reiserfs_find_entry
1365 new_de.de_gen_number_bit_string = NULL;
1366 retval =
1367 reiserfs_find_entry(new_dir, new_dentry->d_name.name,
1368 new_dentry->d_name.len, &new_entry_path,
1369 &new_de);
1370 // reiserfs_add_entry should not return IO_ERROR, because it is called with essentially same parameters from
1371 // reiserfs_add_entry above, and we'll catch any i/o errors before we get here.
1372 if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) {
1373 pathrelse(&new_entry_path);
1374 pathrelse(&old_entry_path);
1375 journal_end(&th, old_dir->i_sb, jbegin_count);
1376 reiserfs_write_unlock(old_dir->i_sb);
1377 return -EIO;
1378 }
1379
1380 copy_item_head(&new_entry_ih, get_ih(&new_entry_path));
1381
1382 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1);
1383
1384 if (S_ISDIR(old_inode->i_mode)) {
1385 if ((retval =
1386 search_by_entry_key(new_dir->i_sb,
1387 &dot_dot_de.de_entry_key,
1388 &dot_dot_entry_path,
1389 &dot_dot_de)) != NAME_FOUND) {
1390 pathrelse(&dot_dot_entry_path);
1391 pathrelse(&new_entry_path);
1392 pathrelse(&old_entry_path);
1393 journal_end(&th, old_dir->i_sb, jbegin_count);
1394 reiserfs_write_unlock(old_dir->i_sb);
1395 return -EIO;
1396 }
1397 copy_item_head(&dot_dot_ih,
1398 get_ih(&dot_dot_entry_path));
1399 // node containing ".." gets into transaction
1400 reiserfs_prepare_for_journal(old_inode->i_sb,
1401 dot_dot_de.de_bh, 1);
1402 }
1403 /* we should check seals here, not do
1404 this stuff, yes? Then, having
1405 gathered everything into RAM we
1406 should lock the buffers, yes? -Hans */
1407 /* probably. our rename needs to hold more
1408 ** than one path at once. The seals would
1409 ** have to be written to deal with multi-path
1410 ** issues -chris
1411 */
1412 /* sanity checking before doing the rename - avoid races many
1413 ** of the above checks could have scheduled. We have to be
1414 ** sure our items haven't been shifted by another process.
1415 */
1416 if (item_moved(&new_entry_ih, &new_entry_path) ||
1417 !entry_points_to_object(new_dentry->d_name.name,
1418 new_dentry->d_name.len,
1419 &new_de, new_dentry_inode) ||
1420 item_moved(&old_entry_ih, &old_entry_path) ||
1421 !entry_points_to_object(old_dentry->d_name.name,
1422 old_dentry->d_name.len,
1423 &old_de, old_inode)) {
1424 reiserfs_restore_prepared_buffer(old_inode->i_sb,
1425 new_de.de_bh);
1426 reiserfs_restore_prepared_buffer(old_inode->i_sb,
1427 old_de.de_bh);
1428 if (S_ISDIR(old_inode_mode))
1429 reiserfs_restore_prepared_buffer(old_inode->
1430 i_sb,
1431 dot_dot_de.
1432 de_bh);
1433 continue;
1434 }
1435 if (S_ISDIR(old_inode_mode)) {
1436 if (item_moved(&dot_dot_ih, &dot_dot_entry_path) ||
1437 !entry_points_to_object("..", 2, &dot_dot_de,
1438 old_dir)) {
1439 reiserfs_restore_prepared_buffer(old_inode->
1440 i_sb,
1441 old_de.de_bh);
1442 reiserfs_restore_prepared_buffer(old_inode->
1443 i_sb,
1444 new_de.de_bh);
1445 reiserfs_restore_prepared_buffer(old_inode->
1446 i_sb,
1447 dot_dot_de.
1448 de_bh);
1449 continue;
1450 }
1451 }
1452
1453 RFALSE(S_ISDIR(old_inode_mode) &&
1454 !buffer_journal_prepared(dot_dot_de.de_bh), "");
1455
1456 break;
1312 } 1457 }
1313 1458
1314 copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ; 1459 /* ok, all the changes can be done in one fell swoop when we
1460 have claimed all the buffers needed. */
1315 1461
1316 reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ; 1462 mark_de_visible(new_de.de_deh + new_de.de_entry_num);
1463 set_ino_in_dir_entry(&new_de, INODE_PKEY(old_inode));
1464 journal_mark_dirty(&th, old_dir->i_sb, new_de.de_bh);
1317 1465
1318 if (S_ISDIR(old_inode->i_mode)) { 1466 mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
1319 if ((retval = search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, 1467 journal_mark_dirty(&th, old_dir->i_sb, old_de.de_bh);
1320 &dot_dot_entry_path, &dot_dot_de)) != NAME_FOUND) { 1468 ctime = CURRENT_TIME_SEC;
1321 pathrelse(&dot_dot_entry_path); 1469 old_dir->i_ctime = old_dir->i_mtime = ctime;
1322 pathrelse(&new_entry_path); 1470 new_dir->i_ctime = new_dir->i_mtime = ctime;
1323 pathrelse(&old_entry_path); 1471 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of
1324 journal_end(&th, old_dir->i_sb, jbegin_count); 1472 renamed object */
1325 reiserfs_write_unlock(old_dir->i_sb); 1473 old_inode->i_ctime = ctime;
1326 return -EIO; 1474
1327 } 1475 if (new_dentry_inode) {
1328 copy_item_head(&dot_dot_ih, get_ih(&dot_dot_entry_path)) ; 1476 // adjust link number of the victim
1329 // node containing ".." gets into transaction 1477 if (S_ISDIR(new_dentry_inode->i_mode)) {
1330 reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ; 1478 new_dentry_inode->i_nlink = 0;
1331 } 1479 } else {
1332 /* we should check seals here, not do 1480 new_dentry_inode->i_nlink--;
1333 this stuff, yes? Then, having 1481 }
1334 gathered everything into RAM we 1482 new_dentry_inode->i_ctime = ctime;
1335 should lock the buffers, yes? -Hans */ 1483 savelink = new_dentry_inode->i_nlink;
1336 /* probably. our rename needs to hold more
1337 ** than one path at once. The seals would
1338 ** have to be written to deal with multi-path
1339 ** issues -chris
1340 */
1341 /* sanity checking before doing the rename - avoid races many
1342 ** of the above checks could have scheduled. We have to be
1343 ** sure our items haven't been shifted by another process.
1344 */
1345 if (item_moved(&new_entry_ih, &new_entry_path) ||
1346 !entry_points_to_object(new_dentry->d_name.name,
1347 new_dentry->d_name.len,
1348 &new_de, new_dentry_inode) ||
1349 item_moved(&old_entry_ih, &old_entry_path) ||
1350 !entry_points_to_object (old_dentry->d_name.name,
1351 old_dentry->d_name.len,
1352 &old_de, old_inode)) {
1353 reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh);
1354 reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh);
1355 if (S_ISDIR(old_inode_mode))
1356 reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh);
1357 continue;
1358 } 1484 }
1485
1359 if (S_ISDIR(old_inode_mode)) { 1486 if (S_ISDIR(old_inode_mode)) {
1360 if ( item_moved(&dot_dot_ih, &dot_dot_entry_path) || 1487 // adjust ".." of renamed directory
1361 !entry_points_to_object ( "..", 2, &dot_dot_de, old_dir) ) { 1488 set_ino_in_dir_entry(&dot_dot_de, INODE_PKEY(new_dir));
1362 reiserfs_restore_prepared_buffer (old_inode->i_sb, old_de.de_bh); 1489 journal_mark_dirty(&th, new_dir->i_sb, dot_dot_de.de_bh);
1363 reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); 1490
1364 reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); 1491 if (!new_dentry_inode)
1365 continue; 1492 /* there (in new_dir) was no directory, so it got new link
1366 } 1493 (".." of renamed directory) */
1494 INC_DIR_INODE_NLINK(new_dir);
1495
1496 /* old directory lost one link - ".. " of renamed directory */
1497 DEC_DIR_INODE_NLINK(old_dir);
1367 } 1498 }
1499 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
1500 pathrelse(&new_entry_path);
1501 pathrelse(&dot_dot_entry_path);
1368 1502
1369 RFALSE( S_ISDIR(old_inode_mode) && 1503 // FIXME: this reiserfs_cut_from_item's return value may screw up
1370 !buffer_journal_prepared(dot_dot_de.de_bh), "" ); 1504 // anybody, but it will panic if will not be able to find the
1371 1505 // entry. This needs one more clean up
1372 break; 1506 if (reiserfs_cut_from_item
1373 } 1507 (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL,
1374 1508 0) < 0)
1375 /* ok, all the changes can be done in one fell swoop when we 1509 reiserfs_warning(old_dir->i_sb,
1376 have claimed all the buffers needed.*/ 1510 "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?");
1377 1511
1378 mark_de_visible (new_de.de_deh + new_de.de_entry_num); 1512 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1379 set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode)); 1513
1380 journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh); 1514 reiserfs_update_sd(&th, old_dir);
1381 1515 reiserfs_update_sd(&th, new_dir);
1382 mark_de_hidden (old_de.de_deh + old_de.de_entry_num); 1516 reiserfs_update_sd(&th, old_inode);
1383 journal_mark_dirty (&th, old_dir->i_sb, old_de.de_bh); 1517
1384 ctime = CURRENT_TIME_SEC; 1518 if (new_dentry_inode) {
1385 old_dir->i_ctime = old_dir->i_mtime = ctime; 1519 if (savelink == 0)
1386 new_dir->i_ctime = new_dir->i_mtime = ctime; 1520 add_save_link(&th, new_dentry_inode,
1387 /* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch which adds ctime update of 1521 0 /* not truncate */ );
1388 renamed object */ 1522 reiserfs_update_sd(&th, new_dentry_inode);
1389 old_inode->i_ctime = ctime;
1390
1391 if (new_dentry_inode) {
1392 // adjust link number of the victim
1393 if (S_ISDIR(new_dentry_inode->i_mode)) {
1394 new_dentry_inode->i_nlink = 0;
1395 } else {
1396 new_dentry_inode->i_nlink--;
1397 } 1523 }
1398 new_dentry_inode->i_ctime = ctime; 1524
1399 savelink = new_dentry_inode->i_nlink; 1525 retval = journal_end(&th, old_dir->i_sb, jbegin_count);
1400 } 1526 reiserfs_write_unlock(old_dir->i_sb);
1401 1527 return retval;
1402 if (S_ISDIR(old_inode_mode)) {
1403 // adjust ".." of renamed directory
1404 set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir));
1405 journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh);
1406
1407 if (!new_dentry_inode)
1408 /* there (in new_dir) was no directory, so it got new link
1409 (".." of renamed directory) */
1410 INC_DIR_INODE_NLINK(new_dir);
1411
1412 /* old directory lost one link - ".. " of renamed directory */
1413 DEC_DIR_INODE_NLINK(old_dir);
1414 }
1415
1416 // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse
1417 pathrelse (&new_entry_path);
1418 pathrelse (&dot_dot_entry_path);
1419
1420 // FIXME: this reiserfs_cut_from_item's return value may screw up
1421 // anybody, but it will panic if will not be able to find the
1422 // entry. This needs one more clean up
1423 if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0)
1424 reiserfs_warning (old_dir->i_sb, "vs-7060: reiserfs_rename: couldn't not cut old name. Fsck later?");
1425
1426 old_dir->i_size -= DEH_SIZE + old_de.de_entrylen;
1427
1428 reiserfs_update_sd (&th, old_dir);
1429 reiserfs_update_sd (&th, new_dir);
1430 reiserfs_update_sd (&th, old_inode);
1431
1432 if (new_dentry_inode) {
1433 if (savelink == 0)
1434 add_save_link (&th, new_dentry_inode, 0/* not truncate */);
1435 reiserfs_update_sd (&th, new_dentry_inode);
1436 }
1437
1438 retval = journal_end(&th, old_dir->i_sb, jbegin_count) ;
1439 reiserfs_write_unlock(old_dir->i_sb);
1440 return retval;
1441} 1528}
1442 1529
1443/* 1530/*
1444 * directories can handle most operations... 1531 * directories can handle most operations...
1445 */ 1532 */
1446struct inode_operations reiserfs_dir_inode_operations = { 1533struct inode_operations reiserfs_dir_inode_operations = {
1447 //&reiserfs_dir_operations, /* default_file_ops */ 1534 //&reiserfs_dir_operations, /* default_file_ops */
1448 .create = reiserfs_create, 1535 .create = reiserfs_create,
1449 .lookup = reiserfs_lookup, 1536 .lookup = reiserfs_lookup,
1450 .link = reiserfs_link, 1537 .link = reiserfs_link,
1451 .unlink = reiserfs_unlink, 1538 .unlink = reiserfs_unlink,
1452 .symlink = reiserfs_symlink, 1539 .symlink = reiserfs_symlink,
1453 .mkdir = reiserfs_mkdir, 1540 .mkdir = reiserfs_mkdir,
1454 .rmdir = reiserfs_rmdir, 1541 .rmdir = reiserfs_rmdir,
1455 .mknod = reiserfs_mknod, 1542 .mknod = reiserfs_mknod,
1456 .rename = reiserfs_rename, 1543 .rename = reiserfs_rename,
1457 .setattr = reiserfs_setattr, 1544 .setattr = reiserfs_setattr,
1458 .setxattr = reiserfs_setxattr, 1545 .setxattr = reiserfs_setxattr,
1459 .getxattr = reiserfs_getxattr, 1546 .getxattr = reiserfs_getxattr,
1460 .listxattr = reiserfs_listxattr, 1547 .listxattr = reiserfs_listxattr,
1461 .removexattr = reiserfs_removexattr, 1548 .removexattr = reiserfs_removexattr,
1462 .permission = reiserfs_permission, 1549 .permission = reiserfs_permission,
1463}; 1550};
1464 1551
1465/* 1552/*
@@ -1467,28 +1554,27 @@ struct inode_operations reiserfs_dir_inode_operations = {
1467 * stuff added 1554 * stuff added
1468 */ 1555 */
1469struct inode_operations reiserfs_symlink_inode_operations = { 1556struct inode_operations reiserfs_symlink_inode_operations = {
1470 .readlink = generic_readlink, 1557 .readlink = generic_readlink,
1471 .follow_link = page_follow_link_light, 1558 .follow_link = page_follow_link_light,
1472 .put_link = page_put_link, 1559 .put_link = page_put_link,
1473 .setattr = reiserfs_setattr, 1560 .setattr = reiserfs_setattr,
1474 .setxattr = reiserfs_setxattr, 1561 .setxattr = reiserfs_setxattr,
1475 .getxattr = reiserfs_getxattr, 1562 .getxattr = reiserfs_getxattr,
1476 .listxattr = reiserfs_listxattr, 1563 .listxattr = reiserfs_listxattr,
1477 .removexattr = reiserfs_removexattr, 1564 .removexattr = reiserfs_removexattr,
1478 .permission = reiserfs_permission, 1565 .permission = reiserfs_permission,
1479 1566
1480}; 1567};
1481 1568
1482
1483/* 1569/*
1484 * special file operations.. just xattr/acl stuff 1570 * special file operations.. just xattr/acl stuff
1485 */ 1571 */
1486struct inode_operations reiserfs_special_inode_operations = { 1572struct inode_operations reiserfs_special_inode_operations = {
1487 .setattr = reiserfs_setattr, 1573 .setattr = reiserfs_setattr,
1488 .setxattr = reiserfs_setxattr, 1574 .setxattr = reiserfs_setxattr,
1489 .getxattr = reiserfs_getxattr, 1575 .getxattr = reiserfs_getxattr,
1490 .listxattr = reiserfs_listxattr, 1576 .listxattr = reiserfs_listxattr,
1491 .removexattr = reiserfs_removexattr, 1577 .removexattr = reiserfs_removexattr,
1492 .permission = reiserfs_permission, 1578 .permission = reiserfs_permission,
1493 1579
1494}; 1580};
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index bfe8e25ef29..f62590aa9c9 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -14,24 +14,24 @@
14 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\ 14 (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
15 (__le32 *)((rs) + 1)) 15 (__le32 *)((rs) + 1))
16 16
17
18#ifdef CONFIG_REISERFS_CHECK 17#ifdef CONFIG_REISERFS_CHECK
19 18
20static void check_objectid_map (struct super_block * s, __le32 * map) 19static void check_objectid_map(struct super_block *s, __le32 * map)
21{ 20{
22 if (le32_to_cpu (map[0]) != 1) 21 if (le32_to_cpu(map[0]) != 1)
23 reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx", 22 reiserfs_panic(s,
24 ( long unsigned int ) le32_to_cpu (map[0])); 23 "vs-15010: check_objectid_map: map corrupted: %lx",
24 (long unsigned int)le32_to_cpu(map[0]));
25 25
26 // FIXME: add something else here 26 // FIXME: add something else here
27} 27}
28 28
29#else 29#else
30static void check_objectid_map (struct super_block * s, __le32 * map) 30static void check_objectid_map(struct super_block *s, __le32 * map)
31{;} 31{;
32}
32#endif 33#endif
33 34
34
35/* When we allocate objectids we allocate the first unused objectid. 35/* When we allocate objectids we allocate the first unused objectid.
36 Each sequence of objectids in use (the odd sequences) is followed 36 Each sequence of objectids in use (the odd sequences) is followed
37 by a sequence of objectids not in use (the even sequences). We 37 by a sequence of objectids not in use (the even sequences). We
@@ -46,161 +46,162 @@ static void check_objectid_map (struct super_block * s, __le32 * map)
46 interesting optimizations of layout could result from complicating 46 interesting optimizations of layout could result from complicating
47 objectid assignment, but we have deferred making them for now. */ 47 objectid assignment, but we have deferred making them for now. */
48 48
49
50/* get unique object identifier */ 49/* get unique object identifier */
51__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) 50__u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th)
52{ 51{
53 struct super_block * s = th->t_super; 52 struct super_block *s = th->t_super;
54 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 53 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
55 __le32 * map = objectid_map (s, rs); 54 __le32 *map = objectid_map(s, rs);
56 __u32 unused_objectid; 55 __u32 unused_objectid;
57 56
58 BUG_ON (!th->t_trans_id); 57 BUG_ON(!th->t_trans_id);
58
59 check_objectid_map(s, map);
60
61 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
62 /* comment needed -Hans */
63 unused_objectid = le32_to_cpu(map[1]);
64 if (unused_objectid == U32_MAX) {
65 reiserfs_warning(s, "%s: no more object ids", __FUNCTION__);
66 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s));
67 return 0;
68 }
59 69
60 check_objectid_map (s, map); 70 /* This incrementation allocates the first unused objectid. That
71 is to say, the first entry on the objectid map is the first
72 unused objectid, and by incrementing it we use it. See below
73 where we check to see if we eliminated a sequence of unused
74 objectids.... */
75 map[1] = cpu_to_le32(unused_objectid + 1);
76
77 /* Now we check to see if we eliminated the last remaining member of
78 the first even sequence (and can eliminate the sequence by
79 eliminating its last objectid from oids), and can collapse the
80 first two odd sequences into one sequence. If so, then the net
81 result is to eliminate a pair of objectids from oids. We do this
82 by shifting the entire map to the left. */
83 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
84 memmove(map + 1, map + 3,
85 (sb_oid_cursize(rs) - 3) * sizeof(__u32));
86 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
87 }
61 88
62 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 89 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
63 /* comment needed -Hans */ 90 return unused_objectid;
64 unused_objectid = le32_to_cpu (map[1]);
65 if (unused_objectid == U32_MAX) {
66 reiserfs_warning (s, "%s: no more object ids", __FUNCTION__);
67 reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)) ;
68 return 0;
69 }
70
71 /* This incrementation allocates the first unused objectid. That
72 is to say, the first entry on the objectid map is the first
73 unused objectid, and by incrementing it we use it. See below
74 where we check to see if we eliminated a sequence of unused
75 objectids.... */
76 map[1] = cpu_to_le32 (unused_objectid + 1);
77
78 /* Now we check to see if we eliminated the last remaining member of
79 the first even sequence (and can eliminate the sequence by
80 eliminating its last objectid from oids), and can collapse the
81 first two odd sequences into one sequence. If so, then the net
82 result is to eliminate a pair of objectids from oids. We do this
83 by shifting the entire map to the left. */
84 if (sb_oid_cursize(rs) > 2 && map[1] == map[2]) {
85 memmove (map + 1, map + 3, (sb_oid_cursize(rs) - 3) * sizeof(__u32));
86 set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 );
87 }
88
89 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s));
90 return unused_objectid;
91} 91}
92 92
93
94/* makes object identifier unused */ 93/* makes object identifier unused */
95void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, 94void reiserfs_release_objectid(struct reiserfs_transaction_handle *th,
96 __u32 objectid_to_release) 95 __u32 objectid_to_release)
97{ 96{
98 struct super_block * s = th->t_super; 97 struct super_block *s = th->t_super;
99 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 98 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
100 __le32 * map = objectid_map (s, rs); 99 __le32 *map = objectid_map(s, rs);
101 int i = 0; 100 int i = 0;
102 101
103 BUG_ON (!th->t_trans_id); 102 BUG_ON(!th->t_trans_id);
104 //return; 103 //return;
105 check_objectid_map (s, map); 104 check_objectid_map(s, map);
106 105
107 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 106 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
108 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); 107 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
109 108
110 /* start at the beginning of the objectid map (i = 0) and go to 109 /* start at the beginning of the objectid map (i = 0) and go to
111 the end of it (i = disk_sb->s_oid_cursize). Linear search is 110 the end of it (i = disk_sb->s_oid_cursize). Linear search is
112 what we use, though it is possible that binary search would be 111 what we use, though it is possible that binary search would be
113 more efficient after performing lots of deletions (which is 112 more efficient after performing lots of deletions (which is
114 when oids is large.) We only check even i's. */ 113 when oids is large.) We only check even i's. */
115 while (i < sb_oid_cursize(rs)) { 114 while (i < sb_oid_cursize(rs)) {
116 if (objectid_to_release == le32_to_cpu (map[i])) { 115 if (objectid_to_release == le32_to_cpu(map[i])) {
117 /* This incrementation unallocates the objectid. */ 116 /* This incrementation unallocates the objectid. */
118 //map[i]++; 117 //map[i]++;
119 map[i] = cpu_to_le32 (le32_to_cpu (map[i]) + 1); 118 map[i] = cpu_to_le32(le32_to_cpu(map[i]) + 1);
120 119
121 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ 120 /* Did we unallocate the last member of an odd sequence, and can shrink oids? */
122 if (map[i] == map[i+1]) { 121 if (map[i] == map[i + 1]) {
123 /* shrink objectid map */ 122 /* shrink objectid map */
124 memmove (map + i, map + i + 2, 123 memmove(map + i, map + i + 2,
125 (sb_oid_cursize(rs) - i - 2) * sizeof (__u32)); 124 (sb_oid_cursize(rs) - i -
126 //disk_sb->s_oid_cursize -= 2; 125 2) * sizeof(__u32));
127 set_sb_oid_cursize( rs, sb_oid_cursize(rs) - 2 ); 126 //disk_sb->s_oid_cursize -= 2;
128 127 set_sb_oid_cursize(rs, sb_oid_cursize(rs) - 2);
129 RFALSE( sb_oid_cursize(rs) < 2 || 128
130 sb_oid_cursize(rs) > sb_oid_maxsize(rs), 129 RFALSE(sb_oid_cursize(rs) < 2 ||
131 "vs-15005: objectid map corrupted cur_size == %d (max == %d)", 130 sb_oid_cursize(rs) > sb_oid_maxsize(rs),
132 sb_oid_cursize(rs), sb_oid_maxsize(rs)); 131 "vs-15005: objectid map corrupted cur_size == %d (max == %d)",
133 } 132 sb_oid_cursize(rs), sb_oid_maxsize(rs));
134 return; 133 }
134 return;
135 }
136
137 if (objectid_to_release > le32_to_cpu(map[i]) &&
138 objectid_to_release < le32_to_cpu(map[i + 1])) {
139 /* size of objectid map is not changed */
140 if (objectid_to_release + 1 == le32_to_cpu(map[i + 1])) {
141 //objectid_map[i+1]--;
142 map[i + 1] =
143 cpu_to_le32(le32_to_cpu(map[i + 1]) - 1);
144 return;
145 }
146
147 /* JDM comparing two little-endian values for equality -- safe */
148 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
149 /* objectid map must be expanded, but there is no space */
150 PROC_INFO_INC(s, leaked_oid);
151 return;
152 }
153
154 /* expand the objectid map */
155 memmove(map + i + 3, map + i + 1,
156 (sb_oid_cursize(rs) - i - 1) * sizeof(__u32));
157 map[i + 1] = cpu_to_le32(objectid_to_release);
158 map[i + 2] = cpu_to_le32(objectid_to_release + 1);
159 set_sb_oid_cursize(rs, sb_oid_cursize(rs) + 2);
160 return;
161 }
162 i += 2;
135 } 163 }
136 164
137 if (objectid_to_release > le32_to_cpu (map[i]) && 165 reiserfs_warning(s,
138 objectid_to_release < le32_to_cpu (map[i + 1])) { 166 "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)",
139 /* size of objectid map is not changed */ 167 (long unsigned)objectid_to_release);
140 if (objectid_to_release + 1 == le32_to_cpu (map[i + 1])) { 168}
141 //objectid_map[i+1]--;
142 map[i + 1] = cpu_to_le32 (le32_to_cpu (map[i + 1]) - 1);
143 return;
144 }
145
146 /* JDM comparing two little-endian values for equality -- safe */
147 if (sb_oid_cursize(rs) == sb_oid_maxsize(rs)) {
148 /* objectid map must be expanded, but there is no space */
149 PROC_INFO_INC( s, leaked_oid );
150 return;
151 }
152 169
153 /* expand the objectid map*/ 170int reiserfs_convert_objectid_map_v1(struct super_block *s)
154 memmove (map + i + 3, map + i + 1, 171{
155 (sb_oid_cursize(rs) - i - 1) * sizeof(__u32)); 172 struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK(s);
156 map[i + 1] = cpu_to_le32 (objectid_to_release); 173 int cur_size = sb_oid_cursize(disk_sb);
157 map[i + 2] = cpu_to_le32 (objectid_to_release + 1); 174 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2;
158 set_sb_oid_cursize( rs, sb_oid_cursize(rs) + 2 ); 175 int old_max = sb_oid_maxsize(disk_sb);
159 return; 176 struct reiserfs_super_block_v1 *disk_sb_v1;
177 __le32 *objectid_map, *new_objectid_map;
178 int i;
179
180 disk_sb_v1 =
181 (struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
182 objectid_map = (__le32 *) (disk_sb_v1 + 1);
183 new_objectid_map = (__le32 *) (disk_sb + 1);
184
185 if (cur_size > new_size) {
186 /* mark everyone used that was listed as free at the end of the objectid
187 ** map
188 */
189 objectid_map[new_size - 1] = objectid_map[cur_size - 1];
190 set_sb_oid_cursize(disk_sb, new_size);
191 }
192 /* move the smaller objectid map past the end of the new super */
193 for (i = new_size - 1; i >= 0; i--) {
194 objectid_map[i + (old_max - new_size)] = objectid_map[i];
160 } 195 }
161 i += 2;
162 }
163 196
164 reiserfs_warning (s, "vs-15011: reiserfs_release_objectid: tried to free free object id (%lu)", 197 /* set the max size so we don't overflow later */
165 ( long unsigned ) objectid_to_release); 198 set_sb_oid_maxsize(disk_sb, new_size);
166}
167 199
200 /* Zero out label and generate random UUID */
201 memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label));
202 generate_random_uuid(disk_sb->s_uuid);
168 203
169int reiserfs_convert_objectid_map_v1(struct super_block *s) { 204 /* finally, zero out the unused chunk of the new super */
170 struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK (s); 205 memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused));
171 int cur_size = sb_oid_cursize(disk_sb); 206 return 0;
172 int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ;
173 int old_max = sb_oid_maxsize(disk_sb);
174 struct reiserfs_super_block_v1 *disk_sb_v1 ;
175 __le32 *objectid_map, *new_objectid_map ;
176 int i ;
177
178 disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
179 objectid_map = (__le32 *)(disk_sb_v1 + 1) ;
180 new_objectid_map = (__le32 *)(disk_sb + 1) ;
181
182 if (cur_size > new_size) {
183 /* mark everyone used that was listed as free at the end of the objectid
184 ** map
185 */
186 objectid_map[new_size - 1] = objectid_map[cur_size - 1] ;
187 set_sb_oid_cursize(disk_sb,new_size) ;
188 }
189 /* move the smaller objectid map past the end of the new super */
190 for (i = new_size - 1 ; i >= 0 ; i--) {
191 objectid_map[i + (old_max - new_size)] = objectid_map[i] ;
192 }
193
194
195 /* set the max size so we don't overflow later */
196 set_sb_oid_maxsize(disk_sb,new_size) ;
197
198 /* Zero out label and generate random UUID */
199 memset(disk_sb->s_label, 0, sizeof(disk_sb->s_label)) ;
200 generate_random_uuid(disk_sb->s_uuid);
201
202 /* finally, zero out the unused chunk of the new super */
203 memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)) ;
204 return 0 ;
205} 207}
206
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 16fdca1d4bd..d55e164bd5c 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -15,168 +15,166 @@ static char error_buf[1024];
15static char fmt_buf[1024]; 15static char fmt_buf[1024];
16static char off_buf[80]; 16static char off_buf[80];
17 17
18 18static char *reiserfs_cpu_offset(struct cpu_key *key)
19static char * reiserfs_cpu_offset (struct cpu_key * key)
20{ 19{
21 if (cpu_key_k_type(key) == TYPE_DIRENTRY) 20 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
22 sprintf (off_buf, "%Lu(%Lu)", 21 sprintf(off_buf, "%Lu(%Lu)",
23 (unsigned long long)GET_HASH_VALUE (cpu_key_k_offset (key)), 22 (unsigned long long)
24 (unsigned long long)GET_GENERATION_NUMBER (cpu_key_k_offset (key))); 23 GET_HASH_VALUE(cpu_key_k_offset(key)),
25 else 24 (unsigned long long)
26 sprintf (off_buf, "0x%Lx", (unsigned long long)cpu_key_k_offset (key)); 25 GET_GENERATION_NUMBER(cpu_key_k_offset(key)));
27 return off_buf; 26 else
27 sprintf(off_buf, "0x%Lx",
28 (unsigned long long)cpu_key_k_offset(key));
29 return off_buf;
28} 30}
29 31
30 32static char *le_offset(struct reiserfs_key *key)
31static char * le_offset (struct reiserfs_key * key)
32{ 33{
33 int version; 34 int version;
34 35
35 version = le_key_version (key); 36 version = le_key_version(key);
36 if (le_key_k_type (version, key) == TYPE_DIRENTRY) 37 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
37 sprintf (off_buf, "%Lu(%Lu)", 38 sprintf(off_buf, "%Lu(%Lu)",
38 (unsigned long long)GET_HASH_VALUE (le_key_k_offset (version, key)), 39 (unsigned long long)
39 (unsigned long long)GET_GENERATION_NUMBER (le_key_k_offset (version, key))); 40 GET_HASH_VALUE(le_key_k_offset(version, key)),
40 else 41 (unsigned long long)
41 sprintf (off_buf, "0x%Lx", (unsigned long long)le_key_k_offset (version, key)); 42 GET_GENERATION_NUMBER(le_key_k_offset(version, key)));
42 return off_buf; 43 else
44 sprintf(off_buf, "0x%Lx",
45 (unsigned long long)le_key_k_offset(version, key));
46 return off_buf;
43} 47}
44 48
45 49static char *cpu_type(struct cpu_key *key)
46static char * cpu_type (struct cpu_key * key)
47{ 50{
48 if (cpu_key_k_type (key) == TYPE_STAT_DATA) 51 if (cpu_key_k_type(key) == TYPE_STAT_DATA)
49 return "SD"; 52 return "SD";
50 if (cpu_key_k_type (key) == TYPE_DIRENTRY) 53 if (cpu_key_k_type(key) == TYPE_DIRENTRY)
51 return "DIR"; 54 return "DIR";
52 if (cpu_key_k_type (key) == TYPE_DIRECT) 55 if (cpu_key_k_type(key) == TYPE_DIRECT)
53 return "DIRECT"; 56 return "DIRECT";
54 if (cpu_key_k_type (key) == TYPE_INDIRECT) 57 if (cpu_key_k_type(key) == TYPE_INDIRECT)
55 return "IND"; 58 return "IND";
56 return "UNKNOWN"; 59 return "UNKNOWN";
57} 60}
58 61
59 62static char *le_type(struct reiserfs_key *key)
60static char * le_type (struct reiserfs_key * key)
61{ 63{
62 int version; 64 int version;
63
64 version = le_key_version (key);
65 65
66 if (le_key_k_type (version, key) == TYPE_STAT_DATA) 66 version = le_key_version(key);
67 return "SD";
68 if (le_key_k_type (version, key) == TYPE_DIRENTRY)
69 return "DIR";
70 if (le_key_k_type (version, key) == TYPE_DIRECT)
71 return "DIRECT";
72 if (le_key_k_type (version, key) == TYPE_INDIRECT)
73 return "IND";
74 return "UNKNOWN";
75}
76 67
68 if (le_key_k_type(version, key) == TYPE_STAT_DATA)
69 return "SD";
70 if (le_key_k_type(version, key) == TYPE_DIRENTRY)
71 return "DIR";
72 if (le_key_k_type(version, key) == TYPE_DIRECT)
73 return "DIRECT";
74 if (le_key_k_type(version, key) == TYPE_INDIRECT)
75 return "IND";
76 return "UNKNOWN";
77}
77 78
78/* %k */ 79/* %k */
79static void sprintf_le_key (char * buf, struct reiserfs_key * key) 80static void sprintf_le_key(char *buf, struct reiserfs_key *key)
80{ 81{
81 if (key) 82 if (key)
82 sprintf (buf, "[%d %d %s %s]", le32_to_cpu (key->k_dir_id), 83 sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
83 le32_to_cpu (key->k_objectid), le_offset (key), le_type (key)); 84 le32_to_cpu(key->k_objectid), le_offset(key),
84 else 85 le_type(key));
85 sprintf (buf, "[NULL]"); 86 else
87 sprintf(buf, "[NULL]");
86} 88}
87 89
88
89/* %K */ 90/* %K */
90static void sprintf_cpu_key (char * buf, struct cpu_key * key) 91static void sprintf_cpu_key(char *buf, struct cpu_key *key)
91{ 92{
92 if (key) 93 if (key)
93 sprintf (buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, 94 sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
94 key->on_disk_key.k_objectid, reiserfs_cpu_offset (key), 95 key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
95 cpu_type (key)); 96 cpu_type(key));
96 else 97 else
97 sprintf (buf, "[NULL]"); 98 sprintf(buf, "[NULL]");
98} 99}
99 100
100static void sprintf_de_head( char *buf, struct reiserfs_de_head *deh ) 101static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
101{ 102{
102 if( deh ) 103 if (deh)
103 sprintf( buf, "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", deh_offset(deh), deh_dir_id(deh), 104 sprintf(buf,
104 deh_objectid(deh), deh_location(deh), deh_state(deh) ); 105 "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
105 else 106 deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
106 sprintf( buf, "[NULL]" ); 107 deh_location(deh), deh_state(deh));
108 else
109 sprintf(buf, "[NULL]");
107 110
108} 111}
109 112
110static void sprintf_item_head (char * buf, struct item_head * ih) 113static void sprintf_item_head(char *buf, struct item_head *ih)
111{ 114{
112 if (ih) { 115 if (ih) {
113 strcpy (buf, (ih_version (ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); 116 strcpy(buf,
114 sprintf_le_key (buf + strlen (buf), &(ih->ih_key)); 117 (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
115 sprintf (buf + strlen (buf), ", item_len %d, item_location %d, " 118 sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
116 "free_space(entry_count) %d", 119 sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
117 ih_item_len(ih), ih_location(ih), ih_free_space (ih)); 120 "free_space(entry_count) %d",
118 } else 121 ih_item_len(ih), ih_location(ih), ih_free_space(ih));
119 sprintf (buf, "[NULL]"); 122 } else
123 sprintf(buf, "[NULL]");
120} 124}
121 125
122 126static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
123static void sprintf_direntry (char * buf, struct reiserfs_dir_entry * de)
124{ 127{
125 char name[20]; 128 char name[20];
126 129
127 memcpy (name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); 130 memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
128 name [de->de_namelen > 19 ? 19 : de->de_namelen] = 0; 131 name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
129 sprintf (buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); 132 sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
130} 133}
131 134
132 135static void sprintf_block_head(char *buf, struct buffer_head *bh)
133static void sprintf_block_head (char * buf, struct buffer_head * bh)
134{ 136{
135 sprintf (buf, "level=%d, nr_items=%d, free_space=%d rdkey ", 137 sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
136 B_LEVEL (bh), B_NR_ITEMS (bh), B_FREE_SPACE (bh)); 138 B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
137} 139}
138 140
139 141static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
140static void sprintf_buffer_head (char * buf, struct buffer_head * bh)
141{ 142{
142 char b[BDEVNAME_SIZE]; 143 char b[BDEVNAME_SIZE];
143 144
144 sprintf (buf, "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", 145 sprintf(buf,
145 bdevname (bh->b_bdev, b), bh->b_size, 146 "dev %s, size %d, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
146 (unsigned long long)bh->b_blocknr, 147 bdevname(bh->b_bdev, b), bh->b_size,
147 atomic_read (&(bh->b_count)), 148 (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
148 bh->b_state, bh->b_page, 149 bh->b_state, bh->b_page,
149 buffer_uptodate (bh) ? "UPTODATE" : "!UPTODATE", 150 buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
150 buffer_dirty (bh) ? "DIRTY" : "CLEAN", 151 buffer_dirty(bh) ? "DIRTY" : "CLEAN",
151 buffer_locked (bh) ? "LOCKED" : "UNLOCKED"); 152 buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
152} 153}
153 154
154 155static void sprintf_disk_child(char *buf, struct disk_child *dc)
155static void sprintf_disk_child (char * buf, struct disk_child * dc)
156{ 156{
157 sprintf (buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), dc_size(dc)); 157 sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
158 dc_size(dc));
158} 159}
159 160
160 161static char *is_there_reiserfs_struct(char *fmt, int *what, int *skip)
161static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip)
162{ 162{
163 char * k = fmt; 163 char *k = fmt;
164 164
165 *skip = 0; 165 *skip = 0;
166
167 while ((k = strchr (k, '%')) != NULL)
168 {
169 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
170 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a' ) {
171 *what = k[1];
172 break;
173 }
174 (*skip) ++;
175 k ++;
176 }
177 return k;
178}
179 166
167 while ((k = strchr(k, '%')) != NULL) {
168 if (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' ||
169 k[1] == 'z' || k[1] == 'b' || k[1] == 'y' || k[1] == 'a') {
170 *what = k[1];
171 break;
172 }
173 (*skip)++;
174 k++;
175 }
176 return k;
177}
180 178
181/* debugging reiserfs we used to print out a lot of different 179/* debugging reiserfs we used to print out a lot of different
182 variables, like keys, item headers, buffer heads etc. Values of 180 variables, like keys, item headers, buffer heads etc. Values of
@@ -191,61 +189,64 @@ static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip)
191 key->k_offset, key->k_uniqueness); 189 key->k_offset, key->k_uniqueness);
192*/ 190*/
193 191
194 192static void prepare_error_buf(const char *fmt, va_list args)
195static void 193{
196prepare_error_buf( const char *fmt, va_list args ) 194 char *fmt1 = fmt_buf;
197{ 195 char *k;
198 char * fmt1 = fmt_buf; 196 char *p = error_buf;
199 char * k; 197 int i, j, what, skip;
200 char * p = error_buf; 198
201 int i, j, what, skip; 199 strcpy(fmt1, fmt);
202 200
203 strcpy (fmt1, fmt); 201 while ((k = is_there_reiserfs_struct(fmt1, &what, &skip)) != NULL) {
204 202 *k = 0;
205 while( (k = is_there_reiserfs_struct( fmt1, &what, &skip )) != NULL ) 203
206 { 204 p += vsprintf(p, fmt1, args);
207 *k = 0; 205
208 206 for (i = 0; i < skip; i++)
209 p += vsprintf (p, fmt1, args); 207 j = va_arg(args, int);
210 208
211 for (i = 0; i < skip; i ++) 209 switch (what) {
212 j = va_arg (args, int); 210 case 'k':
213 211 sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
214 switch (what) { 212 break;
215 case 'k': 213 case 'K':
216 sprintf_le_key (p, va_arg(args, struct reiserfs_key *)); 214 sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
217 break; 215 break;
218 case 'K': 216 case 'h':
219 sprintf_cpu_key (p, va_arg(args, struct cpu_key *)); 217 sprintf_item_head(p, va_arg(args, struct item_head *));
220 break; 218 break;
221 case 'h': 219 case 't':
222 sprintf_item_head (p, va_arg(args, struct item_head *)); 220 sprintf_direntry(p,
223 break; 221 va_arg(args,
224 case 't': 222 struct reiserfs_dir_entry *));
225 sprintf_direntry (p, va_arg(args, struct reiserfs_dir_entry *)); 223 break;
226 break; 224 case 'y':
227 case 'y': 225 sprintf_disk_child(p,
228 sprintf_disk_child (p, va_arg(args, struct disk_child *)); 226 va_arg(args, struct disk_child *));
229 break; 227 break;
230 case 'z': 228 case 'z':
231 sprintf_block_head (p, va_arg(args, struct buffer_head *)); 229 sprintf_block_head(p,
232 break; 230 va_arg(args, struct buffer_head *));
233 case 'b': 231 break;
234 sprintf_buffer_head (p, va_arg(args, struct buffer_head *)); 232 case 'b':
235 break; 233 sprintf_buffer_head(p,
236 case 'a': 234 va_arg(args, struct buffer_head *));
237 sprintf_de_head (p, va_arg(args, struct reiserfs_de_head *)); 235 break;
238 break; 236 case 'a':
239 } 237 sprintf_de_head(p,
240 238 va_arg(args,
241 p += strlen (p); 239 struct reiserfs_de_head *));
242 fmt1 = k + 2; 240 break;
243 } 241 }
244 vsprintf (p, fmt1, args); 242
243 p += strlen(p);
244 fmt1 = k + 2;
245 }
246 vsprintf(p, fmt1, args);
245 247
246} 248}
247 249
248
249/* in addition to usual conversion specifiers this accepts reiserfs 250/* in addition to usual conversion specifiers this accepts reiserfs
250 specific conversion specifiers: 251 specific conversion specifiers:
251 %k to print little endian key, 252 %k to print little endian key,
@@ -264,43 +265,43 @@ prepare_error_buf( const char *fmt, va_list args )
264 va_end( args );\ 265 va_end( args );\
265} 266}
266 267
267void reiserfs_warning (struct super_block *sb, const char * fmt, ...) 268void reiserfs_warning(struct super_block *sb, const char *fmt, ...)
268{ 269{
269 do_reiserfs_warning(fmt); 270 do_reiserfs_warning(fmt);
270 if (sb) 271 if (sb)
271 printk (KERN_WARNING "ReiserFS: %s: warning: %s\n", 272 printk(KERN_WARNING "ReiserFS: %s: warning: %s\n",
272 reiserfs_bdevname (sb), error_buf); 273 reiserfs_bdevname(sb), error_buf);
273 else 274 else
274 printk (KERN_WARNING "ReiserFS: warning: %s\n", error_buf); 275 printk(KERN_WARNING "ReiserFS: warning: %s\n", error_buf);
275} 276}
276 277
277/* No newline.. reiserfs_info calls can be followed by printk's */ 278/* No newline.. reiserfs_info calls can be followed by printk's */
278void reiserfs_info (struct super_block *sb, const char * fmt, ...) 279void reiserfs_info(struct super_block *sb, const char *fmt, ...)
279{ 280{
280 do_reiserfs_warning(fmt); 281 do_reiserfs_warning(fmt);
281 if (sb) 282 if (sb)
282 printk (KERN_NOTICE "ReiserFS: %s: %s", 283 printk(KERN_NOTICE "ReiserFS: %s: %s",
283 reiserfs_bdevname (sb), error_buf); 284 reiserfs_bdevname(sb), error_buf);
284 else 285 else
285 printk (KERN_NOTICE "ReiserFS: %s", error_buf); 286 printk(KERN_NOTICE "ReiserFS: %s", error_buf);
286} 287}
287 288
288/* No newline.. reiserfs_printk calls can be followed by printk's */ 289/* No newline.. reiserfs_printk calls can be followed by printk's */
289static void reiserfs_printk (const char * fmt, ...) 290static void reiserfs_printk(const char *fmt, ...)
290{ 291{
291 do_reiserfs_warning(fmt); 292 do_reiserfs_warning(fmt);
292 printk (error_buf); 293 printk(error_buf);
293} 294}
294 295
295void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) 296void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...)
296{ 297{
297#ifdef CONFIG_REISERFS_CHECK 298#ifdef CONFIG_REISERFS_CHECK
298 do_reiserfs_warning(fmt); 299 do_reiserfs_warning(fmt);
299 if (s) 300 if (s)
300 printk (KERN_DEBUG "ReiserFS: %s: %s\n", 301 printk(KERN_DEBUG "ReiserFS: %s: %s\n",
301 reiserfs_bdevname (s), error_buf); 302 reiserfs_bdevname(s), error_buf);
302 else 303 else
303 printk (KERN_DEBUG "ReiserFS: %s\n", error_buf); 304 printk(KERN_DEBUG "ReiserFS: %s\n", error_buf);
304#endif 305#endif
305} 306}
306 307
@@ -349,379 +350,403 @@ void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...)
349 350
350 . */ 351 . */
351 352
352
353#ifdef CONFIG_REISERFS_CHECK 353#ifdef CONFIG_REISERFS_CHECK
354extern struct tree_balance * cur_tb; 354extern struct tree_balance *cur_tb;
355#endif 355#endif
356 356
357void reiserfs_panic (struct super_block * sb, const char * fmt, ...) 357void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
358{ 358{
359 do_reiserfs_warning(fmt); 359 do_reiserfs_warning(fmt);
360 printk (KERN_EMERG "REISERFS: panic (device %s): %s\n", 360 printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
361 reiserfs_bdevname (sb), error_buf); 361 reiserfs_bdevname(sb), error_buf);
362 BUG (); 362 BUG();
363 363
364 /* this is not actually called, but makes reiserfs_panic() "noreturn" */ 364 /* this is not actually called, but makes reiserfs_panic() "noreturn" */
365 panic ("REISERFS: panic (device %s): %s\n", 365 panic("REISERFS: panic (device %s): %s\n",
366 reiserfs_bdevname (sb), error_buf); 366 reiserfs_bdevname(sb), error_buf);
367} 367}
368 368
369void 369void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
370reiserfs_abort (struct super_block *sb, int errno, const char *fmt, ...)
371{ 370{
372 do_reiserfs_warning (fmt); 371 do_reiserfs_warning(fmt);
373 372
374 if (reiserfs_error_panic (sb)) { 373 if (reiserfs_error_panic(sb)) {
375 panic (KERN_CRIT "REISERFS: panic (device %s): %s\n", 374 panic(KERN_CRIT "REISERFS: panic (device %s): %s\n",
376 reiserfs_bdevname (sb), error_buf); 375 reiserfs_bdevname(sb), error_buf);
377 } 376 }
378 377
379 if (sb->s_flags & MS_RDONLY) 378 if (sb->s_flags & MS_RDONLY)
380 return; 379 return;
381 380
382 printk (KERN_CRIT "REISERFS: abort (device %s): %s\n", 381 printk(KERN_CRIT "REISERFS: abort (device %s): %s\n",
383 reiserfs_bdevname (sb), error_buf); 382 reiserfs_bdevname(sb), error_buf);
384 383
385 sb->s_flags |= MS_RDONLY; 384 sb->s_flags |= MS_RDONLY;
386 reiserfs_journal_abort (sb, errno); 385 reiserfs_journal_abort(sb, errno);
387} 386}
388 387
389/* this prints internal nodes (4 keys/items in line) (dc_number, 388/* this prints internal nodes (4 keys/items in line) (dc_number,
390 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, 389 dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number,
391 dc_size)...*/ 390 dc_size)...*/
392static int print_internal (struct buffer_head * bh, int first, int last) 391static int print_internal(struct buffer_head *bh, int first, int last)
393{ 392{
394 struct reiserfs_key * key; 393 struct reiserfs_key *key;
395 struct disk_child * dc; 394 struct disk_child *dc;
396 int i; 395 int i;
397 int from, to; 396 int from, to;
398
399 if (!B_IS_KEYS_LEVEL (bh))
400 return 1;
401
402 check_internal (bh);
403
404 if (first == -1) {
405 from = 0;
406 to = B_NR_ITEMS (bh);
407 } else {
408 from = first;
409 to = last < B_NR_ITEMS (bh) ? last : B_NR_ITEMS (bh);
410 }
411
412 reiserfs_printk ("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
413
414 dc = B_N_CHILD (bh, from);
415 reiserfs_printk ("PTR %d: %y ", from, dc);
416
417 for (i = from, key = B_N_PDELIM_KEY (bh, from), dc ++; i < to; i ++, key ++, dc ++) {
418 reiserfs_printk ("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
419 if (i && i % 4 == 0)
420 printk ("\n");
421 }
422 printk ("\n");
423 return 0;
424}
425 397
398 if (!B_IS_KEYS_LEVEL(bh))
399 return 1;
426 400
401 check_internal(bh);
427 402
403 if (first == -1) {
404 from = 0;
405 to = B_NR_ITEMS(bh);
406 } else {
407 from = first;
408 to = last < B_NR_ITEMS(bh) ? last : B_NR_ITEMS(bh);
409 }
428 410
411 reiserfs_printk("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh);
429 412
430static int print_leaf (struct buffer_head * bh, int print_mode, int first, int last) 413 dc = B_N_CHILD(bh, from);
431{ 414 reiserfs_printk("PTR %d: %y ", from, dc);
432 struct block_head * blkh;
433 struct item_head * ih;
434 int i, nr;
435 int from, to;
436 415
437 if (!B_IS_ITEMS_LEVEL (bh)) 416 for (i = from, key = B_N_PDELIM_KEY(bh, from), dc++; i < to;
438 return 1; 417 i++, key++, dc++) {
418 reiserfs_printk("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc);
419 if (i && i % 4 == 0)
420 printk("\n");
421 }
422 printk("\n");
423 return 0;
424}
439 425
440 check_leaf (bh); 426static int print_leaf(struct buffer_head *bh, int print_mode, int first,
427 int last)
428{
429 struct block_head *blkh;
430 struct item_head *ih;
431 int i, nr;
432 int from, to;
441 433
442 blkh = B_BLK_HEAD (bh); 434 if (!B_IS_ITEMS_LEVEL(bh))
443 ih = B_N_PITEM_HEAD (bh,0); 435 return 1;
444 nr = blkh_nr_item(blkh);
445 436
446 printk ("\n===================================================================\n"); 437 check_leaf(bh);
447 reiserfs_printk ("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh);
448 438
449 if (!(print_mode & PRINT_LEAF_ITEMS)) { 439 blkh = B_BLK_HEAD(bh);
450 reiserfs_printk ("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", 440 ih = B_N_PITEM_HEAD(bh, 0);
451 &(ih->ih_key), &((ih + nr - 1)->ih_key)); 441 nr = blkh_nr_item(blkh);
452 return 0;
453 }
454 442
455 if (first < 0 || first > nr - 1) 443 printk
456 from = 0; 444 ("\n===================================================================\n");
457 else 445 reiserfs_printk("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh);
458 from = first;
459 446
460 if (last < 0 || last > nr ) 447 if (!(print_mode & PRINT_LEAF_ITEMS)) {
461 to = nr; 448 reiserfs_printk("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n",
462 else 449 &(ih->ih_key), &((ih + nr - 1)->ih_key));
463 to = last; 450 return 0;
451 }
464 452
465 ih += from; 453 if (first < 0 || first > nr - 1)
466 printk ("-------------------------------------------------------------------------------\n"); 454 from = 0;
467 printk ("|##| type | key | ilen | free_space | version | loc |\n"); 455 else
468 for (i = from; i < to; i++, ih ++) { 456 from = first;
469 printk ("-------------------------------------------------------------------------------\n"); 457
470 reiserfs_printk ("|%2d| %h |\n", i, ih); 458 if (last < 0 || last > nr)
471 if (print_mode & PRINT_LEAF_ITEMS) 459 to = nr;
472 op_print_item (ih, B_I_PITEM (bh, ih)); 460 else
473 } 461 to = last;
462
463 ih += from;
464 printk
465 ("-------------------------------------------------------------------------------\n");
466 printk
467 ("|##| type | key | ilen | free_space | version | loc |\n");
468 for (i = from; i < to; i++, ih++) {
469 printk
470 ("-------------------------------------------------------------------------------\n");
471 reiserfs_printk("|%2d| %h |\n", i, ih);
472 if (print_mode & PRINT_LEAF_ITEMS)
473 op_print_item(ih, B_I_PITEM(bh, ih));
474 }
474 475
475 printk ("===================================================================\n"); 476 printk
477 ("===================================================================\n");
476 478
477 return 0; 479 return 0;
478} 480}
479 481
480char * reiserfs_hashname(int code) 482char *reiserfs_hashname(int code)
481{ 483{
482 if ( code == YURA_HASH) 484 if (code == YURA_HASH)
483 return "rupasov"; 485 return "rupasov";
484 if ( code == TEA_HASH) 486 if (code == TEA_HASH)
485 return "tea"; 487 return "tea";
486 if ( code == R5_HASH) 488 if (code == R5_HASH)
487 return "r5"; 489 return "r5";
488 490
489 return "unknown"; 491 return "unknown";
490} 492}
491 493
492/* return 1 if this is not super block */ 494/* return 1 if this is not super block */
493static int print_super_block (struct buffer_head * bh) 495static int print_super_block(struct buffer_head *bh)
494{ 496{
495 struct reiserfs_super_block * rs = (struct reiserfs_super_block *)(bh->b_data); 497 struct reiserfs_super_block *rs =
496 int skipped, data_blocks; 498 (struct reiserfs_super_block *)(bh->b_data);
497 char *version; 499 int skipped, data_blocks;
498 char b[BDEVNAME_SIZE]; 500 char *version;
499 501 char b[BDEVNAME_SIZE];
500 if (is_reiserfs_3_5(rs)) { 502
501 version = "3.5"; 503 if (is_reiserfs_3_5(rs)) {
502 } else if (is_reiserfs_3_6(rs)) { 504 version = "3.5";
503 version = "3.6"; 505 } else if (is_reiserfs_3_6(rs)) {
504 } else if (is_reiserfs_jr(rs)) { 506 version = "3.6";
505 version = ((sb_version(rs) == REISERFS_VERSION_2) ? 507 } else if (is_reiserfs_jr(rs)) {
506 "3.6" : "3.5"); 508 version = ((sb_version(rs) == REISERFS_VERSION_2) ?
507 } else { 509 "3.6" : "3.5");
508 return 1; 510 } else {
509 } 511 return 1;
510 512 }
511 printk ("%s\'s super block is in block %llu\n", bdevname (bh->b_bdev, b), 513
512 (unsigned long long)bh->b_blocknr); 514 printk("%s\'s super block is in block %llu\n", bdevname(bh->b_bdev, b),
513 printk ("Reiserfs version %s\n", version ); 515 (unsigned long long)bh->b_blocknr);
514 printk ("Block count %u\n", sb_block_count(rs)); 516 printk("Reiserfs version %s\n", version);
515 printk ("Blocksize %d\n", sb_blocksize(rs)); 517 printk("Block count %u\n", sb_block_count(rs));
516 printk ("Free blocks %u\n", sb_free_blocks(rs)); 518 printk("Blocksize %d\n", sb_blocksize(rs));
517 // FIXME: this would be confusing if 519 printk("Free blocks %u\n", sb_free_blocks(rs));
518 // someone stores reiserfs super block in some data block ;) 520 // FIXME: this would be confusing if
521 // someone stores reiserfs super block in some data block ;)
519// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs); 522// skipped = (bh->b_blocknr * bh->b_size) / sb_blocksize(rs);
520 skipped = bh->b_blocknr; 523 skipped = bh->b_blocknr;
521 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) - 524 data_blocks = sb_block_count(rs) - skipped - 1 - sb_bmap_nr(rs) -
522 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) + 1 : sb_reserved_for_journal(rs)) - 525 (!is_reiserfs_jr(rs) ? sb_jp_journal_size(rs) +
523 sb_free_blocks(rs); 526 1 : sb_reserved_for_journal(rs)) - sb_free_blocks(rs);
524 printk ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n" 527 printk
525 "1 super block, %d data blocks\n", 528 ("Busy blocks (skipped %d, bitmaps - %d, journal (or reserved) blocks - %d\n"
526 skipped, sb_bmap_nr(rs), (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) : 529 "1 super block, %d data blocks\n", skipped, sb_bmap_nr(rs),
527 sb_reserved_for_journal(rs)) , data_blocks); 530 (!is_reiserfs_jr(rs) ? (sb_jp_journal_size(rs) + 1) :
528 printk ("Root block %u\n", sb_root_block(rs)); 531 sb_reserved_for_journal(rs)), data_blocks);
529 printk ("Journal block (first) %d\n", sb_jp_journal_1st_block(rs)); 532 printk("Root block %u\n", sb_root_block(rs));
530 printk ("Journal dev %d\n", sb_jp_journal_dev(rs)); 533 printk("Journal block (first) %d\n", sb_jp_journal_1st_block(rs));
531 printk ("Journal orig size %d\n", sb_jp_journal_size(rs)); 534 printk("Journal dev %d\n", sb_jp_journal_dev(rs));
532 printk ("FS state %d\n", sb_fs_state(rs)); 535 printk("Journal orig size %d\n", sb_jp_journal_size(rs));
533 printk ("Hash function \"%s\"\n", 536 printk("FS state %d\n", sb_fs_state(rs));
534 reiserfs_hashname(sb_hash_function_code(rs))); 537 printk("Hash function \"%s\"\n",
535 538 reiserfs_hashname(sb_hash_function_code(rs)));
536 printk ("Tree height %d\n", sb_tree_height(rs)); 539
537 return 0; 540 printk("Tree height %d\n", sb_tree_height(rs));
541 return 0;
538} 542}
539 543
540static int print_desc_block (struct buffer_head * bh) 544static int print_desc_block(struct buffer_head *bh)
541{ 545{
542 struct reiserfs_journal_desc * desc; 546 struct reiserfs_journal_desc *desc;
543 547
544 if (memcmp(get_journal_desc_magic (bh), JOURNAL_DESC_MAGIC, 8)) 548 if (memcmp(get_journal_desc_magic(bh), JOURNAL_DESC_MAGIC, 8))
545 return 1; 549 return 1;
546 550
547 desc = (struct reiserfs_journal_desc *)(bh->b_data); 551 desc = (struct reiserfs_journal_desc *)(bh->b_data);
548 printk ("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)", 552 printk("Desc block %llu (j_trans_id %d, j_mount_id %d, j_len %d)",
549 (unsigned long long)bh->b_blocknr, get_desc_trans_id (desc), get_desc_mount_id (desc), 553 (unsigned long long)bh->b_blocknr, get_desc_trans_id(desc),
550 get_desc_trans_len (desc)); 554 get_desc_mount_id(desc), get_desc_trans_len(desc));
551 555
552 return 0; 556 return 0;
553} 557}
554 558
555 559void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int last)
556void print_block (struct buffer_head * bh, ...)//int print_mode, int first, int last)
557{ 560{
558 va_list args; 561 va_list args;
559 int mode, first, last; 562 int mode, first, last;
560 563
561 va_start (args, bh); 564 va_start(args, bh);
562 565
563 if ( ! bh ) { 566 if (!bh) {
564 printk("print_block: buffer is NULL\n"); 567 printk("print_block: buffer is NULL\n");
565 return; 568 return;
566 } 569 }
567 570
568 mode = va_arg (args, int); 571 mode = va_arg(args, int);
569 first = va_arg (args, int); 572 first = va_arg(args, int);
570 last = va_arg (args, int); 573 last = va_arg(args, int);
571 if (print_leaf (bh, mode, first, last)) 574 if (print_leaf(bh, mode, first, last))
572 if (print_internal (bh, first, last)) 575 if (print_internal(bh, first, last))
573 if (print_super_block (bh)) 576 if (print_super_block(bh))
574 if (print_desc_block (bh)) 577 if (print_desc_block(bh))
575 printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); 578 printk
579 ("Block %llu contains unformatted data\n",
580 (unsigned long long)bh->b_blocknr);
576} 581}
577 582
578
579
580static char print_tb_buf[2048]; 583static char print_tb_buf[2048];
581 584
582/* this stores initial state of tree balance in the print_tb_buf */ 585/* this stores initial state of tree balance in the print_tb_buf */
583void store_print_tb (struct tree_balance * tb) 586void store_print_tb(struct tree_balance *tb)
584{ 587{
585 int h = 0; 588 int h = 0;
586 int i; 589 int i;
587 struct buffer_head * tbSh, * tbFh; 590 struct buffer_head *tbSh, *tbFh;
588 591
589 if (!tb) 592 if (!tb)
590 return; 593 return;
591 594
592 sprintf (print_tb_buf, "\n" 595 sprintf(print_tb_buf, "\n"
593 "BALANCING %d\n" 596 "BALANCING %d\n"
594 "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" 597 "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n"
595 "=====================================================================\n" 598 "=====================================================================\n"
596 "* h * S * L * R * F * FL * FR * CFL * CFR *\n", 599 "* h * S * L * R * F * FL * FR * CFL * CFR *\n",
597 REISERFS_SB(tb->tb_sb)->s_do_balance, 600 REISERFS_SB(tb->tb_sb)->s_do_balance,
598 tb->tb_mode, PATH_LAST_POSITION (tb->tb_path), tb->tb_path->pos_in_item); 601 tb->tb_mode, PATH_LAST_POSITION(tb->tb_path),
599 602 tb->tb_path->pos_in_item);
600 for (h = 0; h < sizeof(tb->insert_size) / sizeof (tb->insert_size[0]); h ++) { 603
601 if (PATH_H_PATH_OFFSET (tb->tb_path, h) <= tb->tb_path->path_length && 604 for (h = 0; h < sizeof(tb->insert_size) / sizeof(tb->insert_size[0]);
602 PATH_H_PATH_OFFSET (tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { 605 h++) {
603 tbSh = PATH_H_PBUFFER (tb->tb_path, h); 606 if (PATH_H_PATH_OFFSET(tb->tb_path, h) <=
604 tbFh = PATH_H_PPARENT (tb->tb_path, h); 607 tb->tb_path->path_length
605 } else { 608 && PATH_H_PATH_OFFSET(tb->tb_path,
606 tbSh = NULL; 609 h) > ILLEGAL_PATH_ELEMENT_OFFSET) {
607 tbFh = NULL; 610 tbSh = PATH_H_PBUFFER(tb->tb_path, h);
611 tbFh = PATH_H_PPARENT(tb->tb_path, h);
612 } else {
613 tbSh = NULL;
614 tbFh = NULL;
615 }
616 sprintf(print_tb_buf + strlen(print_tb_buf),
617 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
618 h,
619 (tbSh) ? (long long)(tbSh->b_blocknr) : (-1LL),
620 (tbSh) ? atomic_read(&(tbSh->b_count)) : -1,
621 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr) : (-1LL),
622 (tb->L[h]) ? atomic_read(&(tb->L[h]->b_count)) : -1,
623 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr) : (-1LL),
624 (tb->R[h]) ? atomic_read(&(tb->R[h]->b_count)) : -1,
625 (tbFh) ? (long long)(tbFh->b_blocknr) : (-1LL),
626 (tb->FL[h]) ? (long long)(tb->FL[h]->
627 b_blocknr) : (-1LL),
628 (tb->FR[h]) ? (long long)(tb->FR[h]->
629 b_blocknr) : (-1LL),
630 (tb->CFL[h]) ? (long long)(tb->CFL[h]->
631 b_blocknr) : (-1LL),
632 (tb->CFR[h]) ? (long long)(tb->CFR[h]->
633 b_blocknr) : (-1LL));
608 } 634 }
609 sprintf (print_tb_buf + strlen (print_tb_buf),
610 "* %d * %3lld(%2d) * %3lld(%2d) * %3lld(%2d) * %5lld * %5lld * %5lld * %5lld * %5lld *\n",
611 h,
612 (tbSh) ? (long long)(tbSh->b_blocknr):(-1LL),
613 (tbSh) ? atomic_read (&(tbSh->b_count)) : -1,
614 (tb->L[h]) ? (long long)(tb->L[h]->b_blocknr):(-1LL),
615 (tb->L[h]) ? atomic_read (&(tb->L[h]->b_count)) : -1,
616 (tb->R[h]) ? (long long)(tb->R[h]->b_blocknr):(-1LL),
617 (tb->R[h]) ? atomic_read (&(tb->R[h]->b_count)) : -1,
618 (tbFh) ? (long long)(tbFh->b_blocknr):(-1LL),
619 (tb->FL[h]) ? (long long)(tb->FL[h]->b_blocknr):(-1LL),
620 (tb->FR[h]) ? (long long)(tb->FR[h]->b_blocknr):(-1LL),
621 (tb->CFL[h]) ? (long long)(tb->CFL[h]->b_blocknr):(-1LL),
622 (tb->CFR[h]) ? (long long)(tb->CFR[h]->b_blocknr):(-1LL));
623 }
624
625 sprintf (print_tb_buf + strlen (print_tb_buf),
626 "=====================================================================\n"
627 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
628 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
629 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],tb->rbytes, tb->blknum[0],
630 tb->s0num, tb->s1num,tb->s1bytes, tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], tb->rkey[0]);
631
632 /* this prints balance parameters for non-leaf levels */
633 h = 0;
634 do {
635 h++;
636 sprintf (print_tb_buf + strlen (print_tb_buf),
637 "* %d * %4d * %2d * * %2d * * %2d *\n",
638 h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]);
639 } while (tb->insert_size[h]);
640
641 sprintf (print_tb_buf + strlen (print_tb_buf),
642 "=====================================================================\n"
643 "FEB list: ");
644
645 /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */
646 h = 0;
647 for (i = 0; i < sizeof (tb->FEB) / sizeof (tb->FEB[0]); i ++)
648 sprintf (print_tb_buf + strlen (print_tb_buf),
649 "%p (%llu %d)%s", tb->FEB[i], tb->FEB[i] ? (unsigned long long)tb->FEB[i]->b_blocknr : 0ULL,
650 tb->FEB[i] ? atomic_read (&(tb->FEB[i]->b_count)) : 0,
651 (i == sizeof (tb->FEB) / sizeof (tb->FEB[0]) - 1) ? "\n" : ", ");
652
653 sprintf (print_tb_buf + strlen (print_tb_buf),
654 "======================== the end ====================================\n");
655}
656
657void print_cur_tb (char * mes)
658{
659 printk ("%s\n%s", mes, print_tb_buf);
660}
661
662static void check_leaf_block_head (struct buffer_head * bh)
663{
664 struct block_head * blkh;
665 int nr;
666
667 blkh = B_BLK_HEAD (bh);
668 nr = blkh_nr_item(blkh);
669 if ( nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
670 reiserfs_panic (NULL, "vs-6010: check_leaf_block_head: invalid item number %z", bh);
671 if ( blkh_free_space(blkh) >
672 bh->b_size - BLKH_SIZE - IH_SIZE * nr )
673 reiserfs_panic (NULL, "vs-6020: check_leaf_block_head: invalid free space %z", bh);
674
675}
676 635
677static void check_internal_block_head (struct buffer_head * bh) 636 sprintf(print_tb_buf + strlen(print_tb_buf),
678{ 637 "=====================================================================\n"
679 struct block_head * blkh; 638 "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n"
680 639 "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n",
681 blkh = B_BLK_HEAD (bh); 640 tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],
682 if (!(B_LEVEL (bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL (bh) <= MAX_HEIGHT)) 641 tb->rbytes, tb->blknum[0], tb->s0num, tb->s1num, tb->s1bytes,
683 reiserfs_panic (NULL, "vs-6025: check_internal_block_head: invalid level %z", bh); 642 tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0],
643 tb->rkey[0]);
644
645 /* this prints balance parameters for non-leaf levels */
646 h = 0;
647 do {
648 h++;
649 sprintf(print_tb_buf + strlen(print_tb_buf),
650 "* %d * %4d * %2d * * %2d * * %2d *\n",
651 h, tb->insert_size[h], tb->lnum[h], tb->rnum[h],
652 tb->blknum[h]);
653 } while (tb->insert_size[h]);
684 654
685 if (B_NR_ITEMS (bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) 655 sprintf(print_tb_buf + strlen(print_tb_buf),
686 reiserfs_panic (NULL, "vs-6030: check_internal_block_head: invalid item number %z", bh); 656 "=====================================================================\n"
657 "FEB list: ");
687 658
688 if (B_FREE_SPACE (bh) != 659 /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */
689 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS (bh) - DC_SIZE * (B_NR_ITEMS (bh) + 1)) 660 h = 0;
690 reiserfs_panic (NULL, "vs-6040: check_internal_block_head: invalid free space %z", bh); 661 for (i = 0; i < sizeof(tb->FEB) / sizeof(tb->FEB[0]); i++)
662 sprintf(print_tb_buf + strlen(print_tb_buf),
663 "%p (%llu %d)%s", tb->FEB[i],
664 tb->FEB[i] ? (unsigned long long)tb->FEB[i]->
665 b_blocknr : 0ULL,
666 tb->FEB[i] ? atomic_read(&(tb->FEB[i]->b_count)) : 0,
667 (i ==
668 sizeof(tb->FEB) / sizeof(tb->FEB[0]) -
669 1) ? "\n" : ", ");
691 670
671 sprintf(print_tb_buf + strlen(print_tb_buf),
672 "======================== the end ====================================\n");
692} 673}
693 674
675void print_cur_tb(char *mes)
676{
677 printk("%s\n%s", mes, print_tb_buf);
678}
694 679
695void check_leaf (struct buffer_head * bh) 680static void check_leaf_block_head(struct buffer_head *bh)
696{ 681{
697 int i; 682 struct block_head *blkh;
698 struct item_head * ih; 683 int nr;
684
685 blkh = B_BLK_HEAD(bh);
686 nr = blkh_nr_item(blkh);
687 if (nr > (bh->b_size - BLKH_SIZE) / IH_SIZE)
688 reiserfs_panic(NULL,
689 "vs-6010: check_leaf_block_head: invalid item number %z",
690 bh);
691 if (blkh_free_space(blkh) > bh->b_size - BLKH_SIZE - IH_SIZE * nr)
692 reiserfs_panic(NULL,
693 "vs-6020: check_leaf_block_head: invalid free space %z",
694 bh);
699 695
700 if (!bh)
701 return;
702 check_leaf_block_head (bh);
703 for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++)
704 op_check_item (ih, B_I_PITEM (bh, ih));
705} 696}
706 697
698static void check_internal_block_head(struct buffer_head *bh)
699{
700 struct block_head *blkh;
701
702 blkh = B_BLK_HEAD(bh);
703 if (!(B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL(bh) <= MAX_HEIGHT))
704 reiserfs_panic(NULL,
705 "vs-6025: check_internal_block_head: invalid level %z",
706 bh);
707
708 if (B_NR_ITEMS(bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE)
709 reiserfs_panic(NULL,
710 "vs-6030: check_internal_block_head: invalid item number %z",
711 bh);
712
713 if (B_FREE_SPACE(bh) !=
714 bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS(bh) -
715 DC_SIZE * (B_NR_ITEMS(bh) + 1))
716 reiserfs_panic(NULL,
717 "vs-6040: check_internal_block_head: invalid free space %z",
718 bh);
719
720}
707 721
708void check_internal (struct buffer_head * bh) 722void check_leaf(struct buffer_head *bh)
709{ 723{
710 if (!bh) 724 int i;
711 return; 725 struct item_head *ih;
712 check_internal_block_head (bh); 726
727 if (!bh)
728 return;
729 check_leaf_block_head(bh);
730 for (i = 0, ih = B_N_PITEM_HEAD(bh, 0); i < B_NR_ITEMS(bh); i++, ih++)
731 op_check_item(ih, B_I_PITEM(bh, ih));
713} 732}
714 733
734void check_internal(struct buffer_head *bh)
735{
736 if (!bh)
737 return;
738 check_internal_block_head(bh);
739}
715 740
716void print_statistics (struct super_block * s) 741void print_statistics(struct super_block *s)
717{ 742{
718 743
719 /* 744 /*
720 printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \ 745 printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, \
721bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", 746 bmap with search %d, without %d, dir2ind %d, ind2dir %d\n",
722 REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes, 747 REISERFS_SB(s)->s_do_balance, REISERFS_SB(s)->s_fix_nodes,
723 REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search, 748 REISERFS_SB(s)->s_bmaps, REISERFS_SB(s)->s_bmaps_without_search,
724 REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct); 749 REISERFS_SB(s)->s_direct2indirect, REISERFS_SB(s)->s_indirect2direct);
725 */ 750 */
726 751
727} 752}
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e242ebc7f6f..fc2f43c75df 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -33,28 +33,27 @@
33static int show_version(struct seq_file *m, struct super_block *sb) 33static int show_version(struct seq_file *m, struct super_block *sb)
34{ 34{
35 char *format; 35 char *format;
36 36
37 if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6) ) { 37 if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
38 format = "3.6"; 38 format = "3.6";
39 } else if ( REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5) ) { 39 } else if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_5)) {
40 format = "3.5"; 40 format = "3.5";
41 } else { 41 } else {
42 format = "unknown"; 42 format = "unknown";
43 } 43 }
44 44
45 seq_printf(m, "%s format\twith checks %s\n", 45 seq_printf(m, "%s format\twith checks %s\n", format,
46 format,
47#if defined( CONFIG_REISERFS_CHECK ) 46#if defined( CONFIG_REISERFS_CHECK )
48 "on" 47 "on"
49#else 48#else
50 "off" 49 "off"
51#endif 50#endif
52 ); 51 );
53 return 0; 52 return 0;
54} 53}
55 54
56int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset, 55int reiserfs_global_version_in_proc(char *buffer, char **start, off_t offset,
57 int count, int *eof, void *data ) 56 int count, int *eof, void *data)
58{ 57{
59 *start = buffer; 58 *start = buffer;
60 *eof = 1; 59 *eof = 1;
@@ -79,87 +78,68 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset,
79 78
80#define DJF( x ) le32_to_cpu( rs -> x ) 79#define DJF( x ) le32_to_cpu( rs -> x )
81#define DJV( x ) le32_to_cpu( s_v1 -> x ) 80#define DJV( x ) le32_to_cpu( s_v1 -> x )
82#define DJP( x ) le32_to_cpu( jp -> x ) 81#define DJP( x ) le32_to_cpu( jp -> x )
83#define JF( x ) ( r -> s_journal -> x ) 82#define JF( x ) ( r -> s_journal -> x )
84 83
85static int show_super(struct seq_file *m, struct super_block *sb) 84static int show_super(struct seq_file *m, struct super_block *sb)
86{ 85{
87 struct reiserfs_sb_info *r = REISERFS_SB(sb); 86 struct reiserfs_sb_info *r = REISERFS_SB(sb);
88 87
89 seq_printf(m, "state: \t%s\n" 88 seq_printf(m, "state: \t%s\n"
90 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n" 89 "mount options: \t%s%s%s%s%s%s%s%s%s%s%s\n"
91 "gen. counter: \t%i\n" 90 "gen. counter: \t%i\n"
92 "s_kmallocs: \t%i\n" 91 "s_kmallocs: \t%i\n"
93 "s_disk_reads: \t%i\n" 92 "s_disk_reads: \t%i\n"
94 "s_disk_writes: \t%i\n" 93 "s_disk_writes: \t%i\n"
95 "s_fix_nodes: \t%i\n" 94 "s_fix_nodes: \t%i\n"
96 "s_do_balance: \t%i\n" 95 "s_do_balance: \t%i\n"
97 "s_unneeded_left_neighbor: \t%i\n" 96 "s_unneeded_left_neighbor: \t%i\n"
98 "s_good_search_by_key_reada: \t%i\n" 97 "s_good_search_by_key_reada: \t%i\n"
99 "s_bmaps: \t%i\n" 98 "s_bmaps: \t%i\n"
100 "s_bmaps_without_search: \t%i\n" 99 "s_bmaps_without_search: \t%i\n"
101 "s_direct2indirect: \t%i\n" 100 "s_direct2indirect: \t%i\n"
102 "s_indirect2direct: \t%i\n" 101 "s_indirect2direct: \t%i\n"
103 "\n" 102 "\n"
104 "max_hash_collisions: \t%i\n" 103 "max_hash_collisions: \t%i\n"
105 104 "breads: \t%lu\n"
106 "breads: \t%lu\n" 105 "bread_misses: \t%lu\n"
107 "bread_misses: \t%lu\n" 106 "search_by_key: \t%lu\n"
108 107 "search_by_key_fs_changed: \t%lu\n"
109 "search_by_key: \t%lu\n" 108 "search_by_key_restarted: \t%lu\n"
110 "search_by_key_fs_changed: \t%lu\n" 109 "insert_item_restarted: \t%lu\n"
111 "search_by_key_restarted: \t%lu\n" 110 "paste_into_item_restarted: \t%lu\n"
112 111 "cut_from_item_restarted: \t%lu\n"
113 "insert_item_restarted: \t%lu\n" 112 "delete_solid_item_restarted: \t%lu\n"
114 "paste_into_item_restarted: \t%lu\n" 113 "delete_item_restarted: \t%lu\n"
115 "cut_from_item_restarted: \t%lu\n" 114 "leaked_oid: \t%lu\n"
116 "delete_solid_item_restarted: \t%lu\n" 115 "leaves_removable: \t%lu\n",
117 "delete_item_restarted: \t%lu\n" 116 SF(s_mount_state) == REISERFS_VALID_FS ?
118 117 "REISERFS_VALID_FS" : "REISERFS_ERROR_FS",
119 "leaked_oid: \t%lu\n" 118 reiserfs_r5_hash(sb) ? "FORCE_R5 " : "",
120 "leaves_removable: \t%lu\n", 119 reiserfs_rupasov_hash(sb) ? "FORCE_RUPASOV " : "",
121 120 reiserfs_tea_hash(sb) ? "FORCE_TEA " : "",
122 SF( s_mount_state ) == REISERFS_VALID_FS ? 121 reiserfs_hash_detect(sb) ? "DETECT_HASH " : "",
123 "REISERFS_VALID_FS" : "REISERFS_ERROR_FS", 122 reiserfs_no_border(sb) ? "NO_BORDER " : "BORDER ",
124 reiserfs_r5_hash( sb ) ? "FORCE_R5 " : "", 123 reiserfs_no_unhashed_relocation(sb) ?
125 reiserfs_rupasov_hash( sb ) ? "FORCE_RUPASOV " : "", 124 "NO_UNHASHED_RELOCATION " : "",
126 reiserfs_tea_hash( sb ) ? "FORCE_TEA " : "", 125 reiserfs_hashed_relocation(sb) ? "UNHASHED_RELOCATION " : "",
127 reiserfs_hash_detect( sb ) ? "DETECT_HASH " : "", 126 reiserfs_test4(sb) ? "TEST4 " : "",
128 reiserfs_no_border( sb ) ? "NO_BORDER " : "BORDER ", 127 have_large_tails(sb) ? "TAILS " : have_small_tails(sb) ?
129 reiserfs_no_unhashed_relocation( sb ) ? "NO_UNHASHED_RELOCATION " : "", 128 "SMALL_TAILS " : "NO_TAILS ",
130 reiserfs_hashed_relocation( sb ) ? "UNHASHED_RELOCATION " : "", 129 replay_only(sb) ? "REPLAY_ONLY " : "",
131 reiserfs_test4( sb ) ? "TEST4 " : "", 130 convert_reiserfs(sb) ? "CONV " : "",
132 have_large_tails( sb ) ? "TAILS " : have_small_tails(sb)?"SMALL_TAILS ":"NO_TAILS ", 131 atomic_read(&r->s_generation_counter), SF(s_kmallocs),
133 replay_only( sb ) ? "REPLAY_ONLY " : "", 132 SF(s_disk_reads), SF(s_disk_writes), SF(s_fix_nodes),
134 convert_reiserfs( sb ) ? "CONV " : "", 133 SF(s_do_balance), SF(s_unneeded_left_neighbor),
135 134 SF(s_good_search_by_key_reada), SF(s_bmaps),
136 atomic_read( &r -> s_generation_counter ), 135 SF(s_bmaps_without_search), SF(s_direct2indirect),
137 SF( s_kmallocs ), 136 SF(s_indirect2direct), SFP(max_hash_collisions), SFP(breads),
138 SF( s_disk_reads ), 137 SFP(bread_miss), SFP(search_by_key),
139 SF( s_disk_writes ), 138 SFP(search_by_key_fs_changed), SFP(search_by_key_restarted),
140 SF( s_fix_nodes ), 139 SFP(insert_item_restarted), SFP(paste_into_item_restarted),
141 SF( s_do_balance ), 140 SFP(cut_from_item_restarted),
142 SF( s_unneeded_left_neighbor ), 141 SFP(delete_solid_item_restarted), SFP(delete_item_restarted),
143 SF( s_good_search_by_key_reada ), 142 SFP(leaked_oid), SFP(leaves_removable));
144 SF( s_bmaps ),
145 SF( s_bmaps_without_search ),
146 SF( s_direct2indirect ),
147 SF( s_indirect2direct ),
148 SFP( max_hash_collisions ),
149 SFP( breads ),
150 SFP( bread_miss ),
151 SFP( search_by_key ),
152 SFP( search_by_key_fs_changed ),
153 SFP( search_by_key_restarted ),
154
155 SFP( insert_item_restarted ),
156 SFP( paste_into_item_restarted ),
157 SFP( cut_from_item_restarted ),
158 SFP( delete_solid_item_restarted ),
159 SFP( delete_item_restarted ),
160
161 SFP( leaked_oid ),
162 SFP( leaves_removable ) );
163 143
164 return 0; 144 return 0;
165} 145}
@@ -169,61 +149,55 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
169 struct reiserfs_sb_info *r = REISERFS_SB(sb); 149 struct reiserfs_sb_info *r = REISERFS_SB(sb);
170 int level; 150 int level;
171 151
172 seq_printf(m, "level\t" 152 seq_printf(m, "level\t"
173 " balances" 153 " balances"
174 " [sbk: reads" 154 " [sbk: reads"
175 " fs_changed" 155 " fs_changed"
176 " restarted]" 156 " restarted]"
177 " free space" 157 " free space"
178 " items" 158 " items"
179 " can_remove" 159 " can_remove"
180 " lnum" 160 " lnum"
181 " rnum" 161 " rnum"
182 " lbytes" 162 " lbytes"
183 " rbytes" 163 " rbytes"
184 " get_neig" 164 " get_neig"
185 " get_neig_res" 165 " get_neig_res" " need_l_neig" " need_r_neig" "\n");
186 " need_l_neig" 166
187 " need_r_neig" 167 for (level = 0; level < MAX_HEIGHT; ++level) {
188 "\n" 168 seq_printf(m, "%i\t"
189 169 " %12lu"
190 ); 170 " %12lu"
191 171 " %12lu"
192 for( level = 0 ; level < MAX_HEIGHT ; ++ level ) { 172 " %12lu"
193 seq_printf(m, "%i\t" 173 " %12lu"
194 " %12lu" 174 " %12lu"
195 " %12lu" 175 " %12lu"
196 " %12lu" 176 " %12li"
197 " %12lu" 177 " %12li"
198 " %12lu" 178 " %12li"
199 " %12lu" 179 " %12li"
200 " %12lu" 180 " %12lu"
201 " %12li" 181 " %12lu"
202 " %12li" 182 " %12lu"
203 " %12li" 183 " %12lu"
204 " %12li" 184 "\n",
205 " %12lu" 185 level,
206 " %12lu" 186 SFPL(balance_at),
207 " %12lu" 187 SFPL(sbk_read_at),
208 " %12lu" 188 SFPL(sbk_fs_changed),
209 "\n", 189 SFPL(sbk_restarted),
210 level, 190 SFPL(free_at),
211 SFPL( balance_at ), 191 SFPL(items_at),
212 SFPL( sbk_read_at ), 192 SFPL(can_node_be_removed),
213 SFPL( sbk_fs_changed ), 193 SFPL(lnum),
214 SFPL( sbk_restarted ), 194 SFPL(rnum),
215 SFPL( free_at ), 195 SFPL(lbytes),
216 SFPL( items_at ), 196 SFPL(rbytes),
217 SFPL( can_node_be_removed ), 197 SFPL(get_neighbors),
218 SFPL( lnum ), 198 SFPL(get_neighbors_restart),
219 SFPL( rnum ), 199 SFPL(need_l_neighbor), SFPL(need_r_neighbor)
220 SFPL( lbytes ), 200 );
221 SFPL( rbytes ),
222 SFPL( get_neighbors ),
223 SFPL( get_neighbors_restart ),
224 SFPL( need_l_neighbor ),
225 SFPL( need_r_neighbor )
226 );
227 } 201 }
228 return 0; 202 return 0;
229} 203}
@@ -232,31 +206,30 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
232{ 206{
233 struct reiserfs_sb_info *r = REISERFS_SB(sb); 207 struct reiserfs_sb_info *r = REISERFS_SB(sb);
234 208
235 seq_printf(m, "free_block: %lu\n" 209 seq_printf(m, "free_block: %lu\n"
236 " scan_bitmap:" 210 " scan_bitmap:"
237 " wait" 211 " wait"
238 " bmap" 212 " bmap"
239 " retry" 213 " retry"
240 " stolen" 214 " stolen"
241 " journal_hint" 215 " journal_hint"
242 "journal_nohint" 216 "journal_nohint"
243 "\n" 217 "\n"
244 " %14lu" 218 " %14lu"
245 " %14lu" 219 " %14lu"
246 " %14lu" 220 " %14lu"
247 " %14lu" 221 " %14lu"
248 " %14lu" 222 " %14lu"
249 " %14lu" 223 " %14lu"
250 " %14lu" 224 " %14lu"
251 "\n", 225 "\n",
252 SFP( free_block ), 226 SFP(free_block),
253 SFPF( call ), 227 SFPF(call),
254 SFPF( wait ), 228 SFPF(wait),
255 SFPF( bmap ), 229 SFPF(bmap),
256 SFPF( retry ), 230 SFPF(retry),
257 SFPF( stolen ), 231 SFPF(stolen),
258 SFPF( in_journal_hint ), 232 SFPF(in_journal_hint), SFPF(in_journal_nohint));
259 SFPF( in_journal_nohint ) );
260 233
261 return 0; 234 return 0;
262} 235}
@@ -264,46 +237,42 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
264static int show_on_disk_super(struct seq_file *m, struct super_block *sb) 237static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
265{ 238{
266 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 239 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
267 struct reiserfs_super_block *rs = sb_info -> s_rs; 240 struct reiserfs_super_block *rs = sb_info->s_rs;
268 int hash_code = DFL( s_hash_function_code ); 241 int hash_code = DFL(s_hash_function_code);
269 __u32 flags = DJF( s_flags ); 242 __u32 flags = DJF(s_flags);
270 243
271 seq_printf(m, "block_count: \t%i\n" 244 seq_printf(m, "block_count: \t%i\n"
272 "free_blocks: \t%i\n" 245 "free_blocks: \t%i\n"
273 "root_block: \t%i\n" 246 "root_block: \t%i\n"
274 "blocksize: \t%i\n" 247 "blocksize: \t%i\n"
275 "oid_maxsize: \t%i\n" 248 "oid_maxsize: \t%i\n"
276 "oid_cursize: \t%i\n" 249 "oid_cursize: \t%i\n"
277 "umount_state: \t%i\n" 250 "umount_state: \t%i\n"
278 "magic: \t%10.10s\n" 251 "magic: \t%10.10s\n"
279 "fs_state: \t%i\n" 252 "fs_state: \t%i\n"
280 "hash: \t%s\n" 253 "hash: \t%s\n"
281 "tree_height: \t%i\n" 254 "tree_height: \t%i\n"
282 "bmap_nr: \t%i\n" 255 "bmap_nr: \t%i\n"
283 "version: \t%i\n" 256 "version: \t%i\n"
284 "flags: \t%x[%s]\n" 257 "flags: \t%x[%s]\n"
285 "reserved_for_journal: \t%i\n", 258 "reserved_for_journal: \t%i\n",
286 259 DFL(s_block_count),
287 DFL( s_block_count ), 260 DFL(s_free_blocks),
288 DFL( s_free_blocks ), 261 DFL(s_root_block),
289 DFL( s_root_block ), 262 DF(s_blocksize),
290 DF( s_blocksize ), 263 DF(s_oid_maxsize),
291 DF( s_oid_maxsize ), 264 DF(s_oid_cursize),
292 DF( s_oid_cursize ), 265 DF(s_umount_state),
293 DF( s_umount_state ), 266 rs->s_v1.s_magic,
294 rs -> s_v1.s_magic, 267 DF(s_fs_state),
295 DF( s_fs_state ), 268 hash_code == TEA_HASH ? "tea" :
296 hash_code == TEA_HASH ? "tea" : 269 (hash_code == YURA_HASH) ? "rupasov" :
297 ( hash_code == YURA_HASH ) ? "rupasov" : 270 (hash_code == R5_HASH) ? "r5" :
298 ( hash_code == R5_HASH ) ? "r5" : 271 (hash_code == UNSET_HASH) ? "unset" : "unknown",
299 ( hash_code == UNSET_HASH ) ? "unset" : "unknown", 272 DF(s_tree_height),
300 DF( s_tree_height ), 273 DF(s_bmap_nr),
301 DF( s_bmap_nr ), 274 DF(s_version), flags, (flags & reiserfs_attrs_cleared)
302 DF( s_version ), 275 ? "attrs_cleared" : "", DF(s_reserved_for_journal));
303 flags,
304 ( flags & reiserfs_attrs_cleared )
305 ? "attrs_cleared" : "",
306 DF (s_reserved_for_journal));
307 276
308 return 0; 277 return 0;
309} 278}
@@ -311,131 +280,122 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
311static int show_oidmap(struct seq_file *m, struct super_block *sb) 280static int show_oidmap(struct seq_file *m, struct super_block *sb)
312{ 281{
313 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 282 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
314 struct reiserfs_super_block *rs = sb_info -> s_rs; 283 struct reiserfs_super_block *rs = sb_info->s_rs;
315 unsigned int mapsize = le16_to_cpu( rs -> s_v1.s_oid_cursize ); 284 unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
316 unsigned long total_used = 0; 285 unsigned long total_used = 0;
317 int i; 286 int i;
318 287
319 for( i = 0 ; i < mapsize ; ++i ) { 288 for (i = 0; i < mapsize; ++i) {
320 __u32 right; 289 __u32 right;
321 290
322 right = ( i == mapsize - 1 ) ? MAX_KEY_OBJECTID : MAP( i + 1 ); 291 right = (i == mapsize - 1) ? MAX_KEY_OBJECTID : MAP(i + 1);
323 seq_printf(m, "%s: [ %x .. %x )\n", 292 seq_printf(m, "%s: [ %x .. %x )\n",
324 ( i & 1 ) ? "free" : "used", MAP( i ), right ); 293 (i & 1) ? "free" : "used", MAP(i), right);
325 if( ! ( i & 1 ) ) { 294 if (!(i & 1)) {
326 total_used += right - MAP( i ); 295 total_used += right - MAP(i);
327 } 296 }
328 } 297 }
329#if defined( REISERFS_USE_OIDMAPF ) 298#if defined( REISERFS_USE_OIDMAPF )
330 if( sb_info -> oidmap.use_file && ( sb_info -> oidmap.mapf != NULL ) ) { 299 if (sb_info->oidmap.use_file && (sb_info->oidmap.mapf != NULL)) {
331 loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size; 300 loff_t size = sb_info->oidmap.mapf->f_dentry->d_inode->i_size;
332 total_used += size / sizeof( reiserfs_oidinterval_d_t ); 301 total_used += size / sizeof(reiserfs_oidinterval_d_t);
333 } 302 }
334#endif 303#endif
335 seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n", 304 seq_printf(m, "total: \t%i [%i/%i] used: %lu [exact]\n",
336 mapsize, 305 mapsize,
337 mapsize, le16_to_cpu( rs -> s_v1.s_oid_maxsize ), 306 mapsize, le16_to_cpu(rs->s_v1.s_oid_maxsize), total_used);
338 total_used);
339 return 0; 307 return 0;
340} 308}
341 309
342static int show_journal(struct seq_file *m, struct super_block *sb) 310static int show_journal(struct seq_file *m, struct super_block *sb)
343{ 311{
344 struct reiserfs_sb_info *r = REISERFS_SB(sb); 312 struct reiserfs_sb_info *r = REISERFS_SB(sb);
345 struct reiserfs_super_block *rs = r -> s_rs; 313 struct reiserfs_super_block *rs = r->s_rs;
346 struct journal_params *jp = &rs->s_v1.s_journal; 314 struct journal_params *jp = &rs->s_v1.s_journal;
347 char b[BDEVNAME_SIZE]; 315 char b[BDEVNAME_SIZE];
348 316
349 317 seq_printf(m, /* on-disk fields */
350 seq_printf(m, /* on-disk fields */ 318 "jp_journal_1st_block: \t%i\n"
351 "jp_journal_1st_block: \t%i\n" 319 "jp_journal_dev: \t%s[%x]\n"
352 "jp_journal_dev: \t%s[%x]\n" 320 "jp_journal_size: \t%i\n"
353 "jp_journal_size: \t%i\n" 321 "jp_journal_trans_max: \t%i\n"
354 "jp_journal_trans_max: \t%i\n" 322 "jp_journal_magic: \t%i\n"
355 "jp_journal_magic: \t%i\n" 323 "jp_journal_max_batch: \t%i\n"
356 "jp_journal_max_batch: \t%i\n" 324 "jp_journal_max_commit_age: \t%i\n"
357 "jp_journal_max_commit_age: \t%i\n" 325 "jp_journal_max_trans_age: \t%i\n"
358 "jp_journal_max_trans_age: \t%i\n" 326 /* incore fields */
359 /* incore fields */ 327 "j_1st_reserved_block: \t%i\n"
360 "j_1st_reserved_block: \t%i\n" 328 "j_state: \t%li\n"
361 "j_state: \t%li\n" 329 "j_trans_id: \t%lu\n"
362 "j_trans_id: \t%lu\n" 330 "j_mount_id: \t%lu\n"
363 "j_mount_id: \t%lu\n" 331 "j_start: \t%lu\n"
364 "j_start: \t%lu\n" 332 "j_len: \t%lu\n"
365 "j_len: \t%lu\n" 333 "j_len_alloc: \t%lu\n"
366 "j_len_alloc: \t%lu\n" 334 "j_wcount: \t%i\n"
367 "j_wcount: \t%i\n" 335 "j_bcount: \t%lu\n"
368 "j_bcount: \t%lu\n" 336 "j_first_unflushed_offset: \t%lu\n"
369 "j_first_unflushed_offset: \t%lu\n" 337 "j_last_flush_trans_id: \t%lu\n"
370 "j_last_flush_trans_id: \t%lu\n" 338 "j_trans_start_time: \t%li\n"
371 "j_trans_start_time: \t%li\n" 339 "j_list_bitmap_index: \t%i\n"
372 "j_list_bitmap_index: \t%i\n" 340 "j_must_wait: \t%i\n"
373 "j_must_wait: \t%i\n" 341 "j_next_full_flush: \t%i\n"
374 "j_next_full_flush: \t%i\n" 342 "j_next_async_flush: \t%i\n"
375 "j_next_async_flush: \t%i\n" 343 "j_cnode_used: \t%i\n" "j_cnode_free: \t%i\n" "\n"
376 "j_cnode_used: \t%i\n" 344 /* reiserfs_proc_info_data_t.journal fields */
377 "j_cnode_free: \t%i\n" 345 "in_journal: \t%12lu\n"
378 "\n" 346 "in_journal_bitmap: \t%12lu\n"
379 /* reiserfs_proc_info_data_t.journal fields */ 347 "in_journal_reusable: \t%12lu\n"
380 "in_journal: \t%12lu\n" 348 "lock_journal: \t%12lu\n"
381 "in_journal_bitmap: \t%12lu\n" 349 "lock_journal_wait: \t%12lu\n"
382 "in_journal_reusable: \t%12lu\n" 350 "journal_begin: \t%12lu\n"
383 "lock_journal: \t%12lu\n" 351 "journal_relock_writers: \t%12lu\n"
384 "lock_journal_wait: \t%12lu\n" 352 "journal_relock_wcount: \t%12lu\n"
385 "journal_begin: \t%12lu\n" 353 "mark_dirty: \t%12lu\n"
386 "journal_relock_writers: \t%12lu\n" 354 "mark_dirty_already: \t%12lu\n"
387 "journal_relock_wcount: \t%12lu\n" 355 "mark_dirty_notjournal: \t%12lu\n"
388 "mark_dirty: \t%12lu\n" 356 "restore_prepared: \t%12lu\n"
389 "mark_dirty_already: \t%12lu\n" 357 "prepare: \t%12lu\n"
390 "mark_dirty_notjournal: \t%12lu\n" 358 "prepare_retry: \t%12lu\n",
391 "restore_prepared: \t%12lu\n" 359 DJP(jp_journal_1st_block),
392 "prepare: \t%12lu\n" 360 bdevname(SB_JOURNAL(sb)->j_dev_bd, b),
393 "prepare_retry: \t%12lu\n", 361 DJP(jp_journal_dev),
394 362 DJP(jp_journal_size),
395 DJP( jp_journal_1st_block ), 363 DJP(jp_journal_trans_max),
396 bdevname(SB_JOURNAL(sb)->j_dev_bd, b), 364 DJP(jp_journal_magic),
397 DJP( jp_journal_dev ), 365 DJP(jp_journal_max_batch),
398 DJP( jp_journal_size ), 366 SB_JOURNAL(sb)->j_max_commit_age,
399 DJP( jp_journal_trans_max ), 367 DJP(jp_journal_max_trans_age),
400 DJP( jp_journal_magic ), 368 JF(j_1st_reserved_block),
401 DJP( jp_journal_max_batch ), 369 JF(j_state),
402 SB_JOURNAL(sb)->j_max_commit_age, 370 JF(j_trans_id),
403 DJP( jp_journal_max_trans_age ), 371 JF(j_mount_id),
404 372 JF(j_start),
405 JF( j_1st_reserved_block ), 373 JF(j_len),
406 JF( j_state ), 374 JF(j_len_alloc),
407 JF( j_trans_id ), 375 atomic_read(&r->s_journal->j_wcount),
408 JF( j_mount_id ), 376 JF(j_bcount),
409 JF( j_start ), 377 JF(j_first_unflushed_offset),
410 JF( j_len ), 378 JF(j_last_flush_trans_id),
411 JF( j_len_alloc ), 379 JF(j_trans_start_time),
412 atomic_read( & r -> s_journal -> j_wcount ), 380 JF(j_list_bitmap_index),
413 JF( j_bcount ), 381 JF(j_must_wait),
414 JF( j_first_unflushed_offset ), 382 JF(j_next_full_flush),
415 JF( j_last_flush_trans_id ), 383 JF(j_next_async_flush),
416 JF( j_trans_start_time ), 384 JF(j_cnode_used),
417 JF( j_list_bitmap_index ), 385 JF(j_cnode_free),
418 JF( j_must_wait ), 386 SFPJ(in_journal),
419 JF( j_next_full_flush ), 387 SFPJ(in_journal_bitmap),
420 JF( j_next_async_flush ), 388 SFPJ(in_journal_reusable),
421 JF( j_cnode_used ), 389 SFPJ(lock_journal),
422 JF( j_cnode_free ), 390 SFPJ(lock_journal_wait),
423 391 SFPJ(journal_being),
424 SFPJ( in_journal ), 392 SFPJ(journal_relock_writers),
425 SFPJ( in_journal_bitmap ), 393 SFPJ(journal_relock_wcount),
426 SFPJ( in_journal_reusable ), 394 SFPJ(mark_dirty),
427 SFPJ( lock_journal ), 395 SFPJ(mark_dirty_already),
428 SFPJ( lock_journal_wait ), 396 SFPJ(mark_dirty_notjournal),
429 SFPJ( journal_being ), 397 SFPJ(restore_prepared), SFPJ(prepare), SFPJ(prepare_retry)
430 SFPJ( journal_relock_writers ), 398 );
431 SFPJ( journal_relock_wcount ),
432 SFPJ( mark_dirty ),
433 SFPJ( mark_dirty_already ),
434 SFPJ( mark_dirty_notjournal ),
435 SFPJ( restore_prepared ),
436 SFPJ( prepare ),
437 SFPJ( prepare_retry )
438 );
439 return 0; 399 return 0;
440} 400}
441 401
@@ -450,7 +410,7 @@ static int set_sb(struct super_block *sb, void *data)
450 return -ENOENT; 410 return -ENOENT;
451} 411}
452 412
453static void *r_start(struct seq_file *m, loff_t *pos) 413static void *r_start(struct seq_file *m, loff_t * pos)
454{ 414{
455 struct proc_dir_entry *de = m->private; 415 struct proc_dir_entry *de = m->private;
456 struct super_block *s = de->parent->data; 416 struct super_block *s = de->parent->data;
@@ -472,7 +432,7 @@ static void *r_start(struct seq_file *m, loff_t *pos)
472 return s; 432 return s;
473} 433}
474 434
475static void *r_next(struct seq_file *m, void *v, loff_t *pos) 435static void *r_next(struct seq_file *m, void *v, loff_t * pos)
476{ 436{
477 ++*pos; 437 ++*pos;
478 if (v) 438 if (v)
@@ -489,7 +449,7 @@ static void r_stop(struct seq_file *m, void *v)
489static int r_show(struct seq_file *m, void *v) 449static int r_show(struct seq_file *m, void *v)
490{ 450{
491 struct proc_dir_entry *de = m->private; 451 struct proc_dir_entry *de = m->private;
492 int (*show)(struct seq_file *, struct super_block *) = de->data; 452 int (*show) (struct seq_file *, struct super_block *) = de->data;
493 return show(m, v); 453 return show(m, v);
494} 454}
495 455
@@ -512,17 +472,17 @@ static int r_open(struct inode *inode, struct file *file)
512} 472}
513 473
514static struct file_operations r_file_operations = { 474static struct file_operations r_file_operations = {
515 .open = r_open, 475 .open = r_open,
516 .read = seq_read, 476 .read = seq_read,
517 .llseek = seq_lseek, 477 .llseek = seq_lseek,
518 .release = seq_release, 478 .release = seq_release,
519}; 479};
520 480
521static struct proc_dir_entry *proc_info_root = NULL; 481static struct proc_dir_entry *proc_info_root = NULL;
522static const char proc_info_root_name[] = "fs/reiserfs"; 482static const char proc_info_root_name[] = "fs/reiserfs";
523 483
524static void add_file(struct super_block *sb, char *name, 484static void add_file(struct super_block *sb, char *name,
525 int (*func)(struct seq_file *, struct super_block *)) 485 int (*func) (struct seq_file *, struct super_block *))
526{ 486{
527 struct proc_dir_entry *de; 487 struct proc_dir_entry *de;
528 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); 488 de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir);
@@ -532,11 +492,12 @@ static void add_file(struct super_block *sb, char *name,
532 } 492 }
533} 493}
534 494
535int reiserfs_proc_info_init( struct super_block *sb ) 495int reiserfs_proc_info_init(struct super_block *sb)
536{ 496{
537 spin_lock_init( & __PINFO( sb ).lock ); 497 spin_lock_init(&__PINFO(sb).lock);
538 REISERFS_SB(sb)->procdir = proc_mkdir(reiserfs_bdevname (sb), proc_info_root); 498 REISERFS_SB(sb)->procdir =
539 if( REISERFS_SB(sb)->procdir ) { 499 proc_mkdir(reiserfs_bdevname(sb), proc_info_root);
500 if (REISERFS_SB(sb)->procdir) {
540 REISERFS_SB(sb)->procdir->owner = THIS_MODULE; 501 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
541 REISERFS_SB(sb)->procdir->data = sb; 502 REISERFS_SB(sb)->procdir->data = sb;
542 add_file(sb, "version", show_version); 503 add_file(sb, "version", show_version);
@@ -549,11 +510,11 @@ int reiserfs_proc_info_init( struct super_block *sb )
549 return 0; 510 return 0;
550 } 511 }
551 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s", 512 reiserfs_warning(sb, "reiserfs: cannot create /proc/%s/%s",
552 proc_info_root_name, reiserfs_bdevname (sb) ); 513 proc_info_root_name, reiserfs_bdevname(sb));
553 return 1; 514 return 1;
554} 515}
555 516
556int reiserfs_proc_info_done( struct super_block *sb ) 517int reiserfs_proc_info_done(struct super_block *sb)
557{ 518{
558 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; 519 struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
559 if (de) { 520 if (de) {
@@ -565,48 +526,48 @@ int reiserfs_proc_info_done( struct super_block *sb )
565 remove_proc_entry("super", de); 526 remove_proc_entry("super", de);
566 remove_proc_entry("version", de); 527 remove_proc_entry("version", de);
567 } 528 }
568 spin_lock( & __PINFO( sb ).lock ); 529 spin_lock(&__PINFO(sb).lock);
569 __PINFO( sb ).exiting = 1; 530 __PINFO(sb).exiting = 1;
570 spin_unlock( & __PINFO( sb ).lock ); 531 spin_unlock(&__PINFO(sb).lock);
571 if ( proc_info_root ) { 532 if (proc_info_root) {
572 remove_proc_entry( reiserfs_bdevname (sb), proc_info_root ); 533 remove_proc_entry(reiserfs_bdevname(sb), proc_info_root);
573 REISERFS_SB(sb)->procdir = NULL; 534 REISERFS_SB(sb)->procdir = NULL;
574 } 535 }
575 return 0; 536 return 0;
576} 537}
577 538
578struct proc_dir_entry *reiserfs_proc_register_global( char *name, 539struct proc_dir_entry *reiserfs_proc_register_global(char *name,
579 read_proc_t *func ) 540 read_proc_t * func)
580{ 541{
581 return ( proc_info_root ) ? create_proc_read_entry( name, 0, 542 return (proc_info_root) ? create_proc_read_entry(name, 0,
582 proc_info_root, 543 proc_info_root,
583 func, NULL ) : NULL; 544 func, NULL) : NULL;
584} 545}
585 546
586void reiserfs_proc_unregister_global( const char *name ) 547void reiserfs_proc_unregister_global(const char *name)
587{ 548{
588 remove_proc_entry( name, proc_info_root ); 549 remove_proc_entry(name, proc_info_root);
589} 550}
590 551
591int reiserfs_proc_info_global_init( void ) 552int reiserfs_proc_info_global_init(void)
592{ 553{
593 if( proc_info_root == NULL ) { 554 if (proc_info_root == NULL) {
594 proc_info_root = proc_mkdir(proc_info_root_name, NULL); 555 proc_info_root = proc_mkdir(proc_info_root_name, NULL);
595 if( proc_info_root ) { 556 if (proc_info_root) {
596 proc_info_root -> owner = THIS_MODULE; 557 proc_info_root->owner = THIS_MODULE;
597 } else { 558 } else {
598 reiserfs_warning (NULL, 559 reiserfs_warning(NULL,
599 "reiserfs: cannot create /proc/%s", 560 "reiserfs: cannot create /proc/%s",
600 proc_info_root_name ); 561 proc_info_root_name);
601 return 1; 562 return 1;
602 } 563 }
603 } 564 }
604 return 0; 565 return 0;
605} 566}
606 567
607int reiserfs_proc_info_global_done( void ) 568int reiserfs_proc_info_global_done(void)
608{ 569{
609 if ( proc_info_root != NULL ) { 570 if (proc_info_root != NULL) {
610 proc_info_root = NULL; 571 proc_info_root = NULL;
611 remove_proc_entry(proc_info_root_name, NULL); 572 remove_proc_entry(proc_info_root_name, NULL);
612 } 573 }
@@ -616,22 +577,40 @@ int reiserfs_proc_info_global_done( void )
616/* REISERFS_PROC_INFO */ 577/* REISERFS_PROC_INFO */
617#else 578#else
618 579
619int reiserfs_proc_info_init( struct super_block *sb ) { return 0; } 580int reiserfs_proc_info_init(struct super_block *sb)
620int reiserfs_proc_info_done( struct super_block *sb ) { return 0; } 581{
582 return 0;
583}
584int reiserfs_proc_info_done(struct super_block *sb)
585{
586 return 0;
587}
621 588
622struct proc_dir_entry *reiserfs_proc_register_global( char *name, 589struct proc_dir_entry *reiserfs_proc_register_global(char *name,
623 read_proc_t *func ) 590 read_proc_t * func)
624{ return NULL; } 591{
592 return NULL;
593}
625 594
626void reiserfs_proc_unregister_global( const char *name ) {;} 595void reiserfs_proc_unregister_global(const char *name)
596{;
597}
627 598
628int reiserfs_proc_info_global_init( void ) { return 0; } 599int reiserfs_proc_info_global_init(void)
629int reiserfs_proc_info_global_done( void ) { return 0; } 600{
601 return 0;
602}
603int reiserfs_proc_info_global_done(void)
604{
605 return 0;
606}
630 607
631int reiserfs_global_version_in_proc( char *buffer, char **start, 608int reiserfs_global_version_in_proc(char *buffer, char **start,
632 off_t offset, 609 off_t offset,
633 int count, int *eof, void *data ) 610 int count, int *eof, void *data)
634{ return 0; } 611{
612 return 0;
613}
635 614
636/* REISERFS_PROC_INFO */ 615/* REISERFS_PROC_INFO */
637#endif 616#endif
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 170012078b7..39cc7f47f5d 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */ 3 */
4 4
5/* 5/*
6 * Written by Alexander Zarochentcev. 6 * Written by Alexander Zarochentcev.
7 * 7 *
@@ -17,23 +17,23 @@
17#include <linux/reiserfs_fs_sb.h> 17#include <linux/reiserfs_fs_sb.h>
18#include <linux/buffer_head.h> 18#include <linux/buffer_head.h>
19 19
20int reiserfs_resize (struct super_block * s, unsigned long block_count_new) 20int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
21{ 21{
22 int err = 0; 22 int err = 0;
23 struct reiserfs_super_block * sb; 23 struct reiserfs_super_block *sb;
24 struct reiserfs_bitmap_info *bitmap; 24 struct reiserfs_bitmap_info *bitmap;
25 struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s); 25 struct reiserfs_bitmap_info *old_bitmap = SB_AP_BITMAP(s);
26 struct buffer_head * bh; 26 struct buffer_head *bh;
27 struct reiserfs_transaction_handle th; 27 struct reiserfs_transaction_handle th;
28 unsigned int bmap_nr_new, bmap_nr; 28 unsigned int bmap_nr_new, bmap_nr;
29 unsigned int block_r_new, block_r; 29 unsigned int block_r_new, block_r;
30 30
31 struct reiserfs_list_bitmap * jb; 31 struct reiserfs_list_bitmap *jb;
32 struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; 32 struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS];
33 33
34 unsigned long int block_count, free_blocks; 34 unsigned long int block_count, free_blocks;
35 int i; 35 int i;
36 int copy_size ; 36 int copy_size;
37 37
38 sb = SB_DISK_SUPER_BLOCK(s); 38 sb = SB_DISK_SUPER_BLOCK(s);
39 39
@@ -47,136 +47,145 @@ int reiserfs_resize (struct super_block * s, unsigned long block_count_new)
47 if (!bh) { 47 if (!bh) {
48 printk("reiserfs_resize: can\'t read last block\n"); 48 printk("reiserfs_resize: can\'t read last block\n");
49 return -EINVAL; 49 return -EINVAL;
50 } 50 }
51 bforget(bh); 51 bforget(bh);
52 52
53 /* old disk layout detection; those partitions can be mounted, but 53 /* old disk layout detection; those partitions can be mounted, but
54 * cannot be resized */ 54 * cannot be resized */
55 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size 55 if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size
56 != REISERFS_DISK_OFFSET_IN_BYTES ) { 56 != REISERFS_DISK_OFFSET_IN_BYTES) {
57 printk("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); 57 printk
58 ("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n");
58 return -ENOTSUPP; 59 return -ENOTSUPP;
59 } 60 }
60 61
61 /* count used bits in last bitmap block */ 62 /* count used bits in last bitmap block */
62 block_r = SB_BLOCK_COUNT(s) - 63 block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8;
63 (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; 64
64
65 /* count bitmap blocks in new fs */ 65 /* count bitmap blocks in new fs */
66 bmap_nr_new = block_count_new / ( s->s_blocksize * 8 ); 66 bmap_nr_new = block_count_new / (s->s_blocksize * 8);
67 block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; 67 block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8;
68 if (block_r_new) 68 if (block_r_new)
69 bmap_nr_new++; 69 bmap_nr_new++;
70 else 70 else
71 block_r_new = s->s_blocksize * 8; 71 block_r_new = s->s_blocksize * 8;
72 72
73 /* save old values */ 73 /* save old values */
74 block_count = SB_BLOCK_COUNT(s); 74 block_count = SB_BLOCK_COUNT(s);
75 bmap_nr = SB_BMAP_NR(s); 75 bmap_nr = SB_BMAP_NR(s);
76 76
77 /* resizing of reiserfs bitmaps (journal and real), if needed */ 77 /* resizing of reiserfs bitmaps (journal and real), if needed */
78 if (bmap_nr_new > bmap_nr) { 78 if (bmap_nr_new > bmap_nr) {
79 /* reallocate journal bitmaps */ 79 /* reallocate journal bitmaps */
80 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { 80 if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) {
81 printk("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); 81 printk
82 unlock_super(s) ; 82 ("reiserfs_resize: unable to allocate memory for journal bitmaps\n");
83 return -ENOMEM ; 83 unlock_super(s);
84 } 84 return -ENOMEM;
85 /* the new journal bitmaps are zero filled, now we copy in the bitmap 85 }
86 ** node pointers from the old journal bitmap structs, and then 86 /* the new journal bitmaps are zero filled, now we copy in the bitmap
87 ** transfer the new data structures into the journal struct. 87 ** node pointers from the old journal bitmap structs, and then
88 ** 88 ** transfer the new data structures into the journal struct.
89 ** using the copy_size var below allows this code to work for 89 **
90 ** both shrinking and expanding the FS. 90 ** using the copy_size var below allows this code to work for
91 */ 91 ** both shrinking and expanding the FS.
92 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr ; 92 */
93 copy_size = copy_size * sizeof(struct reiserfs_list_bitmap_node *) ; 93 copy_size = bmap_nr_new < bmap_nr ? bmap_nr_new : bmap_nr;
94 for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { 94 copy_size =
95 struct reiserfs_bitmap_node **node_tmp ; 95 copy_size * sizeof(struct reiserfs_list_bitmap_node *);
96 jb = SB_JOURNAL(s)->j_list_bitmap + i ; 96 for (i = 0; i < JOURNAL_NUM_BITMAPS; i++) {
97 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size) ; 97 struct reiserfs_bitmap_node **node_tmp;
98 98 jb = SB_JOURNAL(s)->j_list_bitmap + i;
99 /* just in case vfree schedules on us, copy the new 99 memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size);
100 ** pointer into the journal struct before freeing the 100
101 ** old one 101 /* just in case vfree schedules on us, copy the new
102 */ 102 ** pointer into the journal struct before freeing the
103 node_tmp = jb->bitmaps ; 103 ** old one
104 jb->bitmaps = jbitmap[i].bitmaps ; 104 */
105 vfree(node_tmp) ; 105 node_tmp = jb->bitmaps;
106 } 106 jb->bitmaps = jbitmap[i].bitmaps;
107 107 vfree(node_tmp);
108 /* allocate additional bitmap blocks, reallocate array of bitmap 108 }
109 * block pointers */ 109
110 bitmap = vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); 110 /* allocate additional bitmap blocks, reallocate array of bitmap
111 if (!bitmap) { 111 * block pointers */
112 /* Journal bitmaps are still supersized, but the memory isn't 112 bitmap =
113 * leaked, so I guess it's ok */ 113 vmalloc(sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
114 printk("reiserfs_resize: unable to allocate memory.\n"); 114 if (!bitmap) {
115 return -ENOMEM; 115 /* Journal bitmaps are still supersized, but the memory isn't
116 } 116 * leaked, so I guess it's ok */
117 memset (bitmap, 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 117 printk("reiserfs_resize: unable to allocate memory.\n");
118 for (i = 0; i < bmap_nr; i++) 118 return -ENOMEM;
119 bitmap[i] = old_bitmap[i]; 119 }
120 120 memset(bitmap, 0,
121 /* This doesn't go through the journal, but it doesn't have to. 121 sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
122 * The changes are still atomic: We're synced up when the journal 122 for (i = 0; i < bmap_nr; i++)
123 * transaction begins, and the new bitmaps don't matter if the 123 bitmap[i] = old_bitmap[i];
124 * transaction fails. */ 124
125 for (i = bmap_nr; i < bmap_nr_new; i++) { 125 /* This doesn't go through the journal, but it doesn't have to.
126 bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8); 126 * The changes are still atomic: We're synced up when the journal
127 memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb)); 127 * transaction begins, and the new bitmaps don't matter if the
128 reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data); 128 * transaction fails. */
129 129 for (i = bmap_nr; i < bmap_nr_new; i++) {
130 set_buffer_uptodate(bitmap[i].bh); 130 bitmap[i].bh = sb_getblk(s, i * s->s_blocksize * 8);
131 mark_buffer_dirty(bitmap[i].bh) ; 131 memset(bitmap[i].bh->b_data, 0, sb_blocksize(sb));
132 sync_dirty_buffer(bitmap[i].bh); 132 reiserfs_test_and_set_le_bit(0, bitmap[i].bh->b_data);
133 // update bitmap_info stuff 133
134 bitmap[i].first_zero_hint=1; 134 set_buffer_uptodate(bitmap[i].bh);
135 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; 135 mark_buffer_dirty(bitmap[i].bh);
136 } 136 sync_dirty_buffer(bitmap[i].bh);
137 /* free old bitmap blocks array */ 137 // update bitmap_info stuff
138 SB_AP_BITMAP(s) = bitmap; 138 bitmap[i].first_zero_hint = 1;
139 vfree (old_bitmap); 139 bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
140 }
141 /* free old bitmap blocks array */
142 SB_AP_BITMAP(s) = bitmap;
143 vfree(old_bitmap);
140 } 144 }
141 145
142 /* begin transaction, if there was an error, it's fine. Yes, we have 146 /* begin transaction, if there was an error, it's fine. Yes, we have
143 * incorrect bitmaps now, but none of it is ever going to touch the 147 * incorrect bitmaps now, but none of it is ever going to touch the
144 * disk anyway. */ 148 * disk anyway. */
145 err = journal_begin(&th, s, 10); 149 err = journal_begin(&th, s, 10);
146 if (err) 150 if (err)
147 return err; 151 return err;
148 152
149 /* correct last bitmap blocks in old and new disk layout */ 153 /* correct last bitmap blocks in old and new disk layout */
150 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1); 154 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1].bh, 1);
151 for (i = block_r; i < s->s_blocksize * 8; i++) 155 for (i = block_r; i < s->s_blocksize * 8; i++)
152 reiserfs_test_and_clear_le_bit(i, 156 reiserfs_test_and_clear_le_bit(i,
153 SB_AP_BITMAP(s)[bmap_nr - 1].bh->b_data); 157 SB_AP_BITMAP(s)[bmap_nr -
158 1].bh->b_data);
154 SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r; 159 SB_AP_BITMAP(s)[bmap_nr - 1].free_count += s->s_blocksize * 8 - block_r;
155 if ( !SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint) 160 if (!SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint)
156 SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r; 161 SB_AP_BITMAP(s)[bmap_nr - 1].first_zero_hint = block_r;
157 162
158 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh); 163 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1].bh);
159 164
160 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1); 165 reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh, 1);
161 for (i = block_r_new; i < s->s_blocksize * 8; i++) 166 for (i = block_r_new; i < s->s_blocksize * 8; i++)
162 reiserfs_test_and_set_le_bit(i, 167 reiserfs_test_and_set_le_bit(i,
163 SB_AP_BITMAP(s)[bmap_nr_new - 1].bh->b_data); 168 SB_AP_BITMAP(s)[bmap_nr_new -
169 1].bh->b_data);
164 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh); 170 journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1].bh);
165 171
166 SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -= s->s_blocksize * 8 - block_r_new; 172 SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count -=
173 s->s_blocksize * 8 - block_r_new;
167 /* Extreme case where last bitmap is the only valid block in itself. */ 174 /* Extreme case where last bitmap is the only valid block in itself. */
168 if ( !SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count ) 175 if (!SB_AP_BITMAP(s)[bmap_nr_new - 1].free_count)
169 SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0; 176 SB_AP_BITMAP(s)[bmap_nr_new - 1].first_zero_hint = 0;
170 /* update super */ 177 /* update super */
171 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 178 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
172 free_blocks = SB_FREE_BLOCKS(s); 179 free_blocks = SB_FREE_BLOCKS(s);
173 PUT_SB_FREE_BLOCKS(s, free_blocks + (block_count_new - block_count - (bmap_nr_new - bmap_nr))); 180 PUT_SB_FREE_BLOCKS(s,
181 free_blocks + (block_count_new - block_count -
182 (bmap_nr_new - bmap_nr)));
174 PUT_SB_BLOCK_COUNT(s, block_count_new); 183 PUT_SB_BLOCK_COUNT(s, block_count_new);
175 PUT_SB_BMAP_NR(s, bmap_nr_new); 184 PUT_SB_BMAP_NR(s, bmap_nr_new);
176 s->s_dirt = 1; 185 s->s_dirt = 1;
177 186
178 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); 187 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
179 188
180 SB_JOURNAL(s)->j_must_wait = 1; 189 SB_JOURNAL(s)->j_must_wait = 1;
181 return journal_end(&th, s, 10); 190 return journal_end(&th, s, 10);
182} 191}
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 63158491e15..e2d08d7bcff 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -59,46 +59,45 @@
59#include <linux/quotaops.h> 59#include <linux/quotaops.h>
60 60
61/* Does the buffer contain a disk block which is in the tree. */ 61/* Does the buffer contain a disk block which is in the tree. */
62inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh) 62inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
63{ 63{
64 64
65 RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT, 65 RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh); 66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
67 67
68 return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); 68 return (B_LEVEL(p_s_bh) != FREE_LEVEL);
69} 69}
70 70
71// 71//
72// to gets item head in le form 72// to gets item head in le form
73// 73//
74inline void copy_item_head(struct item_head * p_v_to, 74inline void copy_item_head(struct item_head *p_v_to,
75 const struct item_head * p_v_from) 75 const struct item_head *p_v_from)
76{ 76{
77 memcpy (p_v_to, p_v_from, IH_SIZE); 77 memcpy(p_v_to, p_v_from, IH_SIZE);
78} 78}
79 79
80
81/* k1 is pointer to on-disk structure which is stored in little-endian 80/* k1 is pointer to on-disk structure which is stored in little-endian
82 form. k2 is pointer to cpu variable. For key of items of the same 81 form. k2 is pointer to cpu variable. For key of items of the same
83 object this returns 0. 82 object this returns 0.
84 Returns: -1 if key1 < key2 83 Returns: -1 if key1 < key2
85 0 if key1 == key2 84 0 if key1 == key2
86 1 if key1 > key2 */ 85 1 if key1 > key2 */
87inline int comp_short_keys (const struct reiserfs_key * le_key, 86inline int comp_short_keys(const struct reiserfs_key *le_key,
88 const struct cpu_key * cpu_key) 87 const struct cpu_key *cpu_key)
89{ 88{
90 __u32 n; 89 __u32 n;
91 n = le32_to_cpu(le_key->k_dir_id); 90 n = le32_to_cpu(le_key->k_dir_id);
92 if (n < cpu_key->on_disk_key.k_dir_id) 91 if (n < cpu_key->on_disk_key.k_dir_id)
93 return -1; 92 return -1;
94 if (n > cpu_key->on_disk_key.k_dir_id) 93 if (n > cpu_key->on_disk_key.k_dir_id)
95 return 1; 94 return 1;
96 n = le32_to_cpu(le_key->k_objectid); 95 n = le32_to_cpu(le_key->k_objectid);
97 if (n < cpu_key->on_disk_key.k_objectid) 96 if (n < cpu_key->on_disk_key.k_objectid)
98 return -1; 97 return -1;
99 if (n > cpu_key->on_disk_key.k_objectid) 98 if (n > cpu_key->on_disk_key.k_objectid)
100 return 1; 99 return 1;
101 return 0; 100 return 0;
102} 101}
103 102
104/* k1 is pointer to on-disk structure which is stored in little-endian 103/* k1 is pointer to on-disk structure which is stored in little-endian
@@ -106,68 +105,72 @@ inline int comp_short_keys (const struct reiserfs_key * le_key,
106 Compare keys using all 4 key fields. 105 Compare keys using all 4 key fields.
107 Returns: -1 if key1 < key2 0 106 Returns: -1 if key1 < key2 0
108 if key1 = key2 1 if key1 > key2 */ 107 if key1 = key2 1 if key1 > key2 */
109static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key) 108static inline int comp_keys(const struct reiserfs_key *le_key,
109 const struct cpu_key *cpu_key)
110{ 110{
111 int retval; 111 int retval;
112 112
113 retval = comp_short_keys (le_key, cpu_key); 113 retval = comp_short_keys(le_key, cpu_key);
114 if (retval) 114 if (retval)
115 return retval; 115 return retval;
116 if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key)) 116 if (le_key_k_offset(le_key_version(le_key), le_key) <
117 return -1; 117 cpu_key_k_offset(cpu_key))
118 if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key)) 118 return -1;
119 return 1; 119 if (le_key_k_offset(le_key_version(le_key), le_key) >
120 120 cpu_key_k_offset(cpu_key))
121 if (cpu_key->key_length == 3) 121 return 1;
122 return 0; 122
123 123 if (cpu_key->key_length == 3)
124 /* this part is needed only when tail conversion is in progress */ 124 return 0;
125 if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key)) 125
126 return -1; 126 /* this part is needed only when tail conversion is in progress */
127 if (le_key_k_type(le_key_version(le_key), le_key) <
128 cpu_key_k_type(cpu_key))
129 return -1;
130
131 if (le_key_k_type(le_key_version(le_key), le_key) >
132 cpu_key_k_type(cpu_key))
133 return 1;
127 134
128 if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key)) 135 return 0;
129 return 1;
130
131 return 0;
132} 136}
133 137
134 138inline int comp_short_le_keys(const struct reiserfs_key *key1,
135inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2) 139 const struct reiserfs_key *key2)
136{ 140{
137 __u32 * p_s_1_u32, * p_s_2_u32; 141 __u32 *p_s_1_u32, *p_s_2_u32;
138 int n_key_length = REISERFS_SHORT_KEY_LEN; 142 int n_key_length = REISERFS_SHORT_KEY_LEN;
139 143
140 p_s_1_u32 = (__u32 *)key1; 144 p_s_1_u32 = (__u32 *) key1;
141 p_s_2_u32 = (__u32 *)key2; 145 p_s_2_u32 = (__u32 *) key2;
142 for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { 146 for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
143 if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) 147 if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
144 return -1; 148 return -1;
145 if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) ) 149 if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
146 return 1; 150 return 1;
147 } 151 }
148 return 0; 152 return 0;
149} 153}
150 154
151inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from) 155inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
152{ 156{
153 int version; 157 int version;
154 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); 158 to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
155 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); 159 to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
156 160
157 // find out version of the key 161 // find out version of the key
158 version = le_key_version (from); 162 version = le_key_version(from);
159 to->version = version; 163 to->version = version;
160 to->on_disk_key.k_offset = le_key_k_offset(version, from); 164 to->on_disk_key.k_offset = le_key_k_offset(version, from);
161 to->on_disk_key.k_type = le_key_k_type(version, from); 165 to->on_disk_key.k_type = le_key_k_type(version, from);
162} 166}
163 167
164
165
166// this does not say which one is bigger, it only returns 1 if keys 168// this does not say which one is bigger, it only returns 1 if keys
167// are not equal, 0 otherwise 169// are not equal, 0 otherwise
168inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2) 170inline int comp_le_keys(const struct reiserfs_key *k1,
171 const struct reiserfs_key *k2)
169{ 172{
170 return memcmp (k1, k2, sizeof (struct reiserfs_key)); 173 return memcmp(k1, k2, sizeof(struct reiserfs_key));
171} 174}
172 175
173/************************************************************************** 176/**************************************************************************
@@ -184,373 +187,396 @@ inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_k
184 there are no possible items, and we have not found it. With each examination we 187 there are no possible items, and we have not found it. With each examination we
185 cut the number of possible items it could be by one more than half rounded down, 188 cut the number of possible items it could be by one more than half rounded down,
186 or we find it. */ 189 or we find it. */
187static inline int bin_search ( 190static inline int bin_search(const void *p_v_key, /* Key to search for. */
188 const void * p_v_key, /* Key to search for. */ 191 const void *p_v_base, /* First item in the array. */
189 const void * p_v_base,/* First item in the array. */ 192 int p_n_num, /* Number of items in the array. */
190 int p_n_num, /* Number of items in the array. */ 193 int p_n_width, /* Item size in the array.
191 int p_n_width, /* Item size in the array. 194 searched. Lest the reader be
192 searched. Lest the reader be 195 confused, note that this is crafted
193 confused, note that this is crafted 196 as a general function, and when it
194 as a general function, and when it 197 is applied specifically to the array
195 is applied specifically to the array 198 of item headers in a node, p_n_width
196 of item headers in a node, p_n_width 199 is actually the item header size not
197 is actually the item header size not 200 the item size. */
198 the item size. */ 201 int *p_n_pos /* Number of the searched for element. */
199 int * p_n_pos /* Number of the searched for element. */ 202 )
200 ) { 203{
201 int n_rbound, n_lbound, n_j; 204 int n_rbound, n_lbound, n_j;
202 205
203 for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 ) 206 for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
204 switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) { 207 n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
205 case -1: n_lbound = n_j + 1; continue; 208 switch (comp_keys
206 case 1: n_rbound = n_j - 1; continue; 209 ((struct reiserfs_key *)((char *)p_v_base +
207 case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */ 210 n_j * p_n_width),
208 } 211 (struct cpu_key *)p_v_key)) {
209 212 case -1:
210 /* bin_search did not find given key, it returns position of key, 213 n_lbound = n_j + 1;
211 that is minimal and greater than the given one. */ 214 continue;
212 *p_n_pos = n_lbound; 215 case 1:
213 return ITEM_NOT_FOUND; 216 n_rbound = n_j - 1;
217 continue;
218 case 0:
219 *p_n_pos = n_j;
220 return ITEM_FOUND; /* Key found in the array. */
221 }
222
223 /* bin_search did not find given key, it returns position of key,
224 that is minimal and greater than the given one. */
225 *p_n_pos = n_lbound;
226 return ITEM_NOT_FOUND;
214} 227}
215 228
216#ifdef CONFIG_REISERFS_CHECK 229#ifdef CONFIG_REISERFS_CHECK
217extern struct tree_balance * cur_tb; 230extern struct tree_balance *cur_tb;
218#endif 231#endif
219 232
220
221
222/* Minimal possible key. It is never in the tree. */ 233/* Minimal possible key. It is never in the tree. */
223const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; 234const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
224 235
225/* Maximal possible key. It is never in the tree. */ 236/* Maximal possible key. It is never in the tree. */
226static const struct reiserfs_key MAX_KEY = { 237static const struct reiserfs_key MAX_KEY = {
227 __constant_cpu_to_le32(0xffffffff), 238 __constant_cpu_to_le32(0xffffffff),
228 __constant_cpu_to_le32(0xffffffff), 239 __constant_cpu_to_le32(0xffffffff),
229 {{__constant_cpu_to_le32(0xffffffff), 240 {{__constant_cpu_to_le32(0xffffffff),
230 __constant_cpu_to_le32(0xffffffff)},} 241 __constant_cpu_to_le32(0xffffffff)},}
231}; 242};
232 243
233
234/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom 244/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
235 of the path, and going upwards. We must check the path's validity at each step. If the key is not in 245 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
236 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this 246 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
237 case we return a special key, either MIN_KEY or MAX_KEY. */ 247 case we return a special key, either MIN_KEY or MAX_KEY. */
238static inline const struct reiserfs_key * get_lkey ( 248static inline const struct reiserfs_key *get_lkey(const struct path
239 const struct path * p_s_chk_path, 249 *p_s_chk_path,
240 const struct super_block * p_s_sb 250 const struct super_block
241 ) { 251 *p_s_sb)
242 int n_position, n_path_offset = p_s_chk_path->path_length; 252{
243 struct buffer_head * p_s_parent; 253 int n_position, n_path_offset = p_s_chk_path->path_length;
244 254 struct buffer_head *p_s_parent;
245 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 255
246 "PAP-5010: invalid offset in the path"); 256 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
247 257 "PAP-5010: invalid offset in the path");
248 /* While not higher in path than first element. */ 258
249 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { 259 /* While not higher in path than first element. */
250 260 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
251 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 261
252 "PAP-5020: parent is not uptodate"); 262 RFALSE(!buffer_uptodate
253 263 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
254 /* Parent at the path is not in the tree now. */ 264 "PAP-5020: parent is not uptodate");
255 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) 265
256 return &MAX_KEY; 266 /* Parent at the path is not in the tree now. */
257 /* Check whether position in the parent is correct. */ 267 if (!B_IS_IN_TREE
258 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) 268 (p_s_parent =
259 return &MAX_KEY; 269 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
260 /* Check whether parent at the path really points to the child. */ 270 return &MAX_KEY;
261 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 271 /* Check whether position in the parent is correct. */
262 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) 272 if ((n_position =
263 return &MAX_KEY; 273 PATH_OFFSET_POSITION(p_s_chk_path,
264 /* Return delimiting key if position in the parent is not equal to zero. */ 274 n_path_offset)) >
265 if ( n_position ) 275 B_NR_ITEMS(p_s_parent))
266 return B_N_PDELIM_KEY(p_s_parent, n_position - 1); 276 return &MAX_KEY;
267 } 277 /* Check whether parent at the path really points to the child. */
268 /* Return MIN_KEY if we are in the root of the buffer tree. */ 278 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
269 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 279 PATH_OFFSET_PBUFFER(p_s_chk_path,
270 SB_ROOT_BLOCK (p_s_sb) ) 280 n_path_offset + 1)->b_blocknr)
271 return &MIN_KEY; 281 return &MAX_KEY;
272 return &MAX_KEY; 282 /* Return delimiting key if position in the parent is not equal to zero. */
283 if (n_position)
284 return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
285 }
286 /* Return MIN_KEY if we are in the root of the buffer tree. */
287 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
288 b_blocknr == SB_ROOT_BLOCK(p_s_sb))
289 return &MIN_KEY;
290 return &MAX_KEY;
273} 291}
274 292
275
276/* Get delimiting key of the buffer at the path and its right neighbor. */ 293/* Get delimiting key of the buffer at the path and its right neighbor. */
277inline const struct reiserfs_key * get_rkey ( 294inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
278 const struct path * p_s_chk_path, 295 const struct super_block *p_s_sb)
279 const struct super_block * p_s_sb 296{
280 ) { 297 int n_position, n_path_offset = p_s_chk_path->path_length;
281 int n_position, 298 struct buffer_head *p_s_parent;
282 n_path_offset = p_s_chk_path->path_length; 299
283 struct buffer_head * p_s_parent; 300 RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
284 301 "PAP-5030: invalid offset in the path");
285 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET, 302
286 "PAP-5030: invalid offset in the path"); 303 while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
287 304
288 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { 305 RFALSE(!buffer_uptodate
289 306 (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
290 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)), 307 "PAP-5040: parent is not uptodate");
291 "PAP-5040: parent is not uptodate"); 308
292 309 /* Parent at the path is not in the tree now. */
293 /* Parent at the path is not in the tree now. */ 310 if (!B_IS_IN_TREE
294 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) 311 (p_s_parent =
295 return &MIN_KEY; 312 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
296 /* Check whether position in the parent is correct. */ 313 return &MIN_KEY;
297 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) 314 /* Check whether position in the parent is correct. */
298 return &MIN_KEY; 315 if ((n_position =
299 /* Check whether parent at the path really points to the child. */ 316 PATH_OFFSET_POSITION(p_s_chk_path,
300 if ( B_N_CHILD_NUM(p_s_parent, n_position) != 317 n_path_offset)) >
301 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) 318 B_NR_ITEMS(p_s_parent))
302 return &MIN_KEY; 319 return &MIN_KEY;
303 /* Return delimiting key if position in the parent is not the last one. */ 320 /* Check whether parent at the path really points to the child. */
304 if ( n_position != B_NR_ITEMS(p_s_parent) ) 321 if (B_N_CHILD_NUM(p_s_parent, n_position) !=
305 return B_N_PDELIM_KEY(p_s_parent, n_position); 322 PATH_OFFSET_PBUFFER(p_s_chk_path,
306 } 323 n_path_offset + 1)->b_blocknr)
307 /* Return MAX_KEY if we are in the root of the buffer tree. */ 324 return &MIN_KEY;
308 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 325 /* Return delimiting key if position in the parent is not the last one. */
309 SB_ROOT_BLOCK (p_s_sb) ) 326 if (n_position != B_NR_ITEMS(p_s_parent))
310 return &MAX_KEY; 327 return B_N_PDELIM_KEY(p_s_parent, n_position);
311 return &MIN_KEY; 328 }
329 /* Return MAX_KEY if we are in the root of the buffer tree. */
330 if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
331 b_blocknr == SB_ROOT_BLOCK(p_s_sb))
332 return &MAX_KEY;
333 return &MIN_KEY;
312} 334}
313 335
314
315/* Check whether a key is contained in the tree rooted from a buffer at a path. */ 336/* Check whether a key is contained in the tree rooted from a buffer at a path. */
316/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in 337/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
317 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the 338 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
318 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in 339 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
319 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ 340 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
320static inline int key_in_buffer ( 341static inline int key_in_buffer(struct path *p_s_chk_path, /* Path which should be checked. */
321 struct path * p_s_chk_path, /* Path which should be checked. */ 342 const struct cpu_key *p_s_key, /* Key which should be checked. */
322 const struct cpu_key * p_s_key, /* Key which should be checked. */ 343 struct super_block *p_s_sb /* Super block pointer. */
323 struct super_block * p_s_sb /* Super block pointer. */ 344 )
324 ) { 345{
325
326 RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
327 p_s_chk_path->path_length > MAX_HEIGHT,
328 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
329 p_s_key, p_s_chk_path->path_length);
330 RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
331 "PAP-5060: device must not be NODEV");
332
333 if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
334 /* left delimiting key is bigger, that the key we look for */
335 return 0;
336 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
337 if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
338 /* p_s_key must be less than right delimitiing key */
339 return 0;
340 return 1;
341}
342
343 346
344inline void decrement_bcount( 347 RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
345 struct buffer_head * p_s_bh 348 || p_s_chk_path->path_length > MAX_HEIGHT,
346 ) { 349 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
347 if ( p_s_bh ) { 350 p_s_key, p_s_chk_path->path_length);
348 if ( atomic_read (&(p_s_bh->b_count)) ) { 351 RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
349 put_bh(p_s_bh) ; 352 "PAP-5060: device must not be NODEV");
350 return; 353
351 } 354 if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1)
352 reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh); 355 /* left delimiting key is bigger, that the key we look for */
353 } 356 return 0;
357 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
358 if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1)
359 /* p_s_key must be less than right delimitiing key */
360 return 0;
361 return 1;
354} 362}
355 363
364inline void decrement_bcount(struct buffer_head *p_s_bh)
365{
366 if (p_s_bh) {
367 if (atomic_read(&(p_s_bh->b_count))) {
368 put_bh(p_s_bh);
369 return;
370 }
371 reiserfs_panic(NULL,
372 "PAP-5070: decrement_bcount: trying to free free buffer %b",
373 p_s_bh);
374 }
375}
356 376
357/* Decrement b_count field of the all buffers in the path. */ 377/* Decrement b_count field of the all buffers in the path. */
358void decrement_counters_in_path ( 378void decrement_counters_in_path(struct path *p_s_search_path)
359 struct path * p_s_search_path 379{
360 ) { 380 int n_path_offset = p_s_search_path->path_length;
361 int n_path_offset = p_s_search_path->path_length; 381
362 382 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
363 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || 383 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
364 n_path_offset > EXTENDED_MAX_HEIGHT - 1, 384 "PAP-5080: invalid path offset of %d", n_path_offset);
365 "PAP-5080: invalid path offset of %d", n_path_offset);
366
367 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
368 struct buffer_head * bh;
369
370 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
371 decrement_bcount (bh);
372 }
373 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
374}
375 385
386 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
387 struct buffer_head *bh;
376 388
377int reiserfs_check_path(struct path *p) { 389 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
378 RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET, 390 decrement_bcount(bh);
379 "path not properly relsed") ; 391 }
380 return 0 ; 392 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
381} 393}
382 394
395int reiserfs_check_path(struct path *p)
396{
397 RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
398 "path not properly relsed");
399 return 0;
400}
383 401
384/* Release all buffers in the path. Restore dirty bits clean 402/* Release all buffers in the path. Restore dirty bits clean
385** when preparing the buffer for the log 403** when preparing the buffer for the log
386** 404**
387** only called from fix_nodes() 405** only called from fix_nodes()
388*/ 406*/
389void pathrelse_and_restore ( 407void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
390 struct super_block *s, 408{
391 struct path * p_s_search_path 409 int n_path_offset = p_s_search_path->path_length;
392 ) { 410
393 int n_path_offset = p_s_search_path->path_length; 411 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
394 412 "clm-4000: invalid path offset");
395 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET, 413
396 "clm-4000: invalid path offset"); 414 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
397 415 reiserfs_restore_prepared_buffer(s,
398 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { 416 PATH_OFFSET_PBUFFER
399 reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, 417 (p_s_search_path,
400 n_path_offset)); 418 n_path_offset));
401 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); 419 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
402 } 420 }
403 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; 421 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
404} 422}
405 423
406/* Release all buffers in the path. */ 424/* Release all buffers in the path. */
407void pathrelse ( 425void pathrelse(struct path *p_s_search_path)
408 struct path * p_s_search_path 426{
409 ) { 427 int n_path_offset = p_s_search_path->path_length;
410 int n_path_offset = p_s_search_path->path_length;
411
412 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
413 "PAP-5090: invalid path offset");
414
415 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )
416 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
417
418 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
419}
420 428
429 RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
430 "PAP-5090: invalid path offset");
421 431
432 while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
433 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
422 434
423static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) 435 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
424{ 436}
425 struct block_head * blkh;
426 struct item_head * ih;
427 int used_space;
428 int prev_location;
429 int i;
430 int nr;
431
432 blkh = (struct block_head *)buf;
433 if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
434 reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
435 return 0;
436 }
437 437
438 nr = blkh_nr_item(blkh); 438static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
439 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { 439{
440 /* item number is too big or too small */ 440 struct block_head *blkh;
441 reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh); 441 struct item_head *ih;
442 return 0; 442 int used_space;
443 } 443 int prev_location;
444 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; 444 int i;
445 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); 445 int nr;
446 if (used_space != blocksize - blkh_free_space(blkh)) { 446
447 /* free space does not match to calculated amount of use space */ 447 blkh = (struct block_head *)buf;
448 reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh); 448 if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
449 return 0; 449 reiserfs_warning(NULL,
450 } 450 "is_leaf: this should be caught earlier");
451 451 return 0;
452 // FIXME: it is_leaf will hit performance too much - we may have
453 // return 1 here
454
455 /* check tables of item heads */
456 ih = (struct item_head *)(buf + BLKH_SIZE);
457 prev_location = blocksize;
458 for (i = 0; i < nr; i ++, ih ++) {
459 if ( le_ih_k_type(ih) == TYPE_ANY) {
460 reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
461 return 0;
462 } 452 }
463 if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { 453
464 reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih); 454 nr = blkh_nr_item(blkh);
465 return 0; 455 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
456 /* item number is too big or too small */
457 reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh);
458 return 0;
466 } 459 }
467 if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { 460 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
468 reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih); 461 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
469 return 0; 462 if (used_space != blocksize - blkh_free_space(blkh)) {
463 /* free space does not match to calculated amount of use space */
464 reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z",
465 bh);
466 return 0;
470 } 467 }
471 if (prev_location - ih_location (ih) != ih_item_len (ih)) { 468 // FIXME: it is_leaf will hit performance too much - we may have
472 reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih); 469 // return 1 here
473 return 0; 470
471 /* check tables of item heads */
472 ih = (struct item_head *)(buf + BLKH_SIZE);
473 prev_location = blocksize;
474 for (i = 0; i < nr; i++, ih++) {
475 if (le_ih_k_type(ih) == TYPE_ANY) {
476 reiserfs_warning(NULL,
477 "is_leaf: wrong item type for item %h",
478 ih);
479 return 0;
480 }
481 if (ih_location(ih) >= blocksize
482 || ih_location(ih) < IH_SIZE * nr) {
483 reiserfs_warning(NULL,
484 "is_leaf: item location seems wrong: %h",
485 ih);
486 return 0;
487 }
488 if (ih_item_len(ih) < 1
489 || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
490 reiserfs_warning(NULL,
491 "is_leaf: item length seems wrong: %h",
492 ih);
493 return 0;
494 }
495 if (prev_location - ih_location(ih) != ih_item_len(ih)) {
496 reiserfs_warning(NULL,
497 "is_leaf: item location seems wrong (second one): %h",
498 ih);
499 return 0;
500 }
501 prev_location = ih_location(ih);
474 } 502 }
475 prev_location = ih_location (ih);
476 }
477 503
478 // one may imagine much more checks 504 // one may imagine much more checks
479 return 1; 505 return 1;
480} 506}
481 507
482
483/* returns 1 if buf looks like an internal node, 0 otherwise */ 508/* returns 1 if buf looks like an internal node, 0 otherwise */
484static int is_internal (char * buf, int blocksize, struct buffer_head * bh) 509static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
485{ 510{
486 struct block_head * blkh; 511 struct block_head *blkh;
487 int nr; 512 int nr;
488 int used_space; 513 int used_space;
489 514
490 blkh = (struct block_head *)buf; 515 blkh = (struct block_head *)buf;
491 nr = blkh_level(blkh); 516 nr = blkh_level(blkh);
492 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) { 517 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
493 /* this level is not possible for internal nodes */ 518 /* this level is not possible for internal nodes */
494 reiserfs_warning (NULL, "is_internal: this should be caught earlier"); 519 reiserfs_warning(NULL,
495 return 0; 520 "is_internal: this should be caught earlier");
496 } 521 return 0;
497 522 }
498 nr = blkh_nr_item(blkh);
499 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
500 /* for internal which is not root we might check min number of keys */
501 reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
502 return 0;
503 }
504 523
505 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); 524 nr = blkh_nr_item(blkh);
506 if (used_space != blocksize - blkh_free_space(blkh)) { 525 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
507 reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh); 526 /* for internal which is not root we might check min number of keys */
508 return 0; 527 reiserfs_warning(NULL,
509 } 528 "is_internal: number of key seems wrong: %z",
529 bh);
530 return 0;
531 }
510 532
511 // one may imagine much more checks 533 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
512 return 1; 534 if (used_space != blocksize - blkh_free_space(blkh)) {
535 reiserfs_warning(NULL,
536 "is_internal: free space seems wrong: %z", bh);
537 return 0;
538 }
539 // one may imagine much more checks
540 return 1;
513} 541}
514 542
515
516// make sure that bh contains formatted node of reiserfs tree of 543// make sure that bh contains formatted node of reiserfs tree of
517// 'level'-th level 544// 'level'-th level
518static int is_tree_node (struct buffer_head * bh, int level) 545static int is_tree_node(struct buffer_head *bh, int level)
519{ 546{
520 if (B_LEVEL (bh) != level) { 547 if (B_LEVEL(bh) != level) {
521 reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d", 548 reiserfs_warning(NULL,
522 B_LEVEL (bh), level); 549 "is_tree_node: node level %d does not match to the expected one %d",
523 return 0; 550 B_LEVEL(bh), level);
524 } 551 return 0;
525 if (level == DISK_LEAF_NODE_LEVEL) 552 }
526 return is_leaf (bh->b_data, bh->b_size, bh); 553 if (level == DISK_LEAF_NODE_LEVEL)
554 return is_leaf(bh->b_data, bh->b_size, bh);
527 555
528 return is_internal (bh->b_data, bh->b_size, bh); 556 return is_internal(bh->b_data, bh->b_size, bh);
529} 557}
530 558
531
532
533#define SEARCH_BY_KEY_READA 16 559#define SEARCH_BY_KEY_READA 16
534 560
535/* The function is NOT SCHEDULE-SAFE! */ 561/* The function is NOT SCHEDULE-SAFE! */
536static void search_by_key_reada (struct super_block * s, 562static void search_by_key_reada(struct super_block *s,
537 struct buffer_head **bh, 563 struct buffer_head **bh,
538 unsigned long *b, int num) 564 unsigned long *b, int num)
539{ 565{
540 int i,j; 566 int i, j;
541 567
542 for (i = 0 ; i < num ; i++) { 568 for (i = 0; i < num; i++) {
543 bh[i] = sb_getblk (s, b[i]); 569 bh[i] = sb_getblk(s, b[i]);
544 } 570 }
545 for (j = 0 ; j < i ; j++) { 571 for (j = 0; j < i; j++) {
546 /* 572 /*
547 * note, this needs attention if we are getting rid of the BKL 573 * note, this needs attention if we are getting rid of the BKL
548 * you have to make sure the prepared bit isn't set on this buffer 574 * you have to make sure the prepared bit isn't set on this buffer
549 */ 575 */
550 if (!buffer_uptodate(bh[j])) 576 if (!buffer_uptodate(bh[j]))
551 ll_rw_block(READA, 1, bh + j); 577 ll_rw_block(READA, 1, bh + j);
552 brelse(bh[j]); 578 brelse(bh[j]);
553 } 579 }
554} 580}
555 581
556/************************************************************************** 582/**************************************************************************
@@ -576,194 +602,200 @@ static void search_by_key_reada (struct super_block * s,
576 correctness of the top of the path but need not be checked for the 602 correctness of the top of the path but need not be checked for the
577 correctness of the bottom of the path */ 603 correctness of the bottom of the path */
578/* The function is NOT SCHEDULE-SAFE! */ 604/* The function is NOT SCHEDULE-SAFE! */
579int search_by_key (struct super_block * p_s_sb, 605int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* Key to search. */
580 const struct cpu_key * p_s_key, /* Key to search. */ 606 struct path *p_s_search_path, /* This structure was
581 struct path * p_s_search_path, /* This structure was 607 allocated and initialized
582 allocated and initialized 608 by the calling
583 by the calling 609 function. It is filled up
584 function. It is filled up 610 by this function. */
585 by this function. */ 611 int n_stop_level /* How far down the tree to search. To
586 int n_stop_level /* How far down the tree to search. To 612 stop at leaf level - set to
587 stop at leaf level - set to 613 DISK_LEAF_NODE_LEVEL */
588 DISK_LEAF_NODE_LEVEL */ 614 )
589 ) { 615{
590 int n_block_number; 616 int n_block_number;
591 int expected_level; 617 int expected_level;
592 struct buffer_head * p_s_bh; 618 struct buffer_head *p_s_bh;
593 struct path_element * p_s_last_element; 619 struct path_element *p_s_last_element;
594 int n_node_level, n_retval; 620 int n_node_level, n_retval;
595 int right_neighbor_of_leaf_node; 621 int right_neighbor_of_leaf_node;
596 int fs_gen; 622 int fs_gen;
597 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; 623 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
598 unsigned long reada_blocks[SEARCH_BY_KEY_READA]; 624 unsigned long reada_blocks[SEARCH_BY_KEY_READA];
599 int reada_count = 0; 625 int reada_count = 0;
600 626
601#ifdef CONFIG_REISERFS_CHECK 627#ifdef CONFIG_REISERFS_CHECK
602 int n_repeat_counter = 0; 628 int n_repeat_counter = 0;
603#endif 629#endif
604
605 PROC_INFO_INC( p_s_sb, search_by_key );
606
607 /* As we add each node to a path we increase its count. This means that
608 we must be careful to release all nodes in a path before we either
609 discard the path struct or re-use the path struct, as we do here. */
610 630
611 decrement_counters_in_path(p_s_search_path); 631 PROC_INFO_INC(p_s_sb, search_by_key);
632
633 /* As we add each node to a path we increase its count. This means that
634 we must be careful to release all nodes in a path before we either
635 discard the path struct or re-use the path struct, as we do here. */
612 636
613 right_neighbor_of_leaf_node = 0; 637 decrement_counters_in_path(p_s_search_path);
614 638
615 /* With each iteration of this loop we search through the items in the 639 right_neighbor_of_leaf_node = 0;
616 current node, and calculate the next current node(next path element) 640
617 for the next iteration of this loop.. */ 641 /* With each iteration of this loop we search through the items in the
618 n_block_number = SB_ROOT_BLOCK (p_s_sb); 642 current node, and calculate the next current node(next path element)
619 expected_level = -1; 643 for the next iteration of this loop.. */
620 while ( 1 ) { 644 n_block_number = SB_ROOT_BLOCK(p_s_sb);
645 expected_level = -1;
646 while (1) {
621 647
622#ifdef CONFIG_REISERFS_CHECK 648#ifdef CONFIG_REISERFS_CHECK
623 if ( !(++n_repeat_counter % 50000) ) 649 if (!(++n_repeat_counter % 50000))
624 reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:" 650 reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:"
625 "there were %d iterations of while loop " 651 "there were %d iterations of while loop "
626 "looking for key %K", 652 "looking for key %K",
627 current->comm, n_repeat_counter, p_s_key); 653 current->comm, n_repeat_counter,
654 p_s_key);
628#endif 655#endif
629 656
630 /* prep path to have another element added to it. */ 657 /* prep path to have another element added to it. */
631 p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length); 658 p_s_last_element =
632 fs_gen = get_generation (p_s_sb); 659 PATH_OFFSET_PELEMENT(p_s_search_path,
633 660 ++p_s_search_path->path_length);
634 /* Read the next tree node, and set the last element in the path to 661 fs_gen = get_generation(p_s_sb);
635 have a pointer to it. */ 662
636 if ((p_s_bh = p_s_last_element->pe_buffer = 663 /* Read the next tree node, and set the last element in the path to
637 sb_getblk(p_s_sb, n_block_number)) ) { 664 have a pointer to it. */
638 if (!buffer_uptodate(p_s_bh) && reada_count > 1) { 665 if ((p_s_bh = p_s_last_element->pe_buffer =
639 search_by_key_reada (p_s_sb, reada_bh, 666 sb_getblk(p_s_sb, n_block_number))) {
640 reada_blocks, reada_count); 667 if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
641 } 668 search_by_key_reada(p_s_sb, reada_bh,
642 ll_rw_block(READ, 1, &p_s_bh); 669 reada_blocks, reada_count);
643 wait_on_buffer(p_s_bh); 670 }
644 if (!buffer_uptodate(p_s_bh)) 671 ll_rw_block(READ, 1, &p_s_bh);
645 goto io_error; 672 wait_on_buffer(p_s_bh);
646 } else { 673 if (!buffer_uptodate(p_s_bh))
647io_error: 674 goto io_error;
648 p_s_search_path->path_length --; 675 } else {
649 pathrelse(p_s_search_path); 676 io_error:
650 return IO_ERROR; 677 p_s_search_path->path_length--;
651 } 678 pathrelse(p_s_search_path);
652 reada_count = 0; 679 return IO_ERROR;
653 if (expected_level == -1) 680 }
654 expected_level = SB_TREE_HEIGHT (p_s_sb); 681 reada_count = 0;
655 expected_level --; 682 if (expected_level == -1)
656 683 expected_level = SB_TREE_HEIGHT(p_s_sb);
657 /* It is possible that schedule occurred. We must check whether the key 684 expected_level--;
658 to search is still in the tree rooted from the current buffer. If 685
659 not then repeat search from the root. */ 686 /* It is possible that schedule occurred. We must check whether the key
660 if ( fs_changed (fs_gen, p_s_sb) && 687 to search is still in the tree rooted from the current buffer. If
661 (!B_IS_IN_TREE (p_s_bh) || 688 not then repeat search from the root. */
662 B_LEVEL(p_s_bh) != expected_level || 689 if (fs_changed(fs_gen, p_s_sb) &&
663 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) { 690 (!B_IS_IN_TREE(p_s_bh) ||
664 PROC_INFO_INC( p_s_sb, search_by_key_fs_changed ); 691 B_LEVEL(p_s_bh) != expected_level ||
665 PROC_INFO_INC( p_s_sb, search_by_key_restarted ); 692 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
666 PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] ); 693 PROC_INFO_INC(p_s_sb, search_by_key_fs_changed);
667 decrement_counters_in_path(p_s_search_path); 694 PROC_INFO_INC(p_s_sb, search_by_key_restarted);
668 695 PROC_INFO_INC(p_s_sb,
669 /* Get the root block number so that we can repeat the search 696 sbk_restarted[expected_level - 1]);
670 starting from the root. */ 697 decrement_counters_in_path(p_s_search_path);
671 n_block_number = SB_ROOT_BLOCK (p_s_sb); 698
672 expected_level = -1; 699 /* Get the root block number so that we can repeat the search
673 right_neighbor_of_leaf_node = 0; 700 starting from the root. */
674 701 n_block_number = SB_ROOT_BLOCK(p_s_sb);
675 /* repeat search from the root */ 702 expected_level = -1;
676 continue; 703 right_neighbor_of_leaf_node = 0;
677 } 704
705 /* repeat search from the root */
706 continue;
707 }
678 708
679 /* only check that the key is in the buffer if p_s_key is not 709 /* only check that the key is in the buffer if p_s_key is not
680 equal to the MAX_KEY. Latter case is only possible in 710 equal to the MAX_KEY. Latter case is only possible in
681 "finish_unfinished()" processing during mount. */ 711 "finish_unfinished()" processing during mount. */
682 RFALSE( comp_keys( &MAX_KEY, p_s_key ) && 712 RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
683 ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb), 713 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
684 "PAP-5130: key is not in the buffer"); 714 "PAP-5130: key is not in the buffer");
685#ifdef CONFIG_REISERFS_CHECK 715#ifdef CONFIG_REISERFS_CHECK
686 if ( cur_tb ) { 716 if (cur_tb) {
687 print_cur_tb ("5140"); 717 print_cur_tb("5140");
688 reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!"); 718 reiserfs_panic(p_s_sb,
689 } 719 "PAP-5140: search_by_key: schedule occurred in do_balance!");
720 }
690#endif 721#endif
691 722
692 // make sure, that the node contents look like a node of 723 // make sure, that the node contents look like a node of
693 // certain level 724 // certain level
694 if (!is_tree_node (p_s_bh, expected_level)) { 725 if (!is_tree_node(p_s_bh, expected_level)) {
695 reiserfs_warning (p_s_sb, "vs-5150: search_by_key: " 726 reiserfs_warning(p_s_sb, "vs-5150: search_by_key: "
696 "invalid format found in block %ld. Fsck?", 727 "invalid format found in block %ld. Fsck?",
697 p_s_bh->b_blocknr); 728 p_s_bh->b_blocknr);
698 pathrelse (p_s_search_path); 729 pathrelse(p_s_search_path);
699 return IO_ERROR; 730 return IO_ERROR;
700 } 731 }
701
702 /* ok, we have acquired next formatted node in the tree */
703 n_node_level = B_LEVEL (p_s_bh);
704
705 PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );
706
707 RFALSE( n_node_level < n_stop_level,
708 "vs-5152: tree level (%d) is less than stop level (%d)",
709 n_node_level, n_stop_level);
710
711 n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
712 B_NR_ITEMS(p_s_bh),
713 ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
714 &(p_s_last_element->pe_position));
715 if (n_node_level == n_stop_level) {
716 return n_retval;
717 }
718 732
719 /* we are not in the stop level */ 733 /* ok, we have acquired next formatted node in the tree */
720 if (n_retval == ITEM_FOUND) 734 n_node_level = B_LEVEL(p_s_bh);
721 /* item has been found, so we choose the pointer which is to the right of the found one */
722 p_s_last_element->pe_position++;
723 735
724 /* if item was not found we choose the position which is to 736 PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1);
725 the left of the found item. This requires no code,
726 bin_search did it already.*/
727 737
728 /* So we have chosen a position in the current node which is 738 RFALSE(n_node_level < n_stop_level,
729 an internal node. Now we calculate child block number by 739 "vs-5152: tree level (%d) is less than stop level (%d)",
730 position in the node. */ 740 n_node_level, n_stop_level);
731 n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
732 741
733 /* if we are going to read leaf nodes, try for read ahead as well */ 742 n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
734 if ((p_s_search_path->reada & PATH_READA) && 743 B_NR_ITEMS(p_s_bh),
735 n_node_level == DISK_LEAF_NODE_LEVEL + 1) 744 (n_node_level ==
736 { 745 DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
737 int pos = p_s_last_element->pe_position; 746 KEY_SIZE,
738 int limit = B_NR_ITEMS(p_s_bh); 747 &(p_s_last_element->pe_position));
739 struct reiserfs_key *le_key; 748 if (n_node_level == n_stop_level) {
740 749 return n_retval;
741 if (p_s_search_path->reada & PATH_READA_BACK) 750 }
742 limit = 0;
743 while(reada_count < SEARCH_BY_KEY_READA) {
744 if (pos == limit)
745 break;
746 reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
747 if (p_s_search_path->reada & PATH_READA_BACK)
748 pos--;
749 else
750 pos++;
751 751
752 /* 752 /* we are not in the stop level */
753 * check to make sure we're in the same object 753 if (n_retval == ITEM_FOUND)
754 */ 754 /* item has been found, so we choose the pointer which is to the right of the found one */
755 le_key = B_N_PDELIM_KEY(p_s_bh, pos); 755 p_s_last_element->pe_position++;
756 if (le32_to_cpu(le_key->k_objectid) != 756
757 p_s_key->on_disk_key.k_objectid) 757 /* if item was not found we choose the position which is to
758 { 758 the left of the found item. This requires no code,
759 break; 759 bin_search did it already. */
760
761 /* So we have chosen a position in the current node which is
762 an internal node. Now we calculate child block number by
763 position in the node. */
764 n_block_number =
765 B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
766
767 /* if we are going to read leaf nodes, try for read ahead as well */
768 if ((p_s_search_path->reada & PATH_READA) &&
769 n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
770 int pos = p_s_last_element->pe_position;
771 int limit = B_NR_ITEMS(p_s_bh);
772 struct reiserfs_key *le_key;
773
774 if (p_s_search_path->reada & PATH_READA_BACK)
775 limit = 0;
776 while (reada_count < SEARCH_BY_KEY_READA) {
777 if (pos == limit)
778 break;
779 reada_blocks[reada_count++] =
780 B_N_CHILD_NUM(p_s_bh, pos);
781 if (p_s_search_path->reada & PATH_READA_BACK)
782 pos--;
783 else
784 pos++;
785
786 /*
787 * check to make sure we're in the same object
788 */
789 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
790 if (le32_to_cpu(le_key->k_objectid) !=
791 p_s_key->on_disk_key.k_objectid) {
792 break;
793 }
794 }
760 } 795 }
761 } 796 }
762 }
763 }
764} 797}
765 798
766
767/* Form the path to an item and position in this item which contains 799/* Form the path to an item and position in this item which contains
768 file byte defined by p_s_key. If there is no such item 800 file byte defined by p_s_key. If there is no such item
769 corresponding to the key, we point the path to the item with 801 corresponding to the key, we point the path to the item with
@@ -780,94 +812,97 @@ io_error:
780 units of directory entries. */ 812 units of directory entries. */
781 813
782/* The function is NOT SCHEDULE-SAFE! */ 814/* The function is NOT SCHEDULE-SAFE! */
783int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */ 815int search_for_position_by_key(struct super_block *p_s_sb, /* Pointer to the super block. */
784 const struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */ 816 const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
785 struct path * p_s_search_path /* Filled up by this function. */ 817 struct path *p_s_search_path /* Filled up by this function. */
786 ) { 818 )
787 struct item_head * p_le_ih; /* pointer to on-disk structure */ 819{
788 int n_blk_size; 820 struct item_head *p_le_ih; /* pointer to on-disk structure */
789 loff_t item_offset, offset; 821 int n_blk_size;
790 struct reiserfs_dir_entry de; 822 loff_t item_offset, offset;
791 int retval; 823 struct reiserfs_dir_entry de;
792 824 int retval;
793 /* If searching for directory entry. */ 825
794 if ( is_direntry_cpu_key (p_cpu_key) ) 826 /* If searching for directory entry. */
795 return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de); 827 if (is_direntry_cpu_key(p_cpu_key))
796 828 return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path,
797 /* If not searching for directory entry. */ 829 &de);
798 830
799 /* If item is found. */ 831 /* If not searching for directory entry. */
800 retval = search_item (p_s_sb, p_cpu_key, p_s_search_path); 832
801 if (retval == IO_ERROR) 833 /* If item is found. */
802 return retval; 834 retval = search_item(p_s_sb, p_cpu_key, p_s_search_path);
803 if ( retval == ITEM_FOUND ) { 835 if (retval == IO_ERROR)
804 836 return retval;
805 RFALSE( ! ih_item_len( 837 if (retval == ITEM_FOUND) {
806 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
807 PATH_LAST_POSITION(p_s_search_path))),
808 "PAP-5165: item length equals zero");
809 838
810 pos_in_item(p_s_search_path) = 0; 839 RFALSE(!ih_item_len
811 return POSITION_FOUND; 840 (B_N_PITEM_HEAD
812 } 841 (PATH_PLAST_BUFFER(p_s_search_path),
842 PATH_LAST_POSITION(p_s_search_path))),
843 "PAP-5165: item length equals zero");
813 844
814 RFALSE( ! PATH_LAST_POSITION(p_s_search_path), 845 pos_in_item(p_s_search_path) = 0;
815 "PAP-5170: position equals zero"); 846 return POSITION_FOUND;
847 }
816 848
817 /* Item is not found. Set path to the previous item. */ 849 RFALSE(!PATH_LAST_POSITION(p_s_search_path),
818 p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path)); 850 "PAP-5170: position equals zero");
819 n_blk_size = p_s_sb->s_blocksize;
820 851
821 if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) { 852 /* Item is not found. Set path to the previous item. */
822 return FILE_NOT_FOUND; 853 p_le_ih =
823 } 854 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
855 --PATH_LAST_POSITION(p_s_search_path));
856 n_blk_size = p_s_sb->s_blocksize;
824 857
825 // FIXME: quite ugly this far 858 if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
859 return FILE_NOT_FOUND;
860 }
861 // FIXME: quite ugly this far
826 862
827 item_offset = le_ih_k_offset (p_le_ih); 863 item_offset = le_ih_k_offset(p_le_ih);
828 offset = cpu_key_k_offset (p_cpu_key); 864 offset = cpu_key_k_offset(p_cpu_key);
829 865
830 /* Needed byte is contained in the item pointed to by the path.*/ 866 /* Needed byte is contained in the item pointed to by the path. */
831 if (item_offset <= offset && 867 if (item_offset <= offset &&
832 item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) { 868 item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
833 pos_in_item (p_s_search_path) = offset - item_offset; 869 pos_in_item(p_s_search_path) = offset - item_offset;
834 if ( is_indirect_le_ih(p_le_ih) ) { 870 if (is_indirect_le_ih(p_le_ih)) {
835 pos_in_item (p_s_search_path) /= n_blk_size; 871 pos_in_item(p_s_search_path) /= n_blk_size;
872 }
873 return POSITION_FOUND;
836 } 874 }
837 return POSITION_FOUND;
838 }
839
840 /* Needed byte is not contained in the item pointed to by the
841 path. Set pos_in_item out of the item. */
842 if ( is_indirect_le_ih (p_le_ih) )
843 pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
844 else
845 pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
846
847 return POSITION_NOT_FOUND;
848}
849 875
876 /* Needed byte is not contained in the item pointed to by the
877 path. Set pos_in_item out of the item. */
878 if (is_indirect_le_ih(p_le_ih))
879 pos_in_item(p_s_search_path) =
880 ih_item_len(p_le_ih) / UNFM_P_SIZE;
881 else
882 pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
883
884 return POSITION_NOT_FOUND;
885}
850 886
851/* Compare given item and item pointed to by the path. */ 887/* Compare given item and item pointed to by the path. */
852int comp_items (const struct item_head * stored_ih, const struct path * p_s_path) 888int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
853{ 889{
854 struct buffer_head * p_s_bh; 890 struct buffer_head *p_s_bh;
855 struct item_head * ih; 891 struct item_head *ih;
856 892
857 /* Last buffer at the path is not in the tree. */ 893 /* Last buffer at the path is not in the tree. */
858 if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) 894 if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
859 return 1; 895 return 1;
860 896
861 /* Last path position is invalid. */ 897 /* Last path position is invalid. */
862 if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) 898 if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
863 return 1; 899 return 1;
864 900
865 /* we need only to know, whether it is the same item */ 901 /* we need only to know, whether it is the same item */
866 ih = get_ih (p_s_path); 902 ih = get_ih(p_s_path);
867 return memcmp (stored_ih, ih, IH_SIZE); 903 return memcmp(stored_ih, ih, IH_SIZE);
868} 904}
869 905
870
871/* unformatted nodes are not logged anymore, ever. This is safe 906/* unformatted nodes are not logged anymore, ever. This is safe
872** now 907** now
873*/ 908*/
@@ -876,461 +911,466 @@ int comp_items (const struct item_head * stored_ih, const struct path * p_s_path
876// block can not be forgotten as it is in I/O or held by someone 911// block can not be forgotten as it is in I/O or held by someone
877#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) 912#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
878 913
879
880
881// prepare for delete or cut of direct item 914// prepare for delete or cut of direct item
882static inline int prepare_for_direct_item (struct path * path, 915static inline int prepare_for_direct_item(struct path *path,
883 struct item_head * le_ih, 916 struct item_head *le_ih,
884 struct inode * inode, 917 struct inode *inode,
885 loff_t new_file_length, 918 loff_t new_file_length, int *cut_size)
886 int * cut_size)
887{ 919{
888 loff_t round_len; 920 loff_t round_len;
889 921
890 922 if (new_file_length == max_reiserfs_offset(inode)) {
891 if ( new_file_length == max_reiserfs_offset (inode) ) { 923 /* item has to be deleted */
892 /* item has to be deleted */ 924 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
893 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 925 return M_DELETE;
894 return M_DELETE; 926 }
895 } 927 // new file gets truncated
896 928 if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
897 // new file gets truncated 929 //
898 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) { 930 round_len = ROUND_UP(new_file_length);
899 // 931 /* this was n_new_file_length < le_ih ... */
900 round_len = ROUND_UP (new_file_length); 932 if (round_len < le_ih_k_offset(le_ih)) {
901 /* this was n_new_file_length < le_ih ... */ 933 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
902 if ( round_len < le_ih_k_offset (le_ih) ) { 934 return M_DELETE; /* Delete this item. */
903 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 935 }
904 return M_DELETE; /* Delete this item. */ 936 /* Calculate first position and size for cutting from item. */
937 pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
938 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
939
940 return M_CUT; /* Cut from this item. */
941 }
942
943 // old file: items may have any length
944
945 if (new_file_length < le_ih_k_offset(le_ih)) {
946 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
947 return M_DELETE; /* Delete this item. */
905 } 948 }
906 /* Calculate first position and size for cutting from item. */ 949 /* Calculate first position and size for cutting from item. */
907 pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); 950 *cut_size = -(ih_item_len(le_ih) -
908 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path)); 951 (pos_in_item(path) =
909 952 new_file_length + 1 - le_ih_k_offset(le_ih)));
910 return M_CUT; /* Cut from this item. */ 953 return M_CUT; /* Cut from this item. */
911 }
912
913
914 // old file: items may have any length
915
916 if ( new_file_length < le_ih_k_offset (le_ih) ) {
917 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
918 return M_DELETE; /* Delete this item. */
919 }
920 /* Calculate first position and size for cutting from item. */
921 *cut_size = -(ih_item_len(le_ih) -
922 (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
923 return M_CUT; /* Cut from this item. */
924} 954}
925 955
926 956static inline int prepare_for_direntry_item(struct path *path,
927static inline int prepare_for_direntry_item (struct path * path, 957 struct item_head *le_ih,
928 struct item_head * le_ih, 958 struct inode *inode,
929 struct inode * inode, 959 loff_t new_file_length,
930 loff_t new_file_length, 960 int *cut_size)
931 int * cut_size)
932{ 961{
933 if (le_ih_k_offset (le_ih) == DOT_OFFSET && 962 if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
934 new_file_length == max_reiserfs_offset (inode)) { 963 new_file_length == max_reiserfs_offset(inode)) {
935 RFALSE( ih_entry_count (le_ih) != 2, 964 RFALSE(ih_entry_count(le_ih) != 2,
936 "PAP-5220: incorrect empty directory item (%h)", le_ih); 965 "PAP-5220: incorrect empty directory item (%h)", le_ih);
937 *cut_size = -(IH_SIZE + ih_item_len(le_ih)); 966 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
938 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ 967 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
939 } 968 }
940
941 if ( ih_entry_count (le_ih) == 1 ) {
942 /* Delete the directory item such as there is one record only
943 in this item*/
944 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
945 return M_DELETE;
946 }
947
948 /* Cut one record from the directory item. */
949 *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
950 return M_CUT;
951}
952 969
970 if (ih_entry_count(le_ih) == 1) {
971 /* Delete the directory item such as there is one record only
972 in this item */
973 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
974 return M_DELETE;
975 }
976
977 /* Cut one record from the directory item. */
978 *cut_size =
979 -(DEH_SIZE +
980 entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
981 return M_CUT;
982}
953 983
954/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. 984/* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
955 If the path points to an indirect item, remove some number of its unformatted nodes. 985 If the path points to an indirect item, remove some number of its unformatted nodes.
956 In case of file truncate calculate whether this item must be deleted/truncated or last 986 In case of file truncate calculate whether this item must be deleted/truncated or last
957 unformatted node of this item will be converted to a direct item. 987 unformatted node of this item will be converted to a direct item.
958 This function returns a determination of what balance mode the calling function should employ. */ 988 This function returns a determination of what balance mode the calling function should employ. */
959static char prepare_for_delete_or_cut( 989static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed, /* Number of unformatted nodes which were removed
960 struct reiserfs_transaction_handle *th, 990 from end of the file. */
961 struct inode * inode, 991 int *p_n_cut_size, unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
962 struct path * p_s_path, 992 )
963 const struct cpu_key * p_s_item_key, 993{
964 int * p_n_removed, /* Number of unformatted nodes which were removed 994 struct super_block *p_s_sb = inode->i_sb;
965 from end of the file. */ 995 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
966 int * p_n_cut_size, 996 struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
967 unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
968 ) {
969 struct super_block * p_s_sb = inode->i_sb;
970 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path);
971 struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path);
972
973 BUG_ON (!th->t_trans_id);
974
975 /* Stat_data item. */
976 if ( is_statdata_le_ih (p_le_ih) ) {
977
978 RFALSE( n_new_file_length != max_reiserfs_offset (inode),
979 "PAP-5210: mode must be M_DELETE");
980
981 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
982 return M_DELETE;
983 }
984
985
986 /* Directory item. */
987 if ( is_direntry_le_ih (p_le_ih) )
988 return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
989
990 /* Direct item. */
991 if ( is_direct_le_ih (p_le_ih) )
992 return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
993
994
995 /* Case of an indirect item. */
996 {
997 int n_unfm_number, /* Number of the item unformatted nodes. */
998 n_counter,
999 n_blk_size;
1000 __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1001 __u32 tmp;
1002 struct item_head s_ih; /* Item header. */
1003 char c_mode; /* Returned mode of the balance. */
1004 int need_research;
1005 997
998 BUG_ON(!th->t_trans_id);
1006 999
1007 n_blk_size = p_s_sb->s_blocksize; 1000 /* Stat_data item. */
1001 if (is_statdata_le_ih(p_le_ih)) {
1008 1002
1009 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ 1003 RFALSE(n_new_file_length != max_reiserfs_offset(inode),
1010 do { 1004 "PAP-5210: mode must be M_DELETE");
1011 need_research = 0;
1012 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1013 /* Copy indirect item header to a temp variable. */
1014 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1015 /* Calculate number of unformatted nodes in this item. */
1016 n_unfm_number = I_UNFM_NUM(&s_ih);
1017
1018 RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
1019 pos_in_item (p_s_path) + 1 != n_unfm_number,
1020 "PAP-5240: invalid item %h "
1021 "n_unfm_number = %d *p_n_pos_in_item = %d",
1022 &s_ih, n_unfm_number, pos_in_item (p_s_path));
1023
1024 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1025 if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
1026 pos_in_item (p_s_path) = 0;
1027 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1028 c_mode = M_DELETE;
1029 }
1030 else { /* Case of truncate. */
1031 if ( n_new_file_length < le_ih_k_offset (&s_ih) ) {
1032 pos_in_item (p_s_path) = 0;
1033 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1034 c_mode = M_DELETE; /* Delete this item. */
1035 }
1036 else {
1037 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1038 pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;
1039
1040 RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1041 "PAP-5250: invalid position in the item");
1042
1043 /* Either convert last unformatted node of indirect item to direct item or increase
1044 its free space. */
1045 if ( pos_in_item (p_s_path) == n_unfm_number ) {
1046 *p_n_cut_size = 0; /* Nothing to cut. */
1047 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1048 }
1049 /* Calculate size to cut. */
1050 *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
1051
1052 c_mode = M_CUT; /* Cut from this indirect item. */
1053 }
1054 }
1055
1056 RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1057 "PAP-5260: invalid position in the indirect item");
1058
1059 /* pointers to be cut */
1060 n_unfm_number -= pos_in_item (p_s_path);
1061 /* Set pointer to the last unformatted node pointer that is to be cut. */
1062 p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
1063
1064
1065 /* We go through the unformatted nodes pointers of the indirect
1066 item and look for the unformatted nodes in the cache. If we
1067 found some of them we free it, zero corresponding indirect item
1068 entry and log buffer containing that indirect item. For this we
1069 need to prepare last path element for logging. If some
1070 unformatted node has b_count > 1 we must not free this
1071 unformatted node since it is in use. */
1072 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1073 // note: path could be changed, first line in for loop takes care
1074 // of it
1075
1076 for (n_counter = *p_n_removed;
1077 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
1078
1079 cond_resched();
1080 if (item_moved (&s_ih, p_s_path)) {
1081 need_research = 1 ;
1082 break;
1083 }
1084 RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1085 p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
1086 "vs-5265: pointer out of range");
1087 1005
1088 /* Hole, nothing to remove. */ 1006 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1089 if ( ! get_block_num(p_n_unfm_pointer,0) ) { 1007 return M_DELETE;
1090 (*p_n_removed)++; 1008 }
1091 continue;
1092 }
1093 1009
1094 (*p_n_removed)++; 1010 /* Directory item. */
1011 if (is_direntry_le_ih(p_le_ih))
1012 return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
1013 n_new_file_length,
1014 p_n_cut_size);
1095 1015
1096 tmp = get_block_num(p_n_unfm_pointer,0); 1016 /* Direct item. */
1097 put_block_num(p_n_unfm_pointer, 0, 0); 1017 if (is_direct_le_ih(p_le_ih))
1098 journal_mark_dirty (th, p_s_sb, p_s_bh); 1018 return prepare_for_direct_item(p_s_path, p_le_ih, inode,
1099 reiserfs_free_block(th, inode, tmp, 1); 1019 n_new_file_length, p_n_cut_size);
1100 if ( item_moved (&s_ih, p_s_path) ) { 1020
1101 need_research = 1; 1021 /* Case of an indirect item. */
1102 break ; 1022 {
1103 } 1023 int n_unfm_number, /* Number of the item unformatted nodes. */
1104 } 1024 n_counter, n_blk_size;
1105 1025 __le32 *p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1106 /* a trick. If the buffer has been logged, this 1026 __u32 tmp;
1107 ** will do nothing. If we've broken the loop without 1027 struct item_head s_ih; /* Item header. */
1108 ** logging it, it will restore the buffer 1028 char c_mode; /* Returned mode of the balance. */
1109 ** 1029 int need_research;
1110 */ 1030
1111 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); 1031 n_blk_size = p_s_sb->s_blocksize;
1112 1032
1113 /* This loop can be optimized. */ 1033 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1114 } while ( (*p_n_removed < n_unfm_number || need_research) && 1034 do {
1115 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND ); 1035 need_research = 0;
1116 1036 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1117 RFALSE( *p_n_removed < n_unfm_number, 1037 /* Copy indirect item header to a temp variable. */
1118 "PAP-5310: indirect item is not found"); 1038 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1119 RFALSE( item_moved (&s_ih, p_s_path), 1039 /* Calculate number of unformatted nodes in this item. */
1120 "after while, comp failed, retry") ; 1040 n_unfm_number = I_UNFM_NUM(&s_ih);
1121 1041
1122 if (c_mode == M_CUT) 1042 RFALSE(!is_indirect_le_ih(&s_ih) || !n_unfm_number ||
1123 pos_in_item (p_s_path) *= UNFM_P_SIZE; 1043 pos_in_item(p_s_path) + 1 != n_unfm_number,
1124 return c_mode; 1044 "PAP-5240: invalid item %h "
1125 } 1045 "n_unfm_number = %d *p_n_pos_in_item = %d",
1046 &s_ih, n_unfm_number, pos_in_item(p_s_path));
1047
1048 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1049 if (n_new_file_length == max_reiserfs_offset(inode)) { /* Case of delete. */
1050 pos_in_item(p_s_path) = 0;
1051 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1052 c_mode = M_DELETE;
1053 } else { /* Case of truncate. */
1054 if (n_new_file_length < le_ih_k_offset(&s_ih)) {
1055 pos_in_item(p_s_path) = 0;
1056 *p_n_cut_size =
1057 -(IH_SIZE + ih_item_len(&s_ih));
1058 c_mode = M_DELETE; /* Delete this item. */
1059 } else {
1060 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1061 pos_in_item(p_s_path) =
1062 (n_new_file_length + n_blk_size -
1063 le_ih_k_offset(&s_ih)) >> p_s_sb->
1064 s_blocksize_bits;
1065
1066 RFALSE(pos_in_item(p_s_path) >
1067 n_unfm_number,
1068 "PAP-5250: invalid position in the item");
1069
1070 /* Either convert last unformatted node of indirect item to direct item or increase
1071 its free space. */
1072 if (pos_in_item(p_s_path) ==
1073 n_unfm_number) {
1074 *p_n_cut_size = 0; /* Nothing to cut. */
1075 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1076 }
1077 /* Calculate size to cut. */
1078 *p_n_cut_size =
1079 -(ih_item_len(&s_ih) -
1080 pos_in_item(p_s_path) *
1081 UNFM_P_SIZE);
1082
1083 c_mode = M_CUT; /* Cut from this indirect item. */
1084 }
1085 }
1086
1087 RFALSE(n_unfm_number <= pos_in_item(p_s_path),
1088 "PAP-5260: invalid position in the indirect item");
1089
1090 /* pointers to be cut */
1091 n_unfm_number -= pos_in_item(p_s_path);
1092 /* Set pointer to the last unformatted node pointer that is to be cut. */
1093 p_n_unfm_pointer =
1094 (__le32 *) B_I_PITEM(p_s_bh,
1095 &s_ih) + I_UNFM_NUM(&s_ih) -
1096 1 - *p_n_removed;
1097
1098 /* We go through the unformatted nodes pointers of the indirect
1099 item and look for the unformatted nodes in the cache. If we
1100 found some of them we free it, zero corresponding indirect item
1101 entry and log buffer containing that indirect item. For this we
1102 need to prepare last path element for logging. If some
1103 unformatted node has b_count > 1 we must not free this
1104 unformatted node since it is in use. */
1105 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1106 // note: path could be changed, first line in for loop takes care
1107 // of it
1108
1109 for (n_counter = *p_n_removed;
1110 n_counter < n_unfm_number;
1111 n_counter++, p_n_unfm_pointer--) {
1112
1113 cond_resched();
1114 if (item_moved(&s_ih, p_s_path)) {
1115 need_research = 1;
1116 break;
1117 }
1118 RFALSE(p_n_unfm_pointer <
1119 (__le32 *) B_I_PITEM(p_s_bh, &s_ih)
1120 || p_n_unfm_pointer >
1121 (__le32 *) B_I_PITEM(p_s_bh,
1122 &s_ih) +
1123 I_UNFM_NUM(&s_ih) - 1,
1124 "vs-5265: pointer out of range");
1125
1126 /* Hole, nothing to remove. */
1127 if (!get_block_num(p_n_unfm_pointer, 0)) {
1128 (*p_n_removed)++;
1129 continue;
1130 }
1131
1132 (*p_n_removed)++;
1133
1134 tmp = get_block_num(p_n_unfm_pointer, 0);
1135 put_block_num(p_n_unfm_pointer, 0, 0);
1136 journal_mark_dirty(th, p_s_sb, p_s_bh);
1137 reiserfs_free_block(th, inode, tmp, 1);
1138 if (item_moved(&s_ih, p_s_path)) {
1139 need_research = 1;
1140 break;
1141 }
1142 }
1143
1144 /* a trick. If the buffer has been logged, this
1145 ** will do nothing. If we've broken the loop without
1146 ** logging it, it will restore the buffer
1147 **
1148 */
1149 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1150
1151 /* This loop can be optimized. */
1152 } while ((*p_n_removed < n_unfm_number || need_research) &&
1153 search_for_position_by_key(p_s_sb, p_s_item_key,
1154 p_s_path) ==
1155 POSITION_FOUND);
1156
1157 RFALSE(*p_n_removed < n_unfm_number,
1158 "PAP-5310: indirect item is not found");
1159 RFALSE(item_moved(&s_ih, p_s_path),
1160 "after while, comp failed, retry");
1161
1162 if (c_mode == M_CUT)
1163 pos_in_item(p_s_path) *= UNFM_P_SIZE;
1164 return c_mode;
1165 }
1126} 1166}
1127 1167
1128/* Calculate number of bytes which will be deleted or cut during balance */ 1168/* Calculate number of bytes which will be deleted or cut during balance */
1129static int calc_deleted_bytes_number( 1169static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1130 struct tree_balance * p_s_tb, 1170{
1131 char c_mode 1171 int n_del_size;
1132 ) { 1172 struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1133 int n_del_size; 1173
1134 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); 1174 if (is_statdata_le_ih(p_le_ih))
1135 1175 return 0;
1136 if ( is_statdata_le_ih (p_le_ih) ) 1176
1137 return 0; 1177 n_del_size =
1178 (c_mode ==
1179 M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1180 if (is_direntry_le_ih(p_le_ih)) {
1181 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1182 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1183 // empty size. ick. FIXME, is this right?
1184 //
1185 return n_del_size;
1186 }
1138 1187
1139 n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0]; 1188 if (is_indirect_le_ih(p_le_ih))
1140 if ( is_direntry_le_ih (p_le_ih) ) { 1189 n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size); // - get_ih_free_space (p_le_ih);
1141 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ 1190 return n_del_size;
1142 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1143 // empty size. ick. FIXME, is this right?
1144 //
1145 return n_del_size ;
1146 }
1147
1148 if ( is_indirect_le_ih (p_le_ih) )
1149 n_del_size = (n_del_size/UNFM_P_SIZE)*
1150 (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
1151 return n_del_size;
1152} 1191}
1153 1192
1154static void init_tb_struct( 1193static void init_tb_struct(struct reiserfs_transaction_handle *th,
1155 struct reiserfs_transaction_handle *th, 1194 struct tree_balance *p_s_tb,
1156 struct tree_balance * p_s_tb, 1195 struct super_block *p_s_sb,
1157 struct super_block * p_s_sb, 1196 struct path *p_s_path, int n_size)
1158 struct path * p_s_path, 1197{
1159 int n_size
1160 ) {
1161
1162 BUG_ON (!th->t_trans_id);
1163
1164 memset (p_s_tb,'\0',sizeof(struct tree_balance));
1165 p_s_tb->transaction_handle = th ;
1166 p_s_tb->tb_sb = p_s_sb;
1167 p_s_tb->tb_path = p_s_path;
1168 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1169 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1170 p_s_tb->insert_size[0] = n_size;
1171}
1172 1198
1199 BUG_ON(!th->t_trans_id);
1173 1200
1201 memset(p_s_tb, '\0', sizeof(struct tree_balance));
1202 p_s_tb->transaction_handle = th;
1203 p_s_tb->tb_sb = p_s_sb;
1204 p_s_tb->tb_path = p_s_path;
1205 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1206 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1207 p_s_tb->insert_size[0] = n_size;
1208}
1174 1209
1175void padd_item (char * item, int total_length, int length) 1210void padd_item(char *item, int total_length, int length)
1176{ 1211{
1177 int i; 1212 int i;
1178 1213
1179 for (i = total_length; i > length; ) 1214 for (i = total_length; i > length;)
1180 item [--i] = 0; 1215 item[--i] = 0;
1181} 1216}
1182 1217
1183#ifdef REISERQUOTA_DEBUG 1218#ifdef REISERQUOTA_DEBUG
1184char key2type(struct reiserfs_key *ih) 1219char key2type(struct reiserfs_key *ih)
1185{ 1220{
1186 if (is_direntry_le_key(2, ih)) 1221 if (is_direntry_le_key(2, ih))
1187 return 'd'; 1222 return 'd';
1188 if (is_direct_le_key(2, ih)) 1223 if (is_direct_le_key(2, ih))
1189 return 'D'; 1224 return 'D';
1190 if (is_indirect_le_key(2, ih)) 1225 if (is_indirect_le_key(2, ih))
1191 return 'i'; 1226 return 'i';
1192 if (is_statdata_le_key(2, ih)) 1227 if (is_statdata_le_key(2, ih))
1193 return 's'; 1228 return 's';
1194 return 'u'; 1229 return 'u';
1195} 1230}
1196 1231
1197char head2type(struct item_head *ih) 1232char head2type(struct item_head *ih)
1198{ 1233{
1199 if (is_direntry_le_ih(ih)) 1234 if (is_direntry_le_ih(ih))
1200 return 'd'; 1235 return 'd';
1201 if (is_direct_le_ih(ih)) 1236 if (is_direct_le_ih(ih))
1202 return 'D'; 1237 return 'D';
1203 if (is_indirect_le_ih(ih)) 1238 if (is_indirect_le_ih(ih))
1204 return 'i'; 1239 return 'i';
1205 if (is_statdata_le_ih(ih)) 1240 if (is_statdata_le_ih(ih))
1206 return 's'; 1241 return 's';
1207 return 'u'; 1242 return 'u';
1208} 1243}
1209#endif 1244#endif
1210 1245
1211/* Delete object item. */ 1246/* Delete object item. */
1212int reiserfs_delete_item (struct reiserfs_transaction_handle *th, 1247int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the deleted item. */
1213 struct path * p_s_path, /* Path to the deleted item. */ 1248 const struct cpu_key *p_s_item_key, /* Key to search for the deleted item. */
1214 const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ 1249 struct inode *p_s_inode, /* inode is here just to update i_blocks and quotas */
1215 struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */ 1250 struct buffer_head *p_s_un_bh)
1216 struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */ 1251{ /* NULL or unformatted node pointer. */
1217{ 1252 struct super_block *p_s_sb = p_s_inode->i_sb;
1218 struct super_block * p_s_sb = p_s_inode->i_sb; 1253 struct tree_balance s_del_balance;
1219 struct tree_balance s_del_balance; 1254 struct item_head s_ih;
1220 struct item_head s_ih; 1255 struct item_head *q_ih;
1221 struct item_head *q_ih; 1256 int quota_cut_bytes;
1222 int quota_cut_bytes; 1257 int n_ret_value, n_del_size, n_removed;
1223 int n_ret_value,
1224 n_del_size,
1225 n_removed;
1226 1258
1227#ifdef CONFIG_REISERFS_CHECK 1259#ifdef CONFIG_REISERFS_CHECK
1228 char c_mode; 1260 char c_mode;
1229 int n_iter = 0; 1261 int n_iter = 0;
1230#endif 1262#endif
1231 1263
1232 BUG_ON (!th->t_trans_id); 1264 BUG_ON(!th->t_trans_id);
1233 1265
1234 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/); 1266 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
1267 0 /*size is unknown */ );
1235 1268
1236 while ( 1 ) { 1269 while (1) {
1237 n_removed = 0; 1270 n_removed = 0;
1238 1271
1239#ifdef CONFIG_REISERFS_CHECK 1272#ifdef CONFIG_REISERFS_CHECK
1240 n_iter++; 1273 n_iter++;
1241 c_mode = 1274 c_mode =
1242#endif 1275#endif
1243 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode)); 1276 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1244 1277 p_s_item_key, &n_removed,
1245 RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE"); 1278 &n_del_size,
1246 1279 max_reiserfs_offset(p_s_inode));
1247 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 1280
1248 s_del_balance.insert_size[0] = n_del_size; 1281 RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1249 1282
1250 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL); 1283 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1251 if ( n_ret_value != REPEAT_SEARCH ) 1284 s_del_balance.insert_size[0] = n_del_size;
1252 break; 1285
1253 1286 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1254 PROC_INFO_INC( p_s_sb, delete_item_restarted ); 1287 if (n_ret_value != REPEAT_SEARCH)
1288 break;
1289
1290 PROC_INFO_INC(p_s_sb, delete_item_restarted);
1291
1292 // file system changed, repeat search
1293 n_ret_value =
1294 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1295 if (n_ret_value == IO_ERROR)
1296 break;
1297 if (n_ret_value == FILE_NOT_FOUND) {
1298 reiserfs_warning(p_s_sb,
1299 "vs-5340: reiserfs_delete_item: "
1300 "no items of the file %K found",
1301 p_s_item_key);
1302 break;
1303 }
1304 } /* while (1) */
1255 1305
1256 // file system changed, repeat search 1306 if (n_ret_value != CARRY_ON) {
1257 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1307 unfix_nodes(&s_del_balance);
1258 if (n_ret_value == IO_ERROR) 1308 return 0;
1259 break; 1309 }
1260 if (n_ret_value == FILE_NOT_FOUND) { 1310 // reiserfs_delete_item returns item length when success
1261 reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: " 1311 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1262 "no items of the file %K found", p_s_item_key); 1312 q_ih = get_ih(p_s_path);
1263 break; 1313 quota_cut_bytes = ih_item_len(q_ih);
1314
1315 /* hack so the quota code doesn't have to guess if the file
1316 ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1317 ** We test the offset because the tail might have been
1318 ** split into multiple items, and we only want to decrement for
1319 ** the unfm node once
1320 */
1321 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1322 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1323 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1324 } else {
1325 quota_cut_bytes = 0;
1326 }
1264 } 1327 }
1265 } /* while (1) */
1266 1328
1267 if ( n_ret_value != CARRY_ON ) { 1329 if (p_s_un_bh) {
1268 unfix_nodes(&s_del_balance); 1330 int off;
1269 return 0; 1331 char *data;
1270 } 1332
1271 1333 /* We are in direct2indirect conversion, so move tail contents
1272 // reiserfs_delete_item returns item length when success 1334 to the unformatted node */
1273 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); 1335 /* note, we do the copy before preparing the buffer because we
1274 q_ih = get_ih(p_s_path) ; 1336 ** don't care about the contents of the unformatted node yet.
1275 quota_cut_bytes = ih_item_len(q_ih) ; 1337 ** the only thing we really care about is the direct item's data
1276 1338 ** is in the unformatted node.
1277 /* hack so the quota code doesn't have to guess if the file 1339 **
1278 ** has a tail. On tail insert, we allocate quota for 1 unformatted node. 1340 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1279 ** We test the offset because the tail might have been 1341 ** the unformatted node, which might schedule, meaning we'd have to
1280 ** split into multiple items, and we only want to decrement for 1342 ** loop all the way back up to the start of the while loop.
1281 ** the unfm node once 1343 **
1282 */ 1344 ** The unformatted node must be dirtied later on. We can't be
1283 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) { 1345 ** sure here if the entire tail has been deleted yet.
1284 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) { 1346 **
1285 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE; 1347 ** p_s_un_bh is from the page cache (all unformatted nodes are
1286 } else { 1348 ** from the page cache) and might be a highmem page. So, we
1287 quota_cut_bytes = 0 ; 1349 ** can't use p_s_un_bh->b_data.
1350 ** -clm
1351 */
1352
1353 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1354 off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1355 memcpy(data + off,
1356 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1357 n_ret_value);
1358 kunmap_atomic(data, KM_USER0);
1288 } 1359 }
1289 } 1360 /* Perform balancing after all resources have been collected at once. */
1290 1361 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1291 if ( p_s_un_bh ) {
1292 int off;
1293 char *data ;
1294
1295 /* We are in direct2indirect conversion, so move tail contents
1296 to the unformatted node */
1297 /* note, we do the copy before preparing the buffer because we
1298 ** don't care about the contents of the unformatted node yet.
1299 ** the only thing we really care about is the direct item's data
1300 ** is in the unformatted node.
1301 **
1302 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1303 ** the unformatted node, which might schedule, meaning we'd have to
1304 ** loop all the way back up to the start of the while loop.
1305 **
1306 ** The unformatted node must be dirtied later on. We can't be
1307 ** sure here if the entire tail has been deleted yet.
1308 **
1309 ** p_s_un_bh is from the page cache (all unformatted nodes are
1310 ** from the page cache) and might be a highmem page. So, we
1311 ** can't use p_s_un_bh->b_data.
1312 ** -clm
1313 */
1314
1315 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1316 off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1317 memcpy(data + off,
1318 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1319 kunmap_atomic(data, KM_USER0);
1320 }
1321 /* Perform balancing after all resources have been collected at once. */
1322 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1323 1362
1324#ifdef REISERQUOTA_DEBUG 1363#ifdef REISERQUOTA_DEBUG
1325 reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih)); 1364 reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1365 "reiserquota delete_item(): freeing %u, id=%u type=%c",
1366 quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1326#endif 1367#endif
1327 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); 1368 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1328 1369
1329 /* Return deleted body length */ 1370 /* Return deleted body length */
1330 return n_ret_value; 1371 return n_ret_value;
1331} 1372}
1332 1373
1333
1334/* Summary Of Mechanisms For Handling Collisions Between Processes: 1374/* Summary Of Mechanisms For Handling Collisions Between Processes:
1335 1375
1336 deletion of the body of the object is performed by iput(), with the 1376 deletion of the body of the object is performed by iput(), with the
@@ -1347,727 +1387,804 @@ int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
1347 - Hans 1387 - Hans
1348*/ 1388*/
1349 1389
1350
1351/* this deletes item which never gets split */ 1390/* this deletes item which never gets split */
1352void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, 1391void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1353 struct inode *inode, 1392 struct inode *inode, struct reiserfs_key *key)
1354 struct reiserfs_key * key)
1355{ 1393{
1356 struct tree_balance tb; 1394 struct tree_balance tb;
1357 INITIALIZE_PATH (path); 1395 INITIALIZE_PATH(path);
1358 int item_len = 0; 1396 int item_len = 0;
1359 int tb_init = 0 ; 1397 int tb_init = 0;
1360 struct cpu_key cpu_key; 1398 struct cpu_key cpu_key;
1361 int retval; 1399 int retval;
1362 int quota_cut_bytes = 0; 1400 int quota_cut_bytes = 0;
1363 1401
1364 BUG_ON (!th->t_trans_id); 1402 BUG_ON(!th->t_trans_id);
1365 1403
1366 le_key2cpu_key (&cpu_key, key); 1404 le_key2cpu_key(&cpu_key, key);
1367 1405
1368 while (1) { 1406 while (1) {
1369 retval = search_item (th->t_super, &cpu_key, &path); 1407 retval = search_item(th->t_super, &cpu_key, &path);
1370 if (retval == IO_ERROR) { 1408 if (retval == IO_ERROR) {
1371 reiserfs_warning (th->t_super, 1409 reiserfs_warning(th->t_super,
1372 "vs-5350: reiserfs_delete_solid_item: " 1410 "vs-5350: reiserfs_delete_solid_item: "
1373 "i/o failure occurred trying to delete %K", 1411 "i/o failure occurred trying to delete %K",
1374 &cpu_key); 1412 &cpu_key);
1375 break; 1413 break;
1376 } 1414 }
1377 if (retval != ITEM_FOUND) { 1415 if (retval != ITEM_FOUND) {
1378 pathrelse (&path); 1416 pathrelse(&path);
1379 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir 1417 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1380 if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \ 1418 if (!
1381 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) ) 1419 ((unsigned long long)
1382 reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key); 1420 GET_HASH_VALUE(le_key_k_offset
1383 break; 1421 (le_key_version(key), key)) == 0
1384 } 1422 && (unsigned long long)
1385 if (!tb_init) { 1423 GET_GENERATION_NUMBER(le_key_k_offset
1386 tb_init = 1 ; 1424 (le_key_version(key),
1387 item_len = ih_item_len( PATH_PITEM_HEAD(&path) ); 1425 key)) == 1))
1388 init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); 1426 reiserfs_warning(th->t_super,
1389 } 1427 "vs-5355: reiserfs_delete_solid_item: %k not found",
1390 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ; 1428 key);
1429 break;
1430 }
1431 if (!tb_init) {
1432 tb_init = 1;
1433 item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1434 init_tb_struct(th, &tb, th->t_super, &path,
1435 -(IH_SIZE + item_len));
1436 }
1437 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1391 1438
1392 retval = fix_nodes (M_DELETE, &tb, NULL, NULL); 1439 retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1393 if (retval == REPEAT_SEARCH) { 1440 if (retval == REPEAT_SEARCH) {
1394 PROC_INFO_INC( th -> t_super, delete_solid_item_restarted ); 1441 PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1395 continue; 1442 continue;
1396 } 1443 }
1397 1444
1398 if (retval == CARRY_ON) { 1445 if (retval == CARRY_ON) {
1399 do_balance (&tb, NULL, NULL, M_DELETE); 1446 do_balance(&tb, NULL, NULL, M_DELETE);
1400 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ 1447 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1401#ifdef REISERQUOTA_DEBUG 1448#ifdef REISERQUOTA_DEBUG
1402 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key)); 1449 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1450 "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1451 quota_cut_bytes, inode->i_uid,
1452 key2type(key));
1403#endif 1453#endif
1404 DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes); 1454 DQUOT_FREE_SPACE_NODIRTY(inode,
1405 } 1455 quota_cut_bytes);
1406 break; 1456 }
1457 break;
1458 }
1459 // IO_ERROR, NO_DISK_SPACE, etc
1460 reiserfs_warning(th->t_super,
1461 "vs-5360: reiserfs_delete_solid_item: "
1462 "could not delete %K due to fix_nodes failure",
1463 &cpu_key);
1464 unfix_nodes(&tb);
1465 break;
1407 } 1466 }
1408 1467
1409 // IO_ERROR, NO_DISK_SPACE, etc 1468 reiserfs_check_path(&path);
1410 reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
1411 "could not delete %K due to fix_nodes failure", &cpu_key);
1412 unfix_nodes (&tb);
1413 break;
1414 }
1415
1416 reiserfs_check_path(&path) ;
1417} 1469}
1418 1470
1419 1471int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1420int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode) 1472 struct inode *inode)
1421{ 1473{
1422 int err; 1474 int err;
1423 inode->i_size = 0; 1475 inode->i_size = 0;
1424 BUG_ON (!th->t_trans_id); 1476 BUG_ON(!th->t_trans_id);
1425 1477
1426 /* for directory this deletes item containing "." and ".." */ 1478 /* for directory this deletes item containing "." and ".." */
1427 err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/); 1479 err =
1428 if (err) 1480 reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1429 return err; 1481 if (err)
1430 1482 return err;
1483
1431#if defined( USE_INODE_GENERATION_COUNTER ) 1484#if defined( USE_INODE_GENERATION_COUNTER )
1432 if( !old_format_only ( th -> t_super ) ) 1485 if (!old_format_only(th->t_super)) {
1433 { 1486 __le32 *inode_generation;
1434 __le32 *inode_generation; 1487
1435 1488 inode_generation =
1436 inode_generation = 1489 &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1437 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation; 1490 *inode_generation =
1438 *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 ); 1491 cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1439 } 1492 }
1440/* USE_INODE_GENERATION_COUNTER */ 1493/* USE_INODE_GENERATION_COUNTER */
1441#endif 1494#endif
1442 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); 1495 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1443 1496
1444 return err; 1497 return err;
1445} 1498}
1446 1499
1447static void 1500static void unmap_buffers(struct page *page, loff_t pos)
1448unmap_buffers(struct page *page, loff_t pos) { 1501{
1449 struct buffer_head *bh ; 1502 struct buffer_head *bh;
1450 struct buffer_head *head ; 1503 struct buffer_head *head;
1451 struct buffer_head *next ; 1504 struct buffer_head *next;
1452 unsigned long tail_index ; 1505 unsigned long tail_index;
1453 unsigned long cur_index ; 1506 unsigned long cur_index;
1454 1507
1455 if (page) { 1508 if (page) {
1456 if (page_has_buffers(page)) { 1509 if (page_has_buffers(page)) {
1457 tail_index = pos & (PAGE_CACHE_SIZE - 1) ; 1510 tail_index = pos & (PAGE_CACHE_SIZE - 1);
1458 cur_index = 0 ; 1511 cur_index = 0;
1459 head = page_buffers(page) ; 1512 head = page_buffers(page);
1460 bh = head ; 1513 bh = head;
1461 do { 1514 do {
1462 next = bh->b_this_page ; 1515 next = bh->b_this_page;
1463 1516
1464 /* we want to unmap the buffers that contain the tail, and 1517 /* we want to unmap the buffers that contain the tail, and
1465 ** all the buffers after it (since the tail must be at the 1518 ** all the buffers after it (since the tail must be at the
1466 ** end of the file). We don't want to unmap file data 1519 ** end of the file). We don't want to unmap file data
1467 ** before the tail, since it might be dirty and waiting to 1520 ** before the tail, since it might be dirty and waiting to
1468 ** reach disk 1521 ** reach disk
1469 */ 1522 */
1470 cur_index += bh->b_size ; 1523 cur_index += bh->b_size;
1471 if (cur_index > tail_index) { 1524 if (cur_index > tail_index) {
1472 reiserfs_unmap_buffer(bh) ; 1525 reiserfs_unmap_buffer(bh);
1526 }
1527 bh = next;
1528 } while (bh != head);
1529 if (PAGE_SIZE == bh->b_size) {
1530 clear_page_dirty(page);
1531 }
1473 } 1532 }
1474 bh = next ;
1475 } while (bh != head) ;
1476 if ( PAGE_SIZE == bh->b_size ) {
1477 clear_page_dirty(page);
1478 }
1479 } 1533 }
1480 }
1481} 1534}
1482 1535
1483static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, 1536static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1484 struct inode * p_s_inode, 1537 struct inode *p_s_inode,
1485 struct page *page, 1538 struct page *page,
1486 struct path * p_s_path, 1539 struct path *p_s_path,
1487 const struct cpu_key * p_s_item_key, 1540 const struct cpu_key *p_s_item_key,
1488 loff_t n_new_file_size, 1541 loff_t n_new_file_size, char *p_c_mode)
1489 char * p_c_mode 1542{
1490 ) { 1543 struct super_block *p_s_sb = p_s_inode->i_sb;
1491 struct super_block * p_s_sb = p_s_inode->i_sb; 1544 int n_block_size = p_s_sb->s_blocksize;
1492 int n_block_size = p_s_sb->s_blocksize; 1545 int cut_bytes;
1493 int cut_bytes; 1546 BUG_ON(!th->t_trans_id);
1494 BUG_ON (!th->t_trans_id); 1547
1495 1548 if (n_new_file_size != p_s_inode->i_size)
1496 if (n_new_file_size != p_s_inode->i_size) 1549 BUG();
1497 BUG ();
1498
1499 /* the page being sent in could be NULL if there was an i/o error
1500 ** reading in the last block. The user will hit problems trying to
1501 ** read the file, but for now we just skip the indirect2direct
1502 */
1503 if (atomic_read(&p_s_inode->i_count) > 1 ||
1504 !tail_has_to_be_packed (p_s_inode) ||
1505 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1506 // leave tail in an unformatted node
1507 *p_c_mode = M_SKIP_BALANCING;
1508 cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
1509 pathrelse(p_s_path);
1510 return cut_bytes;
1511 }
1512 /* Permorm the conversion to a direct_item. */
1513 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
1514 return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
1515}
1516 1550
1551 /* the page being sent in could be NULL if there was an i/o error
1552 ** reading in the last block. The user will hit problems trying to
1553 ** read the file, but for now we just skip the indirect2direct
1554 */
1555 if (atomic_read(&p_s_inode->i_count) > 1 ||
1556 !tail_has_to_be_packed(p_s_inode) ||
1557 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1558 // leave tail in an unformatted node
1559 *p_c_mode = M_SKIP_BALANCING;
1560 cut_bytes =
1561 n_block_size - (n_new_file_size & (n_block_size - 1));
1562 pathrelse(p_s_path);
1563 return cut_bytes;
1564 }
1565 /* Permorm the conversion to a direct_item. */
1566 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1567 return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1568 n_new_file_size, p_c_mode);
1569}
1517 1570
1518/* we did indirect_to_direct conversion. And we have inserted direct 1571/* we did indirect_to_direct conversion. And we have inserted direct
1519 item successesfully, but there were no disk space to cut unfm 1572 item successesfully, but there were no disk space to cut unfm
1520 pointer being converted. Therefore we have to delete inserted 1573 pointer being converted. Therefore we have to delete inserted
1521 direct item(s) */ 1574 direct item(s) */
1522static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path) 1575static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1576 struct inode *inode, struct path *path)
1523{ 1577{
1524 struct cpu_key tail_key; 1578 struct cpu_key tail_key;
1525 int tail_len; 1579 int tail_len;
1526 int removed; 1580 int removed;
1527 BUG_ON (!th->t_trans_id); 1581 BUG_ON(!th->t_trans_id);
1528 1582
1529 make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!! 1583 make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!!
1530 tail_key.key_length = 4; 1584 tail_key.key_length = 4;
1531 1585
1532 tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; 1586 tail_len =
1533 while (tail_len) { 1587 (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1534 /* look for the last byte of the tail */ 1588 while (tail_len) {
1535 if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) 1589 /* look for the last byte of the tail */
1536 reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item"); 1590 if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1537 RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1, 1591 POSITION_NOT_FOUND)
1538 "vs-5616: appended bytes found"); 1592 reiserfs_panic(inode->i_sb,
1539 PATH_LAST_POSITION (path) --; 1593 "vs-5615: indirect_to_direct_roll_back: found invalid item");
1540 1594 RFALSE(path->pos_in_item !=
1541 removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/); 1595 ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1542 RFALSE( removed <= 0 || removed > tail_len, 1596 "vs-5616: appended bytes found");
1543 "vs-5617: there was tail %d bytes, removed item length %d bytes", 1597 PATH_LAST_POSITION(path)--;
1544 tail_len, removed); 1598
1545 tail_len -= removed; 1599 removed =
1546 set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed); 1600 reiserfs_delete_item(th, path, &tail_key, inode,
1547 } 1601 NULL /*unbh not needed */ );
1548 reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space"); 1602 RFALSE(removed <= 0
1549 //mark_file_without_tail (inode); 1603 || removed > tail_len,
1550 mark_inode_dirty (inode); 1604 "vs-5617: there was tail %d bytes, removed item length %d bytes",
1605 tail_len, removed);
1606 tail_len -= removed;
1607 set_cpu_key_k_offset(&tail_key,
1608 cpu_key_k_offset(&tail_key) - removed);
1609 }
1610 reiserfs_warning(inode->i_sb,
1611 "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1612 //mark_file_without_tail (inode);
1613 mark_inode_dirty(inode);
1551} 1614}
1552 1615
1553
1554/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ 1616/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1555int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, 1617int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1556 struct path * p_s_path, 1618 struct path *p_s_path,
1557 struct cpu_key * p_s_item_key, 1619 struct cpu_key *p_s_item_key,
1558 struct inode * p_s_inode, 1620 struct inode *p_s_inode,
1559 struct page *page, 1621 struct page *page, loff_t n_new_file_size)
1560 loff_t n_new_file_size)
1561{ 1622{
1562 struct super_block * p_s_sb = p_s_inode->i_sb; 1623 struct super_block *p_s_sb = p_s_inode->i_sb;
1563 /* Every function which is going to call do_balance must first 1624 /* Every function which is going to call do_balance must first
1564 create a tree_balance structure. Then it must fill up this 1625 create a tree_balance structure. Then it must fill up this
1565 structure by using the init_tb_struct and fix_nodes functions. 1626 structure by using the init_tb_struct and fix_nodes functions.
1566 After that we can make tree balancing. */ 1627 After that we can make tree balancing. */
1567 struct tree_balance s_cut_balance; 1628 struct tree_balance s_cut_balance;
1568 struct item_head *p_le_ih; 1629 struct item_head *p_le_ih;
1569 int n_cut_size = 0, /* Amount to be cut. */ 1630 int n_cut_size = 0, /* Amount to be cut. */
1570 n_ret_value = CARRY_ON, 1631 n_ret_value = CARRY_ON, n_removed = 0, /* Number of the removed unformatted nodes. */
1571 n_removed = 0, /* Number of the removed unformatted nodes. */ 1632 n_is_inode_locked = 0;
1572 n_is_inode_locked = 0; 1633 char c_mode; /* Mode of the balance. */
1573 char c_mode; /* Mode of the balance. */ 1634 int retval2 = -1;
1574 int retval2 = -1; 1635 int quota_cut_bytes;
1575 int quota_cut_bytes; 1636 loff_t tail_pos = 0;
1576 loff_t tail_pos = 0; 1637
1577 1638 BUG_ON(!th->t_trans_id);
1578 BUG_ON (!th->t_trans_id); 1639
1579 1640 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1580 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); 1641 n_cut_size);
1581 1642
1582 1643 /* Repeat this loop until we either cut the item without needing
1583 /* Repeat this loop until we either cut the item without needing 1644 to balance, or we fix_nodes without schedule occurring */
1584 to balance, or we fix_nodes without schedule occurring */ 1645 while (1) {
1585 while ( 1 ) { 1646 /* Determine the balance mode, position of the first byte to
1586 /* Determine the balance mode, position of the first byte to 1647 be cut, and size to be cut. In case of the indirect item
1587 be cut, and size to be cut. In case of the indirect item 1648 free unformatted nodes which are pointed to by the cut
1588 free unformatted nodes which are pointed to by the cut 1649 pointers. */
1589 pointers. */ 1650
1590 1651 c_mode =
1591 c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, 1652 prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1592 &n_cut_size, n_new_file_size); 1653 p_s_item_key, &n_removed,
1593 if ( c_mode == M_CONVERT ) { 1654 &n_cut_size, n_new_file_size);
1594 /* convert last unformatted node to direct item or leave 1655 if (c_mode == M_CONVERT) {
1595 tail in the unformatted node */ 1656 /* convert last unformatted node to direct item or leave
1596 RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice"); 1657 tail in the unformatted node */
1597 1658 RFALSE(n_ret_value != CARRY_ON,
1598 n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key, 1659 "PAP-5570: can not convert twice");
1599 n_new_file_size, &c_mode); 1660
1600 if ( c_mode == M_SKIP_BALANCING ) 1661 n_ret_value =
1601 /* tail has been left in the unformatted node */ 1662 maybe_indirect_to_direct(th, p_s_inode, page,
1602 return n_ret_value; 1663 p_s_path, p_s_item_key,
1603 1664 n_new_file_size, &c_mode);
1604 n_is_inode_locked = 1; 1665 if (c_mode == M_SKIP_BALANCING)
1605 1666 /* tail has been left in the unformatted node */
1606 /* removing of last unformatted node will change value we 1667 return n_ret_value;
1607 have to return to truncate. Save it */ 1668
1608 retval2 = n_ret_value; 1669 n_is_inode_locked = 1;
1609 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/ 1670
1610 1671 /* removing of last unformatted node will change value we
1611 /* So, we have performed the first part of the conversion: 1672 have to return to truncate. Save it */
1612 inserting the new direct item. Now we are removing the 1673 retval2 = n_ret_value;
1613 last unformatted node pointer. Set key to search for 1674 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */
1614 it. */ 1675
1615 set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); 1676 /* So, we have performed the first part of the conversion:
1616 p_s_item_key->key_length = 4; 1677 inserting the new direct item. Now we are removing the
1617 n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); 1678 last unformatted node pointer. Set key to search for
1618 tail_pos = n_new_file_size; 1679 it. */
1619 set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); 1680 set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1620 if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ 1681 p_s_item_key->key_length = 4;
1621 print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); 1682 n_new_file_size -=
1622 reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key); 1683 (n_new_file_size & (p_s_sb->s_blocksize - 1));
1623 } 1684 tail_pos = n_new_file_size;
1624 continue; 1685 set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1625 } 1686 if (search_for_position_by_key
1626 if (n_cut_size == 0) { 1687 (p_s_sb, p_s_item_key,
1627 pathrelse (p_s_path); 1688 p_s_path) == POSITION_NOT_FOUND) {
1628 return 0; 1689 print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1629 } 1690 PATH_LAST_POSITION(p_s_path) - 1,
1691 PATH_LAST_POSITION(p_s_path) + 1);
1692 reiserfs_panic(p_s_sb,
1693 "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)",
1694 p_s_item_key);
1695 }
1696 continue;
1697 }
1698 if (n_cut_size == 0) {
1699 pathrelse(p_s_path);
1700 return 0;
1701 }
1702
1703 s_cut_balance.insert_size[0] = n_cut_size;
1704
1705 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1706 if (n_ret_value != REPEAT_SEARCH)
1707 break;
1708
1709 PROC_INFO_INC(p_s_sb, cut_from_item_restarted);
1710
1711 n_ret_value =
1712 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1713 if (n_ret_value == POSITION_FOUND)
1714 continue;
1630 1715
1631 s_cut_balance.insert_size[0] = n_cut_size; 1716 reiserfs_warning(p_s_sb,
1632 1717 "PAP-5610: reiserfs_cut_from_item: item %K not found",
1633 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL); 1718 p_s_item_key);
1634 if ( n_ret_value != REPEAT_SEARCH ) 1719 unfix_nodes(&s_cut_balance);
1635 break; 1720 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1636 1721 } /* while */
1637 PROC_INFO_INC( p_s_sb, cut_from_item_restarted ); 1722
1638 1723 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1639 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); 1724 if (n_ret_value != CARRY_ON) {
1640 if (n_ret_value == POSITION_FOUND) 1725 if (n_is_inode_locked) {
1641 continue; 1726 // FIXME: this seems to be not needed: we are always able
1642 1727 // to cut item
1643 reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key); 1728 indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1644 unfix_nodes (&s_cut_balance); 1729 }
1645 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; 1730 if (n_ret_value == NO_DISK_SPACE)
1646 } /* while */ 1731 reiserfs_warning(p_s_sb, "NO_DISK_SPACE");
1647 1732 unfix_nodes(&s_cut_balance);
1648 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) 1733 return -EIO;
1649 if ( n_ret_value != CARRY_ON ) {
1650 if ( n_is_inode_locked ) {
1651 // FIXME: this seems to be not needed: we are always able
1652 // to cut item
1653 indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
1654 } 1734 }
1655 if (n_ret_value == NO_DISK_SPACE) 1735
1656 reiserfs_warning (p_s_sb, "NO_DISK_SPACE"); 1736 /* go ahead and perform balancing */
1657 unfix_nodes (&s_cut_balance); 1737
1658 return -EIO; 1738 RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1659 } 1739
1660 1740 /* Calculate number of bytes that need to be cut from the item. */
1661 /* go ahead and perform balancing */ 1741 quota_cut_bytes =
1662 1742 (c_mode ==
1663 RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode"); 1743 M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1664 1744 insert_size[0];
1665 /* Calculate number of bytes that need to be cut from the item. */ 1745 if (retval2 == -1)
1666 quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0]; 1746 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1667 if (retval2 == -1) 1747 else
1668 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); 1748 n_ret_value = retval2;
1669 else 1749
1670 n_ret_value = retval2; 1750 /* For direct items, we only change the quota when deleting the last
1671 1751 ** item.
1672 1752 */
1673 /* For direct items, we only change the quota when deleting the last 1753 p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1674 ** item. 1754 if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1675 */ 1755 if (c_mode == M_DELETE &&
1676 p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); 1756 (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) ==
1677 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) { 1757 1) {
1678 if (c_mode == M_DELETE && 1758 // FIXME: this is to keep 3.5 happy
1679 (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { 1759 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1680 // FIXME: this is to keep 3.5 happy 1760 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1681 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX; 1761 } else {
1682 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ; 1762 quota_cut_bytes = 0;
1683 } else { 1763 }
1684 quota_cut_bytes = 0 ;
1685 } 1764 }
1686 }
1687#ifdef CONFIG_REISERFS_CHECK 1765#ifdef CONFIG_REISERFS_CHECK
1688 if (n_is_inode_locked) { 1766 if (n_is_inode_locked) {
1689 struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); 1767 struct item_head *le_ih =
1690 /* we are going to complete indirect2direct conversion. Make 1768 PATH_PITEM_HEAD(s_cut_balance.tb_path);
1691 sure, that we exactly remove last unformatted node pointer 1769 /* we are going to complete indirect2direct conversion. Make
1692 of the item */ 1770 sure, that we exactly remove last unformatted node pointer
1693 if (!is_indirect_le_ih (le_ih)) 1771 of the item */
1694 reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: " 1772 if (!is_indirect_le_ih(le_ih))
1695 "item must be indirect %h", le_ih); 1773 reiserfs_panic(p_s_sb,
1696 1774 "vs-5652: reiserfs_cut_from_item: "
1697 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE) 1775 "item must be indirect %h", le_ih);
1698 reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: " 1776
1699 "completing indirect2direct conversion indirect item %h " 1777 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1700 "being deleted must be of 4 byte long", le_ih); 1778 reiserfs_panic(p_s_sb,
1701 1779 "vs-5653: reiserfs_cut_from_item: "
1702 if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { 1780 "completing indirect2direct conversion indirect item %h "
1703 reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: " 1781 "being deleted must be of 4 byte long",
1704 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", 1782 le_ih);
1705 le_ih, s_cut_balance.insert_size[0]); 1783
1784 if (c_mode == M_CUT
1785 && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1786 reiserfs_panic(p_s_sb,
1787 "vs-5654: reiserfs_cut_from_item: "
1788 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1789 le_ih, s_cut_balance.insert_size[0]);
1790 }
1791 /* it would be useful to make sure, that right neighboring
1792 item is direct item of this file */
1706 } 1793 }
1707 /* it would be useful to make sure, that right neighboring
1708 item is direct item of this file */
1709 }
1710#endif 1794#endif
1711 1795
1712 do_balance(&s_cut_balance, NULL, NULL, c_mode); 1796 do_balance(&s_cut_balance, NULL, NULL, c_mode);
1713 if ( n_is_inode_locked ) { 1797 if (n_is_inode_locked) {
1714 /* we've done an indirect->direct conversion. when the data block 1798 /* we've done an indirect->direct conversion. when the data block
1715 ** was freed, it was removed from the list of blocks that must 1799 ** was freed, it was removed from the list of blocks that must
1716 ** be flushed before the transaction commits, make sure to 1800 ** be flushed before the transaction commits, make sure to
1717 ** unmap and invalidate it 1801 ** unmap and invalidate it
1718 */ 1802 */
1719 unmap_buffers(page, tail_pos); 1803 unmap_buffers(page, tail_pos);
1720 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ; 1804 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1721 } 1805 }
1722#ifdef REISERQUOTA_DEBUG 1806#ifdef REISERQUOTA_DEBUG
1723 reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?'); 1807 reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1808 "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1809 quota_cut_bytes, p_s_inode->i_uid, '?');
1724#endif 1810#endif
1725 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes); 1811 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1726 return n_ret_value; 1812 return n_ret_value;
1727} 1813}
1728 1814
1729static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode) 1815static void truncate_directory(struct reiserfs_transaction_handle *th,
1816 struct inode *inode)
1730{ 1817{
1731 BUG_ON (!th->t_trans_id); 1818 BUG_ON(!th->t_trans_id);
1732 if (inode->i_nlink) 1819 if (inode->i_nlink)
1733 reiserfs_warning (inode->i_sb, 1820 reiserfs_warning(inode->i_sb,
1734 "vs-5655: truncate_directory: link count != 0"); 1821 "vs-5655: truncate_directory: link count != 0");
1735 1822
1736 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET); 1823 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1737 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY); 1824 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1738 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode)); 1825 reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1739 reiserfs_update_sd(th, inode) ; 1826 reiserfs_update_sd(th, inode);
1740 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET); 1827 set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1741 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA); 1828 set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1742} 1829}
1743 1830
1831/* Truncate file to the new size. Note, this must be called with a transaction
1832 already started */
1833int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, /* ->i_size contains new
1834 size */
1835 struct page *page, /* up to date for last block */
1836 int update_timestamps /* when it is called by
1837 file_release to convert
1838 the tail - no timestamps
1839 should be updated */
1840 )
1841{
1842 INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1843 struct item_head *p_le_ih; /* Pointer to an item header. */
1844 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1845 loff_t n_file_size, /* Old file size. */
1846 n_new_file_size; /* New file size. */
1847 int n_deleted; /* Number of deleted or truncated bytes. */
1848 int retval;
1849 int err = 0;
1850
1851 BUG_ON(!th->t_trans_id);
1852 if (!
1853 (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1854 || S_ISLNK(p_s_inode->i_mode)))
1855 return 0;
1856
1857 if (S_ISDIR(p_s_inode->i_mode)) {
1858 // deletion of directory - no need to update timestamps
1859 truncate_directory(th, p_s_inode);
1860 return 0;
1861 }
1744 1862
1863 /* Get new file size. */
1864 n_new_file_size = p_s_inode->i_size;
1745 1865
1866 // FIXME: note, that key type is unimportant here
1867 make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1868 TYPE_DIRECT, 3);
1746 1869
1747/* Truncate file to the new size. Note, this must be called with a transaction 1870 retval =
1748 already started */ 1871 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1749int reiserfs_do_truncate (struct reiserfs_transaction_handle *th, 1872 &s_search_path);
1750 struct inode * p_s_inode, /* ->i_size contains new 1873 if (retval == IO_ERROR) {
1751 size */ 1874 reiserfs_warning(p_s_inode->i_sb,
1752 struct page *page, /* up to date for last block */ 1875 "vs-5657: reiserfs_do_truncate: "
1753 int update_timestamps /* when it is called by 1876 "i/o failure occurred trying to truncate %K",
1754 file_release to convert 1877 &s_item_key);
1755 the tail - no timestamps 1878 err = -EIO;
1756 should be updated */ 1879 goto out;
1757 ) { 1880 }
1758 INITIALIZE_PATH (s_search_path); /* Path to the current object item. */ 1881 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1759 struct item_head * p_le_ih; /* Pointer to an item header. */ 1882 reiserfs_warning(p_s_inode->i_sb,
1760 struct cpu_key s_item_key; /* Key to search for a previous file item. */ 1883 "PAP-5660: reiserfs_do_truncate: "
1761 loff_t n_file_size, /* Old file size. */ 1884 "wrong result %d of search for %K", retval,
1762 n_new_file_size;/* New file size. */ 1885 &s_item_key);
1763 int n_deleted; /* Number of deleted or truncated bytes. */ 1886
1764 int retval; 1887 err = -EIO;
1765 int err = 0; 1888 goto out;
1766 1889 }
1767 BUG_ON (!th->t_trans_id);
1768 if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1769 return 0;
1770 1890
1771 if (S_ISDIR(p_s_inode->i_mode)) { 1891 s_search_path.pos_in_item--;
1772 // deletion of directory - no need to update timestamps 1892
1773 truncate_directory (th, p_s_inode); 1893 /* Get real file size (total length of all file items) */
1774 return 0; 1894 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1775 } 1895 if (is_statdata_le_ih(p_le_ih))
1776 1896 n_file_size = 0;
1777 /* Get new file size. */ 1897 else {
1778 n_new_file_size = p_s_inode->i_size; 1898 loff_t offset = le_ih_k_offset(p_le_ih);
1779 1899 int bytes =
1780 // FIXME: note, that key type is unimportant here 1900 op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1781 make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3); 1901
1782 1902 /* this may mismatch with real file size: if last direct item
1783 retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path); 1903 had no padding zeros and last unformatted node had no free
1784 if (retval == IO_ERROR) { 1904 space, this file would have this file size */
1785 reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: " 1905 n_file_size = offset + bytes - 1;
1786 "i/o failure occurred trying to truncate %K", &s_item_key); 1906 }
1787 err = -EIO; 1907 /*
1788 goto out; 1908 * are we doing a full truncate or delete, if so
1789 } 1909 * kick in the reada code
1790 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { 1910 */
1791 reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: " 1911 if (n_new_file_size == 0)
1792 "wrong result %d of search for %K", retval, &s_item_key); 1912 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1793 1913
1794 err = -EIO; 1914 if (n_file_size == 0 || n_file_size < n_new_file_size) {
1795 goto out; 1915 goto update_and_out;
1796 }
1797
1798 s_search_path.pos_in_item --;
1799
1800 /* Get real file size (total length of all file items) */
1801 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1802 if ( is_statdata_le_ih (p_le_ih) )
1803 n_file_size = 0;
1804 else {
1805 loff_t offset = le_ih_k_offset (p_le_ih);
1806 int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);
1807
1808 /* this may mismatch with real file size: if last direct item
1809 had no padding zeros and last unformatted node had no free
1810 space, this file would have this file size */
1811 n_file_size = offset + bytes - 1;
1812 }
1813 /*
1814 * are we doing a full truncate or delete, if so
1815 * kick in the reada code
1816 */
1817 if (n_new_file_size == 0)
1818 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1819
1820 if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1821 goto update_and_out ;
1822 }
1823
1824 /* Update key to search for the last file item. */
1825 set_cpu_key_k_offset (&s_item_key, n_file_size);
1826
1827 do {
1828 /* Cut or delete file item. */
1829 n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size);
1830 if (n_deleted < 0) {
1831 reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1832 reiserfs_check_path(&s_search_path) ;
1833 return 0;
1834 } 1916 }
1835 1917
1836 RFALSE( n_deleted > n_file_size, 1918 /* Update key to search for the last file item. */
1837 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K", 1919 set_cpu_key_k_offset(&s_item_key, n_file_size);
1838 n_deleted, n_file_size, &s_item_key); 1920
1921 do {
1922 /* Cut or delete file item. */
1923 n_deleted =
1924 reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1925 p_s_inode, page, n_new_file_size);
1926 if (n_deleted < 0) {
1927 reiserfs_warning(p_s_inode->i_sb,
1928 "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1929 reiserfs_check_path(&s_search_path);
1930 return 0;
1931 }
1839 1932
1840 /* Change key to search the last file item. */ 1933 RFALSE(n_deleted > n_file_size,
1841 n_file_size -= n_deleted; 1934 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1935 n_deleted, n_file_size, &s_item_key);
1842 1936
1843 set_cpu_key_k_offset (&s_item_key, n_file_size); 1937 /* Change key to search the last file item. */
1938 n_file_size -= n_deleted;
1844 1939
1845 /* While there are bytes to truncate and previous file item is presented in the tree. */ 1940 set_cpu_key_k_offset(&s_item_key, n_file_size);
1846 1941
1847 /* 1942 /* While there are bytes to truncate and previous file item is presented in the tree. */
1848 ** This loop could take a really long time, and could log 1943
1849 ** many more blocks than a transaction can hold. So, we do a polite 1944 /*
1850 ** journal end here, and if the transaction needs ending, we make 1945 ** This loop could take a really long time, and could log
1851 ** sure the file is consistent before ending the current trans 1946 ** many more blocks than a transaction can hold. So, we do a polite
1852 ** and starting a new one 1947 ** journal end here, and if the transaction needs ending, we make
1853 */ 1948 ** sure the file is consistent before ending the current trans
1854 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 1949 ** and starting a new one
1855 int orig_len_alloc = th->t_blocks_allocated ; 1950 */
1856 decrement_counters_in_path(&s_search_path) ; 1951 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1857 1952 int orig_len_alloc = th->t_blocks_allocated;
1858 if (update_timestamps) { 1953 decrement_counters_in_path(&s_search_path);
1859 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC; 1954
1860 } 1955 if (update_timestamps) {
1861 reiserfs_update_sd(th, p_s_inode) ; 1956 p_s_inode->i_mtime = p_s_inode->i_ctime =
1862 1957 CURRENT_TIME_SEC;
1863 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; 1958 }
1864 if (err) 1959 reiserfs_update_sd(th, p_s_inode);
1865 goto out; 1960
1866 err = journal_begin (th, p_s_inode->i_sb, 1961 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1867 JOURNAL_PER_BALANCE_CNT * 6); 1962 if (err)
1868 if (err) 1963 goto out;
1869 goto out; 1964 err = journal_begin(th, p_s_inode->i_sb,
1870 reiserfs_update_inode_transaction(p_s_inode) ; 1965 JOURNAL_PER_BALANCE_CNT * 6);
1966 if (err)
1967 goto out;
1968 reiserfs_update_inode_transaction(p_s_inode);
1969 }
1970 } while (n_file_size > ROUND_UP(n_new_file_size) &&
1971 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1972 &s_search_path) == POSITION_FOUND);
1973
1974 RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1975 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1976 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1977
1978 update_and_out:
1979 if (update_timestamps) {
1980 // this is truncate, not file closing
1981 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1871 } 1982 }
1872 } while ( n_file_size > ROUND_UP (n_new_file_size) && 1983 reiserfs_update_sd(th, p_s_inode);
1873 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ;
1874
1875 RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1876 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1877 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1878
1879update_and_out:
1880 if (update_timestamps) {
1881 // this is truncate, not file closing
1882 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1883 }
1884 reiserfs_update_sd (th, p_s_inode);
1885
1886out:
1887 pathrelse(&s_search_path) ;
1888 return err;
1889}
1890 1984
1985 out:
1986 pathrelse(&s_search_path);
1987 return err;
1988}
1891 1989
1892#ifdef CONFIG_REISERFS_CHECK 1990#ifdef CONFIG_REISERFS_CHECK
1893// this makes sure, that we __append__, not overwrite or add holes 1991// this makes sure, that we __append__, not overwrite or add holes
1894static void check_research_for_paste (struct path * path, 1992static void check_research_for_paste(struct path *path,
1895 const struct cpu_key * p_s_key) 1993 const struct cpu_key *p_s_key)
1896{ 1994{
1897 struct item_head * found_ih = get_ih (path); 1995 struct item_head *found_ih = get_ih(path);
1898 1996
1899 if (is_direct_le_ih (found_ih)) { 1997 if (is_direct_le_ih(found_ih)) {
1900 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != 1998 if (le_ih_k_offset(found_ih) +
1901 cpu_key_k_offset (p_s_key) || 1999 op_bytes_number(found_ih,
1902 op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path)) 2000 get_last_bh(path)->b_size) !=
1903 reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: " 2001 cpu_key_k_offset(p_s_key)
1904 "found direct item %h or position (%d) does not match to key %K", 2002 || op_bytes_number(found_ih,
1905 found_ih, pos_in_item (path), p_s_key); 2003 get_last_bh(path)->b_size) !=
1906 } 2004 pos_in_item(path))
1907 if (is_indirect_le_ih (found_ih)) { 2005 reiserfs_panic(NULL,
1908 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || 2006 "PAP-5720: check_research_for_paste: "
1909 I_UNFM_NUM (found_ih) != pos_in_item (path) || 2007 "found direct item %h or position (%d) does not match to key %K",
1910 get_ih_free_space (found_ih) != 0) 2008 found_ih, pos_in_item(path), p_s_key);
1911 reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: " 2009 }
1912 "found indirect item (%h) or position (%d) does not match to key (%K)", 2010 if (is_indirect_le_ih(found_ih)) {
1913 found_ih, pos_in_item (path), p_s_key); 2011 if (le_ih_k_offset(found_ih) +
1914 } 2012 op_bytes_number(found_ih,
2013 get_last_bh(path)->b_size) !=
2014 cpu_key_k_offset(p_s_key)
2015 || I_UNFM_NUM(found_ih) != pos_in_item(path)
2016 || get_ih_free_space(found_ih) != 0)
2017 reiserfs_panic(NULL,
2018 "PAP-5730: check_research_for_paste: "
2019 "found indirect item (%h) or position (%d) does not match to key (%K)",
2020 found_ih, pos_in_item(path), p_s_key);
2021 }
1915} 2022}
1916#endif /* config reiserfs check */ 2023#endif /* config reiserfs check */
1917
1918 2024
1919/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ 2025/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1920int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, 2026int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path, /* Path to the pasted item. */
1921 struct path * p_s_search_path, /* Path to the pasted item. */ 2027 const struct cpu_key *p_s_key, /* Key to search for the needed item. */
1922 const struct cpu_key * p_s_key, /* Key to search for the needed item.*/ 2028 struct inode *inode, /* Inode item belongs to */
1923 struct inode * inode, /* Inode item belongs to */ 2029 const char *p_c_body, /* Pointer to the bytes to paste. */
1924 const char * p_c_body, /* Pointer to the bytes to paste. */ 2030 int n_pasted_size)
1925 int n_pasted_size) /* Size of pasted bytes. */ 2031{ /* Size of pasted bytes. */
1926{ 2032 struct tree_balance s_paste_balance;
1927 struct tree_balance s_paste_balance; 2033 int retval;
1928 int retval; 2034 int fs_gen;
1929 int fs_gen; 2035
2036 BUG_ON(!th->t_trans_id);
1930 2037
1931 BUG_ON (!th->t_trans_id); 2038 fs_gen = get_generation(inode->i_sb);
1932
1933 fs_gen = get_generation(inode->i_sb) ;
1934 2039
1935#ifdef REISERQUOTA_DEBUG 2040#ifdef REISERQUOTA_DEBUG
1936 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); 2041 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2042 "reiserquota paste_into_item(): allocating %u id=%u type=%c",
2043 n_pasted_size, inode->i_uid,
2044 key2type(&(p_s_key->on_disk_key)));
1937#endif 2045#endif
1938 2046
1939 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) { 2047 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1940 pathrelse(p_s_search_path); 2048 pathrelse(p_s_search_path);
1941 return -EDQUOT; 2049 return -EDQUOT;
1942 } 2050 }
1943 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); 2051 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
2052 n_pasted_size);
1944#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2053#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1945 s_paste_balance.key = p_s_key->on_disk_key; 2054 s_paste_balance.key = p_s_key->on_disk_key;
1946#endif 2055#endif
1947 2056
1948 /* DQUOT_* can schedule, must check before the fix_nodes */ 2057 /* DQUOT_* can schedule, must check before the fix_nodes */
1949 if (fs_changed(fs_gen, inode->i_sb)) { 2058 if (fs_changed(fs_gen, inode->i_sb)) {
1950 goto search_again; 2059 goto search_again;
1951 }
1952
1953 while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
1954REPEAT_SEARCH ) {
1955search_again:
1956 /* file system changed while we were in the fix_nodes */
1957 PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
1958 retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
1959 if (retval == IO_ERROR) {
1960 retval = -EIO ;
1961 goto error_out ;
1962 } 2060 }
1963 if (retval == POSITION_FOUND) { 2061
1964 reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); 2062 while ((retval =
1965 retval = -EEXIST ; 2063 fix_nodes(M_PASTE, &s_paste_balance, NULL,
1966 goto error_out ; 2064 p_c_body)) == REPEAT_SEARCH) {
1967 } 2065 search_again:
1968 2066 /* file system changed while we were in the fix_nodes */
2067 PROC_INFO_INC(th->t_super, paste_into_item_restarted);
2068 retval =
2069 search_for_position_by_key(th->t_super, p_s_key,
2070 p_s_search_path);
2071 if (retval == IO_ERROR) {
2072 retval = -EIO;
2073 goto error_out;
2074 }
2075 if (retval == POSITION_FOUND) {
2076 reiserfs_warning(inode->i_sb,
2077 "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists",
2078 p_s_key);
2079 retval = -EEXIST;
2080 goto error_out;
2081 }
1969#ifdef CONFIG_REISERFS_CHECK 2082#ifdef CONFIG_REISERFS_CHECK
1970 check_research_for_paste (p_s_search_path, p_s_key); 2083 check_research_for_paste(p_s_search_path, p_s_key);
1971#endif 2084#endif
1972 } 2085 }
1973 2086
1974 /* Perform balancing after all resources are collected by fix_nodes, and 2087 /* Perform balancing after all resources are collected by fix_nodes, and
1975 accessing them will not risk triggering schedule. */ 2088 accessing them will not risk triggering schedule. */
1976 if ( retval == CARRY_ON ) { 2089 if (retval == CARRY_ON) {
1977 do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE); 2090 do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
1978 return 0; 2091 return 0;
1979 } 2092 }
1980 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2093 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1981error_out: 2094 error_out:
1982 /* this also releases the path */ 2095 /* this also releases the path */
1983 unfix_nodes(&s_paste_balance); 2096 unfix_nodes(&s_paste_balance);
1984#ifdef REISERQUOTA_DEBUG 2097#ifdef REISERQUOTA_DEBUG
1985 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key))); 2098 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2099 "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2100 n_pasted_size, inode->i_uid,
2101 key2type(&(p_s_key->on_disk_key)));
1986#endif 2102#endif
1987 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size); 2103 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1988 return retval ; 2104 return retval;
1989} 2105}
1990 2106
1991
1992/* Insert new item into the buffer at the path. */ 2107/* Insert new item into the buffer at the path. */
1993int reiserfs_insert_item(struct reiserfs_transaction_handle *th, 2108int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path, /* Path to the inserteded item. */
1994 struct path * p_s_path, /* Path to the inserteded item. */ 2109 const struct cpu_key *key, struct item_head *p_s_ih, /* Pointer to the item header to insert. */
1995 const struct cpu_key * key, 2110 struct inode *inode, const char *p_c_body)
1996 struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ 2111{ /* Pointer to the bytes to insert. */
1997 struct inode * inode, 2112 struct tree_balance s_ins_balance;
1998 const char * p_c_body) /* Pointer to the bytes to insert. */ 2113 int retval;
1999{ 2114 int fs_gen = 0;
2000 struct tree_balance s_ins_balance; 2115 int quota_bytes = 0;
2001 int retval; 2116
2002 int fs_gen = 0 ; 2117 BUG_ON(!th->t_trans_id);
2003 int quota_bytes = 0 ; 2118
2004 2119 if (inode) { /* Do we count quotas for item? */
2005 BUG_ON (!th->t_trans_id); 2120 fs_gen = get_generation(inode->i_sb);
2006 2121 quota_bytes = ih_item_len(p_s_ih);
2007 if (inode) { /* Do we count quotas for item? */ 2122
2008 fs_gen = get_generation(inode->i_sb); 2123 /* hack so the quota code doesn't have to guess if the file has
2009 quota_bytes = ih_item_len(p_s_ih); 2124 ** a tail, links are always tails, so there's no guessing needed
2010 2125 */
2011 /* hack so the quota code doesn't have to guess if the file has 2126 if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2012 ** a tail, links are always tails, so there's no guessing needed 2127 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2013 */ 2128 }
2014 if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2015 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
2016 }
2017#ifdef REISERQUOTA_DEBUG 2129#ifdef REISERQUOTA_DEBUG
2018 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); 2130 reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2131 "reiserquota insert_item(): allocating %u id=%u type=%c",
2132 quota_bytes, inode->i_uid, head2type(p_s_ih));
2019#endif 2133#endif
2020 /* We can't dirty inode here. It would be immediately written but 2134 /* We can't dirty inode here. It would be immediately written but
2021 * appropriate stat item isn't inserted yet... */ 2135 * appropriate stat item isn't inserted yet... */
2022 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) { 2136 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2023 pathrelse(p_s_path); 2137 pathrelse(p_s_path);
2024 return -EDQUOT; 2138 return -EDQUOT;
2139 }
2025 } 2140 }
2026 } 2141 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2027 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih)); 2142 IH_SIZE + ih_item_len(p_s_ih));
2028#ifdef DISPLACE_NEW_PACKING_LOCALITIES 2143#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2029 s_ins_balance.key = key->on_disk_key; 2144 s_ins_balance.key = key->on_disk_key;
2030#endif 2145#endif
2031 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */ 2146 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2032 if (inode && fs_changed(fs_gen, inode->i_sb)) { 2147 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2033 goto search_again; 2148 goto search_again;
2034 }
2035
2036 while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2037search_again:
2038 /* file system changed while we were in the fix_nodes */
2039 PROC_INFO_INC( th -> t_super, insert_item_restarted );
2040 retval = search_item (th->t_super, key, p_s_path);
2041 if (retval == IO_ERROR) {
2042 retval = -EIO;
2043 goto error_out ;
2044 } 2149 }
2045 if (retval == ITEM_FOUND) { 2150
2046 reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: " 2151 while ((retval =
2047 "key %K already exists in the tree", key); 2152 fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2048 retval = -EEXIST ; 2153 p_c_body)) == REPEAT_SEARCH) {
2049 goto error_out; 2154 search_again:
2155 /* file system changed while we were in the fix_nodes */
2156 PROC_INFO_INC(th->t_super, insert_item_restarted);
2157 retval = search_item(th->t_super, key, p_s_path);
2158 if (retval == IO_ERROR) {
2159 retval = -EIO;
2160 goto error_out;
2161 }
2162 if (retval == ITEM_FOUND) {
2163 reiserfs_warning(th->t_super,
2164 "PAP-5760: reiserfs_insert_item: "
2165 "key %K already exists in the tree",
2166 key);
2167 retval = -EEXIST;
2168 goto error_out;
2169 }
2050 } 2170 }
2051 }
2052 2171
2053 /* make balancing after all resources will be collected at a time */ 2172 /* make balancing after all resources will be collected at a time */
2054 if ( retval == CARRY_ON ) { 2173 if (retval == CARRY_ON) {
2055 do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); 2174 do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2056 return 0; 2175 return 0;
2057 } 2176 }
2058 2177
2059 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; 2178 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2060error_out: 2179 error_out:
2061 /* also releases the path */ 2180 /* also releases the path */
2062 unfix_nodes(&s_ins_balance); 2181 unfix_nodes(&s_ins_balance);
2063#ifdef REISERQUOTA_DEBUG 2182#ifdef REISERQUOTA_DEBUG
2064 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih)); 2183 reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2184 "reiserquota insert_item(): freeing %u id=%u type=%c",
2185 quota_bytes, inode->i_uid, head2type(p_s_ih));
2065#endif 2186#endif
2066 if (inode) 2187 if (inode)
2067 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ; 2188 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2068 return retval; 2189 return retval;
2069} 2190}
2070
2071
2072
2073
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 4b80ab95d33..6951c35755b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -35,83 +35,81 @@ static const char reiserfs_3_5_magic_string[] = REISERFS_SUPER_MAGIC_STRING;
35static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING; 35static const char reiserfs_3_6_magic_string[] = REISER2FS_SUPER_MAGIC_STRING;
36static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING; 36static const char reiserfs_jr_magic_string[] = REISER2FS_JR_SUPER_MAGIC_STRING;
37 37
38int is_reiserfs_3_5 (struct reiserfs_super_block * rs) 38int is_reiserfs_3_5(struct reiserfs_super_block *rs)
39{ 39{
40 return !strncmp (rs->s_v1.s_magic, reiserfs_3_5_magic_string, 40 return !strncmp(rs->s_v1.s_magic, reiserfs_3_5_magic_string,
41 strlen (reiserfs_3_5_magic_string)); 41 strlen(reiserfs_3_5_magic_string));
42} 42}
43 43
44 44int is_reiserfs_3_6(struct reiserfs_super_block *rs)
45int is_reiserfs_3_6 (struct reiserfs_super_block * rs)
46{ 45{
47 return !strncmp (rs->s_v1.s_magic, reiserfs_3_6_magic_string, 46 return !strncmp(rs->s_v1.s_magic, reiserfs_3_6_magic_string,
48 strlen (reiserfs_3_6_magic_string)); 47 strlen(reiserfs_3_6_magic_string));
49} 48}
50 49
51 50int is_reiserfs_jr(struct reiserfs_super_block *rs)
52int is_reiserfs_jr (struct reiserfs_super_block * rs)
53{ 51{
54 return !strncmp (rs->s_v1.s_magic, reiserfs_jr_magic_string, 52 return !strncmp(rs->s_v1.s_magic, reiserfs_jr_magic_string,
55 strlen (reiserfs_jr_magic_string)); 53 strlen(reiserfs_jr_magic_string));
56} 54}
57 55
58 56static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
59static int is_any_reiserfs_magic_string (struct reiserfs_super_block * rs)
60{ 57{
61 return (is_reiserfs_3_5 (rs) || is_reiserfs_3_6 (rs) || 58 return (is_reiserfs_3_5(rs) || is_reiserfs_3_6(rs) ||
62 is_reiserfs_jr (rs)); 59 is_reiserfs_jr(rs));
63} 60}
64 61
65static int reiserfs_remount (struct super_block * s, int * flags, char * data); 62static int reiserfs_remount(struct super_block *s, int *flags, char *data);
66static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf); 63static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf);
67 64
68static int reiserfs_sync_fs (struct super_block * s, int wait) 65static int reiserfs_sync_fs(struct super_block *s, int wait)
69{ 66{
70 if (!(s->s_flags & MS_RDONLY)) { 67 if (!(s->s_flags & MS_RDONLY)) {
71 struct reiserfs_transaction_handle th; 68 struct reiserfs_transaction_handle th;
72 reiserfs_write_lock(s); 69 reiserfs_write_lock(s);
73 if (!journal_begin(&th, s, 1)) 70 if (!journal_begin(&th, s, 1))
74 if (!journal_end_sync(&th, s, 1)) 71 if (!journal_end_sync(&th, s, 1))
75 reiserfs_flush_old_commits(s); 72 reiserfs_flush_old_commits(s);
76 s->s_dirt = 0; /* Even if it's not true. 73 s->s_dirt = 0; /* Even if it's not true.
77 * We'll loop forever in sync_supers otherwise */ 74 * We'll loop forever in sync_supers otherwise */
78 reiserfs_write_unlock(s); 75 reiserfs_write_unlock(s);
79 } else { 76 } else {
80 s->s_dirt = 0; 77 s->s_dirt = 0;
81 } 78 }
82 return 0; 79 return 0;
83} 80}
84 81
85static void reiserfs_write_super(struct super_block *s) 82static void reiserfs_write_super(struct super_block *s)
86{ 83{
87 reiserfs_sync_fs(s, 1); 84 reiserfs_sync_fs(s, 1);
88} 85}
89 86
90static void reiserfs_write_super_lockfs (struct super_block * s) 87static void reiserfs_write_super_lockfs(struct super_block *s)
91{ 88{
92 struct reiserfs_transaction_handle th ; 89 struct reiserfs_transaction_handle th;
93 reiserfs_write_lock(s); 90 reiserfs_write_lock(s);
94 if (!(s->s_flags & MS_RDONLY)) { 91 if (!(s->s_flags & MS_RDONLY)) {
95 int err = journal_begin(&th, s, 1) ; 92 int err = journal_begin(&th, s, 1);
96 if (err) { 93 if (err) {
97 reiserfs_block_writes(&th) ; 94 reiserfs_block_writes(&th);
98 } else { 95 } else {
99 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); 96 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
100 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 97 1);
101 reiserfs_block_writes(&th) ; 98 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
102 journal_end_sync(&th, s, 1) ; 99 reiserfs_block_writes(&th);
103 } 100 journal_end_sync(&th, s, 1);
104 } 101 }
105 s->s_dirt = 0; 102 }
106 reiserfs_write_unlock(s); 103 s->s_dirt = 0;
104 reiserfs_write_unlock(s);
107} 105}
108 106
109static void reiserfs_unlockfs(struct super_block *s) { 107static void reiserfs_unlockfs(struct super_block *s)
110 reiserfs_allow_writes(s) ; 108{
109 reiserfs_allow_writes(s);
111} 110}
112 111
113extern const struct in_core_key MAX_IN_CORE_KEY; 112extern const struct in_core_key MAX_IN_CORE_KEY;
114
115 113
116/* this is used to delete "save link" when there are no items of a 114/* this is used to delete "save link" when there are no items of a
117 file it points to. It can either happen if unlink is completed but 115 file it points to. It can either happen if unlink is completed but
@@ -120,364 +118,387 @@ extern const struct in_core_key MAX_IN_CORE_KEY;
120 protecting unlink is bigger that a key lf "save link" which 118 protecting unlink is bigger that a key lf "save link" which
121 protects truncate), so there left no items to make truncate 119 protects truncate), so there left no items to make truncate
122 completion on */ 120 completion on */
123static int remove_save_link_only (struct super_block * s, struct reiserfs_key * key, int oid_free) 121static int remove_save_link_only(struct super_block *s,
122 struct reiserfs_key *key, int oid_free)
124{ 123{
125 struct reiserfs_transaction_handle th; 124 struct reiserfs_transaction_handle th;
126 int err; 125 int err;
127 126
128 /* we are going to do one balancing */ 127 /* we are going to do one balancing */
129 err = journal_begin (&th, s, JOURNAL_PER_BALANCE_CNT); 128 err = journal_begin(&th, s, JOURNAL_PER_BALANCE_CNT);
130 if (err) 129 if (err)
131 return err; 130 return err;
132 131
133 reiserfs_delete_solid_item (&th, NULL, key); 132 reiserfs_delete_solid_item(&th, NULL, key);
134 if (oid_free) 133 if (oid_free)
135 /* removals are protected by direct items */ 134 /* removals are protected by direct items */
136 reiserfs_release_objectid (&th, le32_to_cpu (key->k_objectid)); 135 reiserfs_release_objectid(&th, le32_to_cpu(key->k_objectid));
137 136
138 return journal_end (&th, s, JOURNAL_PER_BALANCE_CNT); 137 return journal_end(&th, s, JOURNAL_PER_BALANCE_CNT);
139} 138}
140 139
141#ifdef CONFIG_QUOTA 140#ifdef CONFIG_QUOTA
142static int reiserfs_quota_on_mount(struct super_block *, int); 141static int reiserfs_quota_on_mount(struct super_block *, int);
143#endif 142#endif
144 143
145/* look for uncompleted unlinks and truncates and complete them */ 144/* look for uncompleted unlinks and truncates and complete them */
146static int finish_unfinished (struct super_block * s) 145static int finish_unfinished(struct super_block *s)
147{ 146{
148 INITIALIZE_PATH (path); 147 INITIALIZE_PATH(path);
149 struct cpu_key max_cpu_key, obj_key; 148 struct cpu_key max_cpu_key, obj_key;
150 struct reiserfs_key save_link_key; 149 struct reiserfs_key save_link_key;
151 int retval = 0; 150 int retval = 0;
152 struct item_head * ih; 151 struct item_head *ih;
153 struct buffer_head * bh; 152 struct buffer_head *bh;
154 int item_pos; 153 int item_pos;
155 char * item; 154 char *item;
156 int done; 155 int done;
157 struct inode * inode; 156 struct inode *inode;
158 int truncate; 157 int truncate;
159#ifdef CONFIG_QUOTA 158#ifdef CONFIG_QUOTA
160 int i; 159 int i;
161 int ms_active_set; 160 int ms_active_set;
162#endif 161#endif
163 162
164 163 /* compose key to look for "save" links */
165 /* compose key to look for "save" links */ 164 max_cpu_key.version = KEY_FORMAT_3_5;
166 max_cpu_key.version = KEY_FORMAT_3_5; 165 max_cpu_key.on_disk_key.k_dir_id = ~0U;
167 max_cpu_key.on_disk_key.k_dir_id = ~0U; 166 max_cpu_key.on_disk_key.k_objectid = ~0U;
168 max_cpu_key.on_disk_key.k_objectid = ~0U; 167 set_cpu_key_k_offset(&max_cpu_key, ~0U);
169 set_cpu_key_k_offset (&max_cpu_key, ~0U); 168 max_cpu_key.key_length = 3;
170 max_cpu_key.key_length = 3;
171 169
172#ifdef CONFIG_QUOTA 170#ifdef CONFIG_QUOTA
173 /* Needed for iput() to work correctly and not trash data */ 171 /* Needed for iput() to work correctly and not trash data */
174 if (s->s_flags & MS_ACTIVE) { 172 if (s->s_flags & MS_ACTIVE) {
175 ms_active_set = 0; 173 ms_active_set = 0;
176 } else { 174 } else {
177 ms_active_set = 1; 175 ms_active_set = 1;
178 s->s_flags |= MS_ACTIVE; 176 s->s_flags |= MS_ACTIVE;
179 } 177 }
180 /* Turn on quotas so that they are updated correctly */ 178 /* Turn on quotas so that they are updated correctly */
181 for (i = 0; i < MAXQUOTAS; i++) { 179 for (i = 0; i < MAXQUOTAS; i++) {
182 if (REISERFS_SB(s)->s_qf_names[i]) { 180 if (REISERFS_SB(s)->s_qf_names[i]) {
183 int ret = reiserfs_quota_on_mount(s, i); 181 int ret = reiserfs_quota_on_mount(s, i);
184 if (ret < 0) 182 if (ret < 0)
185 reiserfs_warning(s, "reiserfs: cannot turn on journalled quota: error %d", ret); 183 reiserfs_warning(s,
186 } 184 "reiserfs: cannot turn on journalled quota: error %d",
187 } 185 ret);
186 }
187 }
188#endif 188#endif
189 189
190 done = 0; 190 done = 0;
191 REISERFS_SB(s)->s_is_unlinked_ok = 1; 191 REISERFS_SB(s)->s_is_unlinked_ok = 1;
192 while (!retval) { 192 while (!retval) {
193 retval = search_item (s, &max_cpu_key, &path); 193 retval = search_item(s, &max_cpu_key, &path);
194 if (retval != ITEM_NOT_FOUND) { 194 if (retval != ITEM_NOT_FOUND) {
195 reiserfs_warning (s, "vs-2140: finish_unfinished: search_by_key returned %d", 195 reiserfs_warning(s,
196 retval); 196 "vs-2140: finish_unfinished: search_by_key returned %d",
197 break; 197 retval);
198 } 198 break;
199 199 }
200 bh = get_last_bh (&path); 200
201 item_pos = get_item_pos (&path); 201 bh = get_last_bh(&path);
202 if (item_pos != B_NR_ITEMS (bh)) { 202 item_pos = get_item_pos(&path);
203 reiserfs_warning (s, "vs-2060: finish_unfinished: wrong position found"); 203 if (item_pos != B_NR_ITEMS(bh)) {
204 break; 204 reiserfs_warning(s,
205 } 205 "vs-2060: finish_unfinished: wrong position found");
206 item_pos --; 206 break;
207 ih = B_N_PITEM_HEAD (bh, item_pos); 207 }
208 208 item_pos--;
209 if (le32_to_cpu (ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) 209 ih = B_N_PITEM_HEAD(bh, item_pos);
210 /* there are no "save" links anymore */ 210
211 break; 211 if (le32_to_cpu(ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID)
212 212 /* there are no "save" links anymore */
213 save_link_key = ih->ih_key; 213 break;
214 if (is_indirect_le_ih (ih)) 214
215 truncate = 1; 215 save_link_key = ih->ih_key;
216 else 216 if (is_indirect_le_ih(ih))
217 truncate = 0; 217 truncate = 1;
218 218 else
219 /* reiserfs_iget needs k_dirid and k_objectid only */ 219 truncate = 0;
220 item = B_I_PITEM (bh, ih); 220
221 obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item); 221 /* reiserfs_iget needs k_dirid and k_objectid only */
222 obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); 222 item = B_I_PITEM(bh, ih);
223 obj_key.on_disk_key.k_offset = 0; 223 obj_key.on_disk_key.k_dir_id = le32_to_cpu(*(__le32 *) item);
224 obj_key.on_disk_key.k_type = 0; 224 obj_key.on_disk_key.k_objectid =
225 225 le32_to_cpu(ih->ih_key.k_objectid);
226 pathrelse (&path); 226 obj_key.on_disk_key.k_offset = 0;
227 227 obj_key.on_disk_key.k_type = 0;
228 inode = reiserfs_iget (s, &obj_key); 228
229 if (!inode) { 229 pathrelse(&path);
230 /* the unlink almost completed, it just did not manage to remove 230
231 "save" link and release objectid */ 231 inode = reiserfs_iget(s, &obj_key);
232 reiserfs_warning (s, "vs-2180: finish_unfinished: iget failed for %K", 232 if (!inode) {
233 &obj_key); 233 /* the unlink almost completed, it just did not manage to remove
234 retval = remove_save_link_only (s, &save_link_key, 1); 234 "save" link and release objectid */
235 continue; 235 reiserfs_warning(s,
236 } 236 "vs-2180: finish_unfinished: iget failed for %K",
237 237 &obj_key);
238 if (!truncate && inode->i_nlink) { 238 retval = remove_save_link_only(s, &save_link_key, 1);
239 /* file is not unlinked */ 239 continue;
240 reiserfs_warning (s, "vs-2185: finish_unfinished: file %K is not unlinked", 240 }
241 &obj_key); 241
242 retval = remove_save_link_only (s, &save_link_key, 0); 242 if (!truncate && inode->i_nlink) {
243 continue; 243 /* file is not unlinked */
244 } 244 reiserfs_warning(s,
245 DQUOT_INIT(inode); 245 "vs-2185: finish_unfinished: file %K is not unlinked",
246 246 &obj_key);
247 if (truncate && S_ISDIR (inode->i_mode) ) { 247 retval = remove_save_link_only(s, &save_link_key, 0);
248 /* We got a truncate request for a dir which is impossible. 248 continue;
249 The only imaginable way is to execute unfinished truncate request 249 }
250 then boot into old kernel, remove the file and create dir with 250 DQUOT_INIT(inode);
251 the same key. */ 251
252 reiserfs_warning(s, "green-2101: impossible truncate on a directory %k. Please report", INODE_PKEY (inode)); 252 if (truncate && S_ISDIR(inode->i_mode)) {
253 retval = remove_save_link_only (s, &save_link_key, 0); 253 /* We got a truncate request for a dir which is impossible.
254 truncate = 0; 254 The only imaginable way is to execute unfinished truncate request
255 iput (inode); 255 then boot into old kernel, remove the file and create dir with
256 continue; 256 the same key. */
257 } 257 reiserfs_warning(s,
258 258 "green-2101: impossible truncate on a directory %k. Please report",
259 if (truncate) { 259 INODE_PKEY(inode));
260 REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask; 260 retval = remove_save_link_only(s, &save_link_key, 0);
261 /* not completed truncate found. New size was committed together 261 truncate = 0;
262 with "save" link */ 262 iput(inode);
263 reiserfs_info (s, "Truncating %k to %Ld ..", 263 continue;
264 INODE_PKEY (inode), inode->i_size); 264 }
265 reiserfs_truncate_file (inode, 0/*don't update modification time*/); 265
266 retval = remove_save_link (inode, truncate); 266 if (truncate) {
267 } else { 267 REISERFS_I(inode)->i_flags |=
268 REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask; 268 i_link_saved_truncate_mask;
269 /* not completed unlink (rmdir) found */ 269 /* not completed truncate found. New size was committed together
270 reiserfs_info (s, "Removing %k..", INODE_PKEY (inode)); 270 with "save" link */
271 /* removal gets completed in iput */ 271 reiserfs_info(s, "Truncating %k to %Ld ..",
272 retval = 0; 272 INODE_PKEY(inode), inode->i_size);
273 } 273 reiserfs_truncate_file(inode,
274 274 0
275 iput (inode); 275 /*don't update modification time */
276 printk ("done\n"); 276 );
277 done ++; 277 retval = remove_save_link(inode, truncate);
278 } 278 } else {
279 REISERFS_SB(s)->s_is_unlinked_ok = 0; 279 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
280 280 /* not completed unlink (rmdir) found */
281 reiserfs_info(s, "Removing %k..", INODE_PKEY(inode));
282 /* removal gets completed in iput */
283 retval = 0;
284 }
285
286 iput(inode);
287 printk("done\n");
288 done++;
289 }
290 REISERFS_SB(s)->s_is_unlinked_ok = 0;
291
281#ifdef CONFIG_QUOTA 292#ifdef CONFIG_QUOTA
282 /* Turn quotas off */ 293 /* Turn quotas off */
283 for (i = 0; i < MAXQUOTAS; i++) { 294 for (i = 0; i < MAXQUOTAS; i++) {
284 if (sb_dqopt(s)->files[i]) 295 if (sb_dqopt(s)->files[i])
285 vfs_quota_off_mount(s, i); 296 vfs_quota_off_mount(s, i);
286 } 297 }
287 if (ms_active_set) 298 if (ms_active_set)
288 /* Restore the flag back */ 299 /* Restore the flag back */
289 s->s_flags &= ~MS_ACTIVE; 300 s->s_flags &= ~MS_ACTIVE;
290#endif 301#endif
291 pathrelse (&path); 302 pathrelse(&path);
292 if (done) 303 if (done)
293 reiserfs_info (s, "There were %d uncompleted unlinks/truncates. " 304 reiserfs_info(s, "There were %d uncompleted unlinks/truncates. "
294 "Completed\n", done); 305 "Completed\n", done);
295 return retval; 306 return retval;
296} 307}
297 308
298/* to protect file being unlinked from getting lost we "safe" link files 309/* to protect file being unlinked from getting lost we "safe" link files
299 being unlinked. This link will be deleted in the same transaction with last 310 being unlinked. This link will be deleted in the same transaction with last
300 item of file. mounting the filesytem we scan all these links and remove 311 item of file. mounting the filesytem we scan all these links and remove
301 files which almost got lost */ 312 files which almost got lost */
302void add_save_link (struct reiserfs_transaction_handle * th, 313void add_save_link(struct reiserfs_transaction_handle *th,
303 struct inode * inode, int truncate) 314 struct inode *inode, int truncate)
304{ 315{
305 INITIALIZE_PATH (path); 316 INITIALIZE_PATH(path);
306 int retval; 317 int retval;
307 struct cpu_key key; 318 struct cpu_key key;
308 struct item_head ih; 319 struct item_head ih;
309 __le32 link; 320 __le32 link;
310 321
311 BUG_ON (!th->t_trans_id); 322 BUG_ON(!th->t_trans_id);
312 323
313 /* file can only get one "save link" of each kind */ 324 /* file can only get one "save link" of each kind */
314 RFALSE( truncate && 325 RFALSE(truncate &&
315 ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ), 326 (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask),
316 "saved link already exists for truncated inode %lx", 327 "saved link already exists for truncated inode %lx",
317 ( long ) inode -> i_ino ); 328 (long)inode->i_ino);
318 RFALSE( !truncate && 329 RFALSE(!truncate &&
319 ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ), 330 (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask),
320 "saved link already exists for unlinked inode %lx", 331 "saved link already exists for unlinked inode %lx",
321 ( long ) inode -> i_ino ); 332 (long)inode->i_ino);
322 333
323 /* setup key of "save" link */ 334 /* setup key of "save" link */
324 key.version = KEY_FORMAT_3_5; 335 key.version = KEY_FORMAT_3_5;
325 key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; 336 key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID;
326 key.on_disk_key.k_objectid = inode->i_ino; 337 key.on_disk_key.k_objectid = inode->i_ino;
327 if (!truncate) { 338 if (!truncate) {
328 /* unlink, rmdir, rename */ 339 /* unlink, rmdir, rename */
329 set_cpu_key_k_offset (&key, 1 + inode->i_sb->s_blocksize); 340 set_cpu_key_k_offset(&key, 1 + inode->i_sb->s_blocksize);
330 set_cpu_key_k_type (&key, TYPE_DIRECT); 341 set_cpu_key_k_type(&key, TYPE_DIRECT);
331 342
332 /* item head of "safe" link */ 343 /* item head of "safe" link */
333 make_le_item_head (&ih, &key, key.version, 1 + inode->i_sb->s_blocksize, TYPE_DIRECT, 344 make_le_item_head(&ih, &key, key.version,
334 4/*length*/, 0xffff/*free space*/); 345 1 + inode->i_sb->s_blocksize, TYPE_DIRECT,
335 } else { 346 4 /*length */ , 0xffff /*free space */ );
336 /* truncate */ 347 } else {
337 if (S_ISDIR (inode->i_mode)) 348 /* truncate */
338 reiserfs_warning(inode->i_sb, "green-2102: Adding a truncate savelink for a directory %k! Please report", INODE_PKEY(inode)); 349 if (S_ISDIR(inode->i_mode))
339 set_cpu_key_k_offset (&key, 1); 350 reiserfs_warning(inode->i_sb,
340 set_cpu_key_k_type (&key, TYPE_INDIRECT); 351 "green-2102: Adding a truncate savelink for a directory %k! Please report",
341 352 INODE_PKEY(inode));
342 /* item head of "safe" link */ 353 set_cpu_key_k_offset(&key, 1);
343 make_le_item_head (&ih, &key, key.version, 1, TYPE_INDIRECT, 354 set_cpu_key_k_type(&key, TYPE_INDIRECT);
344 4/*length*/, 0/*free space*/); 355
345 } 356 /* item head of "safe" link */
346 key.key_length = 3; 357 make_le_item_head(&ih, &key, key.version, 1, TYPE_INDIRECT,
347 358 4 /*length */ , 0 /*free space */ );
348 /* look for its place in the tree */ 359 }
349 retval = search_item (inode->i_sb, &key, &path); 360 key.key_length = 3;
350 if (retval != ITEM_NOT_FOUND) { 361
351 if ( retval != -ENOSPC ) 362 /* look for its place in the tree */
352 reiserfs_warning (inode->i_sb, "vs-2100: add_save_link:" 363 retval = search_item(inode->i_sb, &key, &path);
353 "search_by_key (%K) returned %d", &key, retval); 364 if (retval != ITEM_NOT_FOUND) {
354 pathrelse (&path); 365 if (retval != -ENOSPC)
355 return; 366 reiserfs_warning(inode->i_sb, "vs-2100: add_save_link:"
356 } 367 "search_by_key (%K) returned %d", &key,
357 368 retval);
358 /* body of "save" link */ 369 pathrelse(&path);
359 link = INODE_PKEY (inode)->k_dir_id; 370 return;
360 371 }
361 /* put "save" link inot tree, don't charge quota to anyone */
362 retval = reiserfs_insert_item (th, &path, &key, &ih, NULL, (char *)&link);
363 if (retval) {
364 if (retval != -ENOSPC)
365 reiserfs_warning (inode->i_sb, "vs-2120: add_save_link: insert_item returned %d",
366 retval);
367 } else {
368 if( truncate )
369 REISERFS_I(inode) -> i_flags |= i_link_saved_truncate_mask;
370 else
371 REISERFS_I(inode) -> i_flags |= i_link_saved_unlink_mask;
372 }
373}
374 372
373 /* body of "save" link */
374 link = INODE_PKEY(inode)->k_dir_id;
375
376 /* put "save" link inot tree, don't charge quota to anyone */
377 retval =
378 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
379 if (retval) {
380 if (retval != -ENOSPC)
381 reiserfs_warning(inode->i_sb,
382 "vs-2120: add_save_link: insert_item returned %d",
383 retval);
384 } else {
385 if (truncate)
386 REISERFS_I(inode)->i_flags |=
387 i_link_saved_truncate_mask;
388 else
389 REISERFS_I(inode)->i_flags |= i_link_saved_unlink_mask;
390 }
391}
375 392
376/* this opens transaction unlike add_save_link */ 393/* this opens transaction unlike add_save_link */
377int remove_save_link (struct inode * inode, int truncate) 394int remove_save_link(struct inode *inode, int truncate)
378{ 395{
379 struct reiserfs_transaction_handle th; 396 struct reiserfs_transaction_handle th;
380 struct reiserfs_key key; 397 struct reiserfs_key key;
381 int err; 398 int err;
382 399
383 /* we are going to do one balancing only */ 400 /* we are going to do one balancing only */
384 err = journal_begin (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); 401 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
385 if (err) 402 if (err)
386 return err; 403 return err;
387 404
388 /* setup key of "save" link */ 405 /* setup key of "save" link */
389 key.k_dir_id = cpu_to_le32 (MAX_KEY_OBJECTID); 406 key.k_dir_id = cpu_to_le32(MAX_KEY_OBJECTID);
390 key.k_objectid = INODE_PKEY (inode)->k_objectid; 407 key.k_objectid = INODE_PKEY(inode)->k_objectid;
391 if (!truncate) { 408 if (!truncate) {
392 /* unlink, rmdir, rename */ 409 /* unlink, rmdir, rename */
393 set_le_key_k_offset (KEY_FORMAT_3_5, &key, 410 set_le_key_k_offset(KEY_FORMAT_3_5, &key,
394 1 + inode->i_sb->s_blocksize); 411 1 + inode->i_sb->s_blocksize);
395 set_le_key_k_type (KEY_FORMAT_3_5, &key, TYPE_DIRECT); 412 set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_DIRECT);
396 } else { 413 } else {
397 /* truncate */ 414 /* truncate */
398 set_le_key_k_offset (KEY_FORMAT_3_5, &key, 1); 415 set_le_key_k_offset(KEY_FORMAT_3_5, &key, 1);
399 set_le_key_k_type (KEY_FORMAT_3_5, &key, TYPE_INDIRECT); 416 set_le_key_k_type(KEY_FORMAT_3_5, &key, TYPE_INDIRECT);
400 } 417 }
401
402 if( ( truncate &&
403 ( REISERFS_I(inode) -> i_flags & i_link_saved_truncate_mask ) ) ||
404 ( !truncate &&
405 ( REISERFS_I(inode) -> i_flags & i_link_saved_unlink_mask ) ) )
406 /* don't take quota bytes from anywhere */
407 reiserfs_delete_solid_item (&th, NULL, &key);
408 if (!truncate) {
409 reiserfs_release_objectid (&th, inode->i_ino);
410 REISERFS_I(inode) -> i_flags &= ~i_link_saved_unlink_mask;
411 } else
412 REISERFS_I(inode) -> i_flags &= ~i_link_saved_truncate_mask;
413
414 return journal_end (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
415}
416 418
419 if ((truncate &&
420 (REISERFS_I(inode)->i_flags & i_link_saved_truncate_mask)) ||
421 (!truncate &&
422 (REISERFS_I(inode)->i_flags & i_link_saved_unlink_mask)))
423 /* don't take quota bytes from anywhere */
424 reiserfs_delete_solid_item(&th, NULL, &key);
425 if (!truncate) {
426 reiserfs_release_objectid(&th, inode->i_ino);
427 REISERFS_I(inode)->i_flags &= ~i_link_saved_unlink_mask;
428 } else
429 REISERFS_I(inode)->i_flags &= ~i_link_saved_truncate_mask;
430
431 return journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT);
432}
417 433
418static void reiserfs_put_super (struct super_block * s) 434static void reiserfs_put_super(struct super_block *s)
419{ 435{
420 int i; 436 int i;
421 struct reiserfs_transaction_handle th ; 437 struct reiserfs_transaction_handle th;
422 th.t_trans_id = 0; 438 th.t_trans_id = 0;
423 439
424 if (REISERFS_SB(s)->xattr_root) { 440 if (REISERFS_SB(s)->xattr_root) {
425 d_invalidate (REISERFS_SB(s)->xattr_root); 441 d_invalidate(REISERFS_SB(s)->xattr_root);
426 dput (REISERFS_SB(s)->xattr_root); 442 dput(REISERFS_SB(s)->xattr_root);
427 } 443 }
428 444
429 if (REISERFS_SB(s)->priv_root) { 445 if (REISERFS_SB(s)->priv_root) {
430 d_invalidate (REISERFS_SB(s)->priv_root); 446 d_invalidate(REISERFS_SB(s)->priv_root);
431 dput (REISERFS_SB(s)->priv_root); 447 dput(REISERFS_SB(s)->priv_root);
432 } 448 }
433 449
434 /* change file system state to current state if it was mounted with read-write permissions */ 450 /* change file system state to current state if it was mounted with read-write permissions */
435 if (!(s->s_flags & MS_RDONLY)) { 451 if (!(s->s_flags & MS_RDONLY)) {
436 if (!journal_begin(&th, s, 10)) { 452 if (!journal_begin(&th, s, 10)) {
437 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 453 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
438 set_sb_umount_state( SB_DISK_SUPER_BLOCK(s), REISERFS_SB(s)->s_mount_state ); 454 1);
439 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 455 set_sb_umount_state(SB_DISK_SUPER_BLOCK(s),
440 } 456 REISERFS_SB(s)->s_mount_state);
441 } 457 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
442 458 }
443 /* note, journal_release checks for readonly mount, and can decide not 459 }
444 ** to do a journal_end 460
445 */ 461 /* note, journal_release checks for readonly mount, and can decide not
446 journal_release(&th, s) ; 462 ** to do a journal_end
447 463 */
448 for (i = 0; i < SB_BMAP_NR (s); i ++) 464 journal_release(&th, s);
449 brelse (SB_AP_BITMAP (s)[i].bh); 465
450 466 for (i = 0; i < SB_BMAP_NR(s); i++)
451 vfree (SB_AP_BITMAP (s)); 467 brelse(SB_AP_BITMAP(s)[i].bh);
452 468
453 brelse (SB_BUFFER_WITH_SB (s)); 469 vfree(SB_AP_BITMAP(s));
454 470
455 print_statistics (s); 471 brelse(SB_BUFFER_WITH_SB(s));
456 472
457 if (REISERFS_SB(s)->s_kmallocs != 0) { 473 print_statistics(s);
458 reiserfs_warning (s, "vs-2004: reiserfs_put_super: allocated memory left %d", 474
459 REISERFS_SB(s)->s_kmallocs); 475 if (REISERFS_SB(s)->s_kmallocs != 0) {
460 } 476 reiserfs_warning(s,
461 477 "vs-2004: reiserfs_put_super: allocated memory left %d",
462 if (REISERFS_SB(s)->reserved_blocks != 0) { 478 REISERFS_SB(s)->s_kmallocs);
463 reiserfs_warning (s, "green-2005: reiserfs_put_super: reserved blocks left %d", 479 }
464 REISERFS_SB(s)->reserved_blocks); 480
465 } 481 if (REISERFS_SB(s)->reserved_blocks != 0) {
466 482 reiserfs_warning(s,
467 reiserfs_proc_info_done( s ); 483 "green-2005: reiserfs_put_super: reserved blocks left %d",
468 484 REISERFS_SB(s)->reserved_blocks);
469 kfree(s->s_fs_info); 485 }
470 s->s_fs_info = NULL; 486
471 487 reiserfs_proc_info_done(s);
472 return; 488
489 kfree(s->s_fs_info);
490 s->s_fs_info = NULL;
491
492 return;
473} 493}
474 494
475static kmem_cache_t * reiserfs_inode_cachep; 495static kmem_cache_t *reiserfs_inode_cachep;
476 496
477static struct inode *reiserfs_alloc_inode(struct super_block *sb) 497static struct inode *reiserfs_alloc_inode(struct super_block *sb)
478{ 498{
479 struct reiserfs_inode_info *ei; 499 struct reiserfs_inode_info *ei;
480 ei = (struct reiserfs_inode_info *)kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); 500 ei = (struct reiserfs_inode_info *)
501 kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL);
481 if (!ei) 502 if (!ei)
482 return NULL; 503 return NULL;
483 return &ei->vfs_inode; 504 return &ei->vfs_inode;
@@ -488,25 +509,26 @@ static void reiserfs_destroy_inode(struct inode *inode)
488 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 509 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
489} 510}
490 511
491static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) 512static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
492{ 513{
493 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *) foo; 514 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
494 515
495 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 516 if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
496 SLAB_CTOR_CONSTRUCTOR) { 517 SLAB_CTOR_CONSTRUCTOR) {
497 INIT_LIST_HEAD(&ei->i_prealloc_list) ; 518 INIT_LIST_HEAD(&ei->i_prealloc_list);
498 inode_init_once(&ei->vfs_inode); 519 inode_init_once(&ei->vfs_inode);
499 ei->i_acl_access = NULL; 520 ei->i_acl_access = NULL;
500 ei->i_acl_default = NULL; 521 ei->i_acl_default = NULL;
501 } 522 }
502} 523}
503 524
504static int init_inodecache(void) 525static int init_inodecache(void)
505{ 526{
506 reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", 527 reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache",
507 sizeof(struct reiserfs_inode_info), 528 sizeof(struct
508 0, SLAB_RECLAIM_ACCOUNT, 529 reiserfs_inode_info),
509 init_once, NULL); 530 0, SLAB_RECLAIM_ACCOUNT,
531 init_once, NULL);
510 if (reiserfs_inode_cachep == NULL) 532 if (reiserfs_inode_cachep == NULL)
511 return -ENOMEM; 533 return -ENOMEM;
512 return 0; 534 return 0;
@@ -515,72 +537,76 @@ static int init_inodecache(void)
515static void destroy_inodecache(void) 537static void destroy_inodecache(void)
516{ 538{
517 if (kmem_cache_destroy(reiserfs_inode_cachep)) 539 if (kmem_cache_destroy(reiserfs_inode_cachep))
518 reiserfs_warning (NULL, "reiserfs_inode_cache: not all structures were freed"); 540 reiserfs_warning(NULL,
541 "reiserfs_inode_cache: not all structures were freed");
519} 542}
520 543
521/* we don't mark inodes dirty, we just log them */ 544/* we don't mark inodes dirty, we just log them */
522static void reiserfs_dirty_inode (struct inode * inode) { 545static void reiserfs_dirty_inode(struct inode *inode)
523 struct reiserfs_transaction_handle th ; 546{
524 547 struct reiserfs_transaction_handle th;
525 int err = 0; 548
526 if (inode->i_sb->s_flags & MS_RDONLY) { 549 int err = 0;
527 reiserfs_warning(inode->i_sb, "clm-6006: writing inode %lu on readonly FS", 550 if (inode->i_sb->s_flags & MS_RDONLY) {
528 inode->i_ino) ; 551 reiserfs_warning(inode->i_sb,
529 return ; 552 "clm-6006: writing inode %lu on readonly FS",
530 } 553 inode->i_ino);
531 reiserfs_write_lock(inode->i_sb); 554 return;
532 555 }
533 /* this is really only used for atime updates, so they don't have 556 reiserfs_write_lock(inode->i_sb);
534 ** to be included in O_SYNC or fsync 557
535 */ 558 /* this is really only used for atime updates, so they don't have
536 err = journal_begin(&th, inode->i_sb, 1) ; 559 ** to be included in O_SYNC or fsync
537 if (err) { 560 */
538 reiserfs_write_unlock (inode->i_sb); 561 err = journal_begin(&th, inode->i_sb, 1);
539 return; 562 if (err) {
540 } 563 reiserfs_write_unlock(inode->i_sb);
541 reiserfs_update_sd (&th, inode); 564 return;
542 journal_end(&th, inode->i_sb, 1) ; 565 }
543 reiserfs_write_unlock(inode->i_sb); 566 reiserfs_update_sd(&th, inode);
567 journal_end(&th, inode->i_sb, 1);
568 reiserfs_write_unlock(inode->i_sb);
544} 569}
545 570
546static void reiserfs_clear_inode (struct inode *inode) 571static void reiserfs_clear_inode(struct inode *inode)
547{ 572{
548 struct posix_acl *acl; 573 struct posix_acl *acl;
549 574
550 acl = REISERFS_I(inode)->i_acl_access; 575 acl = REISERFS_I(inode)->i_acl_access;
551 if (acl && !IS_ERR (acl)) 576 if (acl && !IS_ERR(acl))
552 posix_acl_release (acl); 577 posix_acl_release(acl);
553 REISERFS_I(inode)->i_acl_access = NULL; 578 REISERFS_I(inode)->i_acl_access = NULL;
554 579
555 acl = REISERFS_I(inode)->i_acl_default; 580 acl = REISERFS_I(inode)->i_acl_default;
556 if (acl && !IS_ERR (acl)) 581 if (acl && !IS_ERR(acl))
557 posix_acl_release (acl); 582 posix_acl_release(acl);
558 REISERFS_I(inode)->i_acl_default = NULL; 583 REISERFS_I(inode)->i_acl_default = NULL;
559} 584}
560 585
561#ifdef CONFIG_QUOTA 586#ifdef CONFIG_QUOTA
562static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); 587static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
563static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); 588 size_t, loff_t);
589static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
590 loff_t);
564#endif 591#endif
565 592
566static struct super_operations reiserfs_sops = 593static struct super_operations reiserfs_sops = {
567{ 594 .alloc_inode = reiserfs_alloc_inode,
568 .alloc_inode = reiserfs_alloc_inode, 595 .destroy_inode = reiserfs_destroy_inode,
569 .destroy_inode = reiserfs_destroy_inode, 596 .write_inode = reiserfs_write_inode,
570 .write_inode = reiserfs_write_inode, 597 .dirty_inode = reiserfs_dirty_inode,
571 .dirty_inode = reiserfs_dirty_inode, 598 .delete_inode = reiserfs_delete_inode,
572 .delete_inode = reiserfs_delete_inode, 599 .clear_inode = reiserfs_clear_inode,
573 .clear_inode = reiserfs_clear_inode, 600 .put_super = reiserfs_put_super,
574 .put_super = reiserfs_put_super, 601 .write_super = reiserfs_write_super,
575 .write_super = reiserfs_write_super, 602 .sync_fs = reiserfs_sync_fs,
576 .sync_fs = reiserfs_sync_fs, 603 .write_super_lockfs = reiserfs_write_super_lockfs,
577 .write_super_lockfs = reiserfs_write_super_lockfs, 604 .unlockfs = reiserfs_unlockfs,
578 .unlockfs = reiserfs_unlockfs, 605 .statfs = reiserfs_statfs,
579 .statfs = reiserfs_statfs, 606 .remount_fs = reiserfs_remount,
580 .remount_fs = reiserfs_remount,
581#ifdef CONFIG_QUOTA 607#ifdef CONFIG_QUOTA
582 .quota_read = reiserfs_quota_read, 608 .quota_read = reiserfs_quota_read,
583 .quota_write = reiserfs_quota_write, 609 .quota_write = reiserfs_quota_write,
584#endif 610#endif
585}; 611};
586 612
@@ -596,50 +622,48 @@ static int reiserfs_mark_dquot_dirty(struct dquot *);
596static int reiserfs_write_info(struct super_block *, int); 622static int reiserfs_write_info(struct super_block *, int);
597static int reiserfs_quota_on(struct super_block *, int, int, char *); 623static int reiserfs_quota_on(struct super_block *, int, int, char *);
598 624
599static struct dquot_operations reiserfs_quota_operations = 625static struct dquot_operations reiserfs_quota_operations = {
600{ 626 .initialize = reiserfs_dquot_initialize,
601 .initialize = reiserfs_dquot_initialize, 627 .drop = reiserfs_dquot_drop,
602 .drop = reiserfs_dquot_drop, 628 .alloc_space = dquot_alloc_space,
603 .alloc_space = dquot_alloc_space, 629 .alloc_inode = dquot_alloc_inode,
604 .alloc_inode = dquot_alloc_inode, 630 .free_space = dquot_free_space,
605 .free_space = dquot_free_space, 631 .free_inode = dquot_free_inode,
606 .free_inode = dquot_free_inode, 632 .transfer = dquot_transfer,
607 .transfer = dquot_transfer, 633 .write_dquot = reiserfs_write_dquot,
608 .write_dquot = reiserfs_write_dquot, 634 .acquire_dquot = reiserfs_acquire_dquot,
609 .acquire_dquot = reiserfs_acquire_dquot, 635 .release_dquot = reiserfs_release_dquot,
610 .release_dquot = reiserfs_release_dquot, 636 .mark_dirty = reiserfs_mark_dquot_dirty,
611 .mark_dirty = reiserfs_mark_dquot_dirty, 637 .write_info = reiserfs_write_info,
612 .write_info = reiserfs_write_info,
613}; 638};
614 639
615static struct quotactl_ops reiserfs_qctl_operations = 640static struct quotactl_ops reiserfs_qctl_operations = {
616{ 641 .quota_on = reiserfs_quota_on,
617 .quota_on = reiserfs_quota_on, 642 .quota_off = vfs_quota_off,
618 .quota_off = vfs_quota_off, 643 .quota_sync = vfs_quota_sync,
619 .quota_sync = vfs_quota_sync, 644 .get_info = vfs_get_dqinfo,
620 .get_info = vfs_get_dqinfo, 645 .set_info = vfs_set_dqinfo,
621 .set_info = vfs_set_dqinfo, 646 .get_dqblk = vfs_get_dqblk,
622 .get_dqblk = vfs_get_dqblk, 647 .set_dqblk = vfs_set_dqblk,
623 .set_dqblk = vfs_set_dqblk,
624}; 648};
625#endif 649#endif
626 650
627static struct export_operations reiserfs_export_ops = { 651static struct export_operations reiserfs_export_ops = {
628 .encode_fh = reiserfs_encode_fh, 652 .encode_fh = reiserfs_encode_fh,
629 .decode_fh = reiserfs_decode_fh, 653 .decode_fh = reiserfs_decode_fh,
630 .get_parent = reiserfs_get_parent, 654 .get_parent = reiserfs_get_parent,
631 .get_dentry = reiserfs_get_dentry, 655 .get_dentry = reiserfs_get_dentry,
632} ; 656};
633 657
634/* this struct is used in reiserfs_getopt () for containing the value for those 658/* this struct is used in reiserfs_getopt () for containing the value for those
635 mount options that have values rather than being toggles. */ 659 mount options that have values rather than being toggles. */
636typedef struct { 660typedef struct {
637 char * value; 661 char *value;
638 int setmask; /* bitmask which is to set on mount_options bitmask when this 662 int setmask; /* bitmask which is to set on mount_options bitmask when this
639 value is found, 0 is no bits are to be changed. */ 663 value is found, 0 is no bits are to be changed. */
640 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 664 int clrmask; /* bitmask which is to clear on mount_options bitmask when this
641 value is found, 0 is no bits are to be changed. This is 665 value is found, 0 is no bits are to be changed. This is
642 applied BEFORE setmask */ 666 applied BEFORE setmask */
643} arg_desc_t; 667} arg_desc_t;
644 668
645/* Set this bit in arg_required to allow empty arguments */ 669/* Set this bit in arg_required to allow empty arguments */
@@ -648,67 +672,70 @@ typedef struct {
648/* this struct is used in reiserfs_getopt() for describing the set of reiserfs 672/* this struct is used in reiserfs_getopt() for describing the set of reiserfs
649 mount options */ 673 mount options */
650typedef struct { 674typedef struct {
651 char * option_name; 675 char *option_name;
652 int arg_required; /* 0 if argument is not required, not 0 otherwise */ 676 int arg_required; /* 0 if argument is not required, not 0 otherwise */
653 const arg_desc_t * values; /* list of values accepted by an option */ 677 const arg_desc_t *values; /* list of values accepted by an option */
654 int setmask; /* bitmask which is to set on mount_options bitmask when this 678 int setmask; /* bitmask which is to set on mount_options bitmask when this
655 value is found, 0 is no bits are to be changed. */ 679 value is found, 0 is no bits are to be changed. */
656 int clrmask; /* bitmask which is to clear on mount_options bitmask when this 680 int clrmask; /* bitmask which is to clear on mount_options bitmask when this
657 value is found, 0 is no bits are to be changed. This is 681 value is found, 0 is no bits are to be changed. This is
658 applied BEFORE setmask */ 682 applied BEFORE setmask */
659} opt_desc_t; 683} opt_desc_t;
660 684
661/* possible values for -o data= */ 685/* possible values for -o data= */
662static const arg_desc_t logging_mode[] = { 686static const arg_desc_t logging_mode[] = {
663 {"ordered", 1<<REISERFS_DATA_ORDERED, (1<<REISERFS_DATA_LOG|1<<REISERFS_DATA_WRITEBACK)}, 687 {"ordered", 1 << REISERFS_DATA_ORDERED,
664 {"journal", 1<<REISERFS_DATA_LOG, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_WRITEBACK)}, 688 (1 << REISERFS_DATA_LOG | 1 << REISERFS_DATA_WRITEBACK)},
665 {"writeback", 1<<REISERFS_DATA_WRITEBACK, (1<<REISERFS_DATA_ORDERED|1<<REISERFS_DATA_LOG)}, 689 {"journal", 1 << REISERFS_DATA_LOG,
666 {NULL, 0} 690 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_WRITEBACK)},
691 {"writeback", 1 << REISERFS_DATA_WRITEBACK,
692 (1 << REISERFS_DATA_ORDERED | 1 << REISERFS_DATA_LOG)},
693 {NULL, 0}
667}; 694};
668 695
669/* possible values for -o barrier= */ 696/* possible values for -o barrier= */
670static const arg_desc_t barrier_mode[] = { 697static const arg_desc_t barrier_mode[] = {
671 {"none", 1<<REISERFS_BARRIER_NONE, 1<<REISERFS_BARRIER_FLUSH}, 698 {"none", 1 << REISERFS_BARRIER_NONE, 1 << REISERFS_BARRIER_FLUSH},
672 {"flush", 1<<REISERFS_BARRIER_FLUSH, 1<<REISERFS_BARRIER_NONE}, 699 {"flush", 1 << REISERFS_BARRIER_FLUSH, 1 << REISERFS_BARRIER_NONE},
673 {NULL, 0} 700 {NULL, 0}
674}; 701};
675 702
676/* possible values for "-o block-allocator=" and bits which are to be set in 703/* possible values for "-o block-allocator=" and bits which are to be set in
677 s_mount_opt of reiserfs specific part of in-core super block */ 704 s_mount_opt of reiserfs specific part of in-core super block */
678static const arg_desc_t balloc[] = { 705static const arg_desc_t balloc[] = {
679 {"noborder", 1<<REISERFS_NO_BORDER, 0}, 706 {"noborder", 1 << REISERFS_NO_BORDER, 0},
680 {"border", 0, 1<<REISERFS_NO_BORDER}, 707 {"border", 0, 1 << REISERFS_NO_BORDER},
681 {"no_unhashed_relocation", 1<<REISERFS_NO_UNHASHED_RELOCATION, 0}, 708 {"no_unhashed_relocation", 1 << REISERFS_NO_UNHASHED_RELOCATION, 0},
682 {"hashed_relocation", 1<<REISERFS_HASHED_RELOCATION, 0}, 709 {"hashed_relocation", 1 << REISERFS_HASHED_RELOCATION, 0},
683 {"test4", 1<<REISERFS_TEST4, 0}, 710 {"test4", 1 << REISERFS_TEST4, 0},
684 {"notest4", 0, 1<<REISERFS_TEST4}, 711 {"notest4", 0, 1 << REISERFS_TEST4},
685 {NULL, 0, 0} 712 {NULL, 0, 0}
686}; 713};
687 714
688static const arg_desc_t tails[] = { 715static const arg_desc_t tails[] = {
689 {"on", 1<<REISERFS_LARGETAIL, 1<<REISERFS_SMALLTAIL}, 716 {"on", 1 << REISERFS_LARGETAIL, 1 << REISERFS_SMALLTAIL},
690 {"off", 0, (1<<REISERFS_LARGETAIL)|(1<<REISERFS_SMALLTAIL)}, 717 {"off", 0, (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
691 {"small", 1<<REISERFS_SMALLTAIL, 1<<REISERFS_LARGETAIL}, 718 {"small", 1 << REISERFS_SMALLTAIL, 1 << REISERFS_LARGETAIL},
692 {NULL, 0, 0} 719 {NULL, 0, 0}
693}; 720};
694 721
695static const arg_desc_t error_actions[] = { 722static const arg_desc_t error_actions[] = {
696 {"panic", 1 << REISERFS_ERROR_PANIC, 723 {"panic", 1 << REISERFS_ERROR_PANIC,
697 (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)}, 724 (1 << REISERFS_ERROR_RO | 1 << REISERFS_ERROR_CONTINUE)},
698 {"ro-remount", 1 << REISERFS_ERROR_RO, 725 {"ro-remount", 1 << REISERFS_ERROR_RO,
699 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)}, 726 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_CONTINUE)},
700#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG 727#ifdef REISERFS_JOURNAL_ERROR_ALLOWS_NO_LOG
701 {"continue", 1 << REISERFS_ERROR_CONTINUE, 728 {"continue", 1 << REISERFS_ERROR_CONTINUE,
702 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)}, 729 (1 << REISERFS_ERROR_PANIC | 1 << REISERFS_ERROR_RO)},
703#endif 730#endif
704 {NULL, 0, 0}, 731 {NULL, 0, 0},
705}; 732};
706 733
707int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k. 734int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 128k.
708 There might be broken applications that are 735 There might be broken applications that are
709 confused by this. Use nolargeio mount option 736 confused by this. Use nolargeio mount option
710 to get usual i/o size = PAGE_SIZE. 737 to get usual i/o size = PAGE_SIZE.
711 */ 738 */
712 739
713/* proceed only one option from a list *cur - string containing of mount options 740/* proceed only one option from a list *cur - string containing of mount options
714 opts - array of options which are accepted 741 opts - array of options which are accepted
@@ -716,486 +743,530 @@ int reiserfs_default_io_size = 128 * 1024; /* Default recommended I/O size is 12
716 in the input - pointer to the argument is stored here 743 in the input - pointer to the argument is stored here
717 bit_flags - if option requires to set a certain bit - it is set here 744 bit_flags - if option requires to set a certain bit - it is set here
718 return -1 if unknown option is found, opt->arg_required otherwise */ 745 return -1 if unknown option is found, opt->arg_required otherwise */
719static int reiserfs_getopt ( struct super_block * s, char ** cur, opt_desc_t * opts, char ** opt_arg, 746static int reiserfs_getopt(struct super_block *s, char **cur, opt_desc_t * opts,
720 unsigned long * bit_flags) 747 char **opt_arg, unsigned long *bit_flags)
721{ 748{
722 char * p; 749 char *p;
723 /* foo=bar, 750 /* foo=bar,
724 ^ ^ ^ 751 ^ ^ ^
725 | | +-- option_end 752 | | +-- option_end
726 | +-- arg_start 753 | +-- arg_start
727 +-- option_start 754 +-- option_start
728 */ 755 */
729 const opt_desc_t * opt; 756 const opt_desc_t *opt;
730 const arg_desc_t * arg; 757 const arg_desc_t *arg;
731 758
732 759 p = *cur;
733 p = *cur; 760
734 761 /* assume argument cannot contain commas */
735 /* assume argument cannot contain commas */ 762 *cur = strchr(p, ',');
736 *cur = strchr (p, ','); 763 if (*cur) {
737 if (*cur) { 764 *(*cur) = '\0';
738 *(*cur) = '\0'; 765 (*cur)++;
739 (*cur) ++; 766 }
740 } 767
741 768 if (!strncmp(p, "alloc=", 6)) {
742 if ( !strncmp (p, "alloc=", 6) ) { 769 /* Ugly special case, probably we should redo options parser so that
743 /* Ugly special case, probably we should redo options parser so that 770 it can understand several arguments for some options, also so that
744 it can understand several arguments for some options, also so that 771 it can fill several bitfields with option values. */
745 it can fill several bitfields with option values. */ 772 if (reiserfs_parse_alloc_options(s, p + 6)) {
746 if ( reiserfs_parse_alloc_options( s, p + 6) ) { 773 return -1;
747 return -1; 774 } else {
748 } else { 775 return 0;
749 return 0; 776 }
750 } 777 }
751 } 778
752 779 /* for every option in the list */
753 780 for (opt = opts; opt->option_name; opt++) {
754 /* for every option in the list */ 781 if (!strncmp(p, opt->option_name, strlen(opt->option_name))) {
755 for (opt = opts; opt->option_name; opt ++) { 782 if (bit_flags) {
756 if (!strncmp (p, opt->option_name, strlen (opt->option_name))) { 783 if (opt->clrmask ==
757 if (bit_flags) { 784 (1 << REISERFS_UNSUPPORTED_OPT))
758 if (opt->clrmask == (1 << REISERFS_UNSUPPORTED_OPT)) 785 reiserfs_warning(s, "%s not supported.",
759 reiserfs_warning (s, "%s not supported.", p); 786 p);
760 else 787 else
761 *bit_flags &= ~opt->clrmask; 788 *bit_flags &= ~opt->clrmask;
762 if (opt->setmask == (1 << REISERFS_UNSUPPORTED_OPT)) 789 if (opt->setmask ==
763 reiserfs_warning (s, "%s not supported.", p); 790 (1 << REISERFS_UNSUPPORTED_OPT))
764 else 791 reiserfs_warning(s, "%s not supported.",
765 *bit_flags |= opt->setmask; 792 p);
766 } 793 else
767 break; 794 *bit_flags |= opt->setmask;
768 } 795 }
769 } 796 break;
770 if (!opt->option_name) { 797 }
771 reiserfs_warning (s, "unknown mount option \"%s\"", p); 798 }
772 return -1; 799 if (!opt->option_name) {
773 } 800 reiserfs_warning(s, "unknown mount option \"%s\"", p);
774 801 return -1;
775 p += strlen (opt->option_name); 802 }
776 switch (*p) { 803
777 case '=': 804 p += strlen(opt->option_name);
778 if (!opt->arg_required) { 805 switch (*p) {
779 reiserfs_warning (s, "the option \"%s\" does not require an argument", 806 case '=':
780 opt->option_name); 807 if (!opt->arg_required) {
781 return -1; 808 reiserfs_warning(s,
782 } 809 "the option \"%s\" does not require an argument",
783 break; 810 opt->option_name);
784 811 return -1;
785 case 0: 812 }
786 if (opt->arg_required) { 813 break;
787 reiserfs_warning (s, "the option \"%s\" requires an argument", opt->option_name); 814
788 return -1; 815 case 0:
789 } 816 if (opt->arg_required) {
790 break; 817 reiserfs_warning(s,
791 default: 818 "the option \"%s\" requires an argument",
792 reiserfs_warning (s, "head of option \"%s\" is only correct", opt->option_name); 819 opt->option_name);
793 return -1; 820 return -1;
794 } 821 }
795 822 break;
796 /* move to the argument, or to next option if argument is not required */ 823 default:
797 p ++; 824 reiserfs_warning(s, "head of option \"%s\" is only correct",
798 825 opt->option_name);
799 if ( opt->arg_required && !(opt->arg_required & (1<<REISERFS_OPT_ALLOWEMPTY)) && !strlen (p) ) { 826 return -1;
800 /* this catches "option=," if not allowed */ 827 }
801 reiserfs_warning (s, "empty argument for \"%s\"", opt->option_name); 828
829 /* move to the argument, or to next option if argument is not required */
830 p++;
831
832 if (opt->arg_required
833 && !(opt->arg_required & (1 << REISERFS_OPT_ALLOWEMPTY))
834 && !strlen(p)) {
835 /* this catches "option=," if not allowed */
836 reiserfs_warning(s, "empty argument for \"%s\"",
837 opt->option_name);
838 return -1;
839 }
840
841 if (!opt->values) {
842 /* *=NULLopt_arg contains pointer to argument */
843 *opt_arg = p;
844 return opt->arg_required & ~(1 << REISERFS_OPT_ALLOWEMPTY);
845 }
846
847 /* values possible for this option are listed in opt->values */
848 for (arg = opt->values; arg->value; arg++) {
849 if (!strcmp(p, arg->value)) {
850 if (bit_flags) {
851 *bit_flags &= ~arg->clrmask;
852 *bit_flags |= arg->setmask;
853 }
854 return opt->arg_required;
855 }
856 }
857
858 reiserfs_warning(s, "bad value \"%s\" for option \"%s\"", p,
859 opt->option_name);
802 return -1; 860 return -1;
803 }
804
805 if (!opt->values) {
806 /* *=NULLopt_arg contains pointer to argument */
807 *opt_arg = p;
808 return opt->arg_required & ~(1<<REISERFS_OPT_ALLOWEMPTY);
809 }
810
811 /* values possible for this option are listed in opt->values */
812 for (arg = opt->values; arg->value; arg ++) {
813 if (!strcmp (p, arg->value)) {
814 if (bit_flags) {
815 *bit_flags &= ~arg->clrmask;
816 *bit_flags |= arg->setmask;
817 }
818 return opt->arg_required;
819 }
820 }
821
822 reiserfs_warning (s, "bad value \"%s\" for option \"%s\"", p, opt->option_name);
823 return -1;
824} 861}
825 862
826/* returns 0 if something is wrong in option string, 1 - otherwise */ 863/* returns 0 if something is wrong in option string, 1 - otherwise */
827static int reiserfs_parse_options (struct super_block * s, char * options, /* string given via mount's -o */ 864static int reiserfs_parse_options(struct super_block *s, char *options, /* string given via mount's -o */
828 unsigned long * mount_options, 865 unsigned long *mount_options,
829 /* after the parsing phase, contains the 866 /* after the parsing phase, contains the
830 collection of bitflags defining what 867 collection of bitflags defining what
831 mount options were selected. */ 868 mount options were selected. */
832 unsigned long * blocks, /* strtol-ed from NNN of resize=NNN */ 869 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
833 char ** jdev_name, 870 char **jdev_name,
834 unsigned int * commit_max_age) 871 unsigned int *commit_max_age)
835{ 872{
836 int c; 873 int c;
837 char * arg = NULL; 874 char *arg = NULL;
838 char * pos; 875 char *pos;
839 opt_desc_t opts[] = { 876 opt_desc_t opts[] = {
840 /* Compatibility stuff, so that -o notail for old setups still work */ 877 /* Compatibility stuff, so that -o notail for old setups still work */
841 {"tails", .arg_required = 't', .values = tails}, 878 {"tails",.arg_required = 't',.values = tails},
842 {"notail", .clrmask = (1<<REISERFS_LARGETAIL)|(1<<REISERFS_SMALLTAIL)}, 879 {"notail",.clrmask =
843 {"conv", .setmask = 1<<REISERFS_CONVERT}, 880 (1 << REISERFS_LARGETAIL) | (1 << REISERFS_SMALLTAIL)},
844 {"attrs", .setmask = 1<<REISERFS_ATTRS}, 881 {"conv",.setmask = 1 << REISERFS_CONVERT},
845 {"noattrs", .clrmask = 1<<REISERFS_ATTRS}, 882 {"attrs",.setmask = 1 << REISERFS_ATTRS},
883 {"noattrs",.clrmask = 1 << REISERFS_ATTRS},
846#ifdef CONFIG_REISERFS_FS_XATTR 884#ifdef CONFIG_REISERFS_FS_XATTR
847 {"user_xattr", .setmask = 1<<REISERFS_XATTRS_USER}, 885 {"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
848 {"nouser_xattr",.clrmask = 1<<REISERFS_XATTRS_USER}, 886 {"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
849#else 887#else
850 {"user_xattr", .setmask = 1<<REISERFS_UNSUPPORTED_OPT}, 888 {"user_xattr",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
851 {"nouser_xattr",.clrmask = 1<<REISERFS_UNSUPPORTED_OPT}, 889 {"nouser_xattr",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
852#endif 890#endif
853#ifdef CONFIG_REISERFS_FS_POSIX_ACL 891#ifdef CONFIG_REISERFS_FS_POSIX_ACL
854 {"acl", .setmask = 1<<REISERFS_POSIXACL}, 892 {"acl",.setmask = 1 << REISERFS_POSIXACL},
855 {"noacl", .clrmask = 1<<REISERFS_POSIXACL}, 893 {"noacl",.clrmask = 1 << REISERFS_POSIXACL},
856#else 894#else
857 {"acl", .setmask = 1<<REISERFS_UNSUPPORTED_OPT}, 895 {"acl",.setmask = 1 << REISERFS_UNSUPPORTED_OPT},
858 {"noacl", .clrmask = 1<<REISERFS_UNSUPPORTED_OPT}, 896 {"noacl",.clrmask = 1 << REISERFS_UNSUPPORTED_OPT},
859#endif 897#endif
860 {"nolog",}, /* This is unsupported */ 898 {"nolog",}, /* This is unsupported */
861 {"replayonly", .setmask = 1<<REPLAYONLY}, 899 {"replayonly",.setmask = 1 << REPLAYONLY},
862 {"block-allocator", .arg_required = 'a', .values = balloc}, 900 {"block-allocator",.arg_required = 'a',.values = balloc},
863 {"data", .arg_required = 'd', .values = logging_mode}, 901 {"data",.arg_required = 'd',.values = logging_mode},
864 {"barrier", .arg_required = 'b', .values = barrier_mode}, 902 {"barrier",.arg_required = 'b',.values = barrier_mode},
865 {"resize", .arg_required = 'r', .values = NULL}, 903 {"resize",.arg_required = 'r',.values = NULL},
866 {"jdev", .arg_required = 'j', .values = NULL}, 904 {"jdev",.arg_required = 'j',.values = NULL},
867 {"nolargeio", .arg_required = 'w', .values = NULL}, 905 {"nolargeio",.arg_required = 'w',.values = NULL},
868 {"commit", .arg_required = 'c', .values = NULL}, 906 {"commit",.arg_required = 'c',.values = NULL},
869 {"usrquota", .setmask = 1<<REISERFS_QUOTA}, 907 {"usrquota",.setmask = 1 << REISERFS_QUOTA},
870 {"grpquota", .setmask = 1<<REISERFS_QUOTA}, 908 {"grpquota",.setmask = 1 << REISERFS_QUOTA},
871 {"noquota", .clrmask = 1<<REISERFS_QUOTA}, 909 {"noquota",.clrmask = 1 << REISERFS_QUOTA},
872 {"errors", .arg_required = 'e', .values = error_actions}, 910 {"errors",.arg_required = 'e',.values = error_actions},
873 {"usrjquota", .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 911 {"usrjquota",.arg_required =
874 {"grpjquota", .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 912 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
875 {"jqfmt", .arg_required = 'f', .values = NULL}, 913 {"grpjquota",.arg_required =
876 {NULL,} 914 'g' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
877 }; 915 {"jqfmt",.arg_required = 'f',.values = NULL},
878 916 {NULL,}
879 *blocks = 0; 917 };
880 if (!options || !*options) 918
881 /* use default configuration: create tails, journaling on, no 919 *blocks = 0;
882 conversion to newest format */ 920 if (!options || !*options)
883 return 1; 921 /* use default configuration: create tails, journaling on, no
884 922 conversion to newest format */
885 for (pos = options; pos; ) { 923 return 1;
886 c = reiserfs_getopt (s, &pos, opts, &arg, mount_options); 924
887 if (c == -1) 925 for (pos = options; pos;) {
888 /* wrong option is given */ 926 c = reiserfs_getopt(s, &pos, opts, &arg, mount_options);
889 return 0; 927 if (c == -1)
890 928 /* wrong option is given */
891 if (c == 'r') {
892 char * p;
893
894 p = NULL;
895 /* "resize=NNN" or "resize=auto" */
896
897 if (!strcmp(arg, "auto")) {
898 /* From JFS code, to auto-get the size.*/
899 *blocks = s->s_bdev->bd_inode->i_size >> s->s_blocksize_bits;
900 } else {
901 *blocks = simple_strtoul (arg, &p, 0);
902 if (*p != '\0') {
903 /* NNN does not look like a number */
904 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
905 return 0; 929 return 0;
906 }
907 }
908 }
909 930
910 if ( c == 'c' ) { 931 if (c == 'r') {
911 char *p = NULL; 932 char *p;
912 unsigned long val = simple_strtoul (arg, &p, 0); 933
913 /* commit=NNN (time in seconds) */ 934 p = NULL;
914 if ( *p != '\0' || val >= (unsigned int)-1) { 935 /* "resize=NNN" or "resize=auto" */
915 reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg); 936
916 return 0; 937 if (!strcmp(arg, "auto")) {
938 /* From JFS code, to auto-get the size. */
939 *blocks =
940 s->s_bdev->bd_inode->i_size >> s->
941 s_blocksize_bits;
942 } else {
943 *blocks = simple_strtoul(arg, &p, 0);
944 if (*p != '\0') {
945 /* NNN does not look like a number */
946 reiserfs_warning(s,
947 "reiserfs_parse_options: bad value %s",
948 arg);
949 return 0;
950 }
951 }
917 } 952 }
918 *commit_max_age = (unsigned int)val;
919 }
920 953
921 if ( c == 'w' ) { 954 if (c == 'c') {
922 char *p=NULL; 955 char *p = NULL;
923 int val = simple_strtoul (arg, &p, 0); 956 unsigned long val = simple_strtoul(arg, &p, 0);
924 957 /* commit=NNN (time in seconds) */
925 if ( *p != '\0') { 958 if (*p != '\0' || val >= (unsigned int)-1) {
926 reiserfs_warning (s, "reiserfs_parse_options: non-numeric value %s for nolargeio option", arg); 959 reiserfs_warning(s,
927 return 0; 960 "reiserfs_parse_options: bad value %s",
961 arg);
962 return 0;
963 }
964 *commit_max_age = (unsigned int)val;
928 } 965 }
929 if ( val )
930 reiserfs_default_io_size = PAGE_SIZE;
931 else
932 reiserfs_default_io_size = 128 * 1024;
933 }
934 966
935 if (c == 'j') { 967 if (c == 'w') {
936 if (arg && *arg && jdev_name) { 968 char *p = NULL;
937 if ( *jdev_name ) { //Hm, already assigned? 969 int val = simple_strtoul(arg, &p, 0);
938 reiserfs_warning (s, "reiserfs_parse_options: journal device was already specified to be %s", *jdev_name); 970
939 return 0; 971 if (*p != '\0') {
972 reiserfs_warning(s,
973 "reiserfs_parse_options: non-numeric value %s for nolargeio option",
974 arg);
975 return 0;
976 }
977 if (val)
978 reiserfs_default_io_size = PAGE_SIZE;
979 else
980 reiserfs_default_io_size = 128 * 1024;
940 } 981 }
941 *jdev_name = arg;
942 }
943 }
944 982
945#ifdef CONFIG_QUOTA 983 if (c == 'j') {
946 if (c == 'u' || c == 'g') { 984 if (arg && *arg && jdev_name) {
947 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 985 if (*jdev_name) { //Hm, already assigned?
948 986 reiserfs_warning(s,
949 if (sb_any_quota_enabled(s)) { 987 "reiserfs_parse_options: journal device was already specified to be %s",
950 reiserfs_warning(s, "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 988 *jdev_name);
951 return 0; 989 return 0;
952 } 990 }
953 if (*arg) { /* Some filename specified? */ 991 *jdev_name = arg;
954 if (REISERFS_SB(s)->s_qf_names[qtype] && strcmp(REISERFS_SB(s)->s_qf_names[qtype], arg)) { 992 }
955 reiserfs_warning(s, "reiserfs_parse_options: %s quota file already specified.", QTYPE2NAME(qtype));
956 return 0;
957 } 993 }
958 if (strchr(arg, '/')) { 994#ifdef CONFIG_QUOTA
959 reiserfs_warning(s, "reiserfs_parse_options: quotafile must be on filesystem root."); 995 if (c == 'u' || c == 'g') {
960 return 0; 996 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
997
998 if (sb_any_quota_enabled(s)) {
999 reiserfs_warning(s,
1000 "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
1001 return 0;
1002 }
1003 if (*arg) { /* Some filename specified? */
1004 if (REISERFS_SB(s)->s_qf_names[qtype]
1005 && strcmp(REISERFS_SB(s)->s_qf_names[qtype],
1006 arg)) {
1007 reiserfs_warning(s,
1008 "reiserfs_parse_options: %s quota file already specified.",
1009 QTYPE2NAME(qtype));
1010 return 0;
1011 }
1012 if (strchr(arg, '/')) {
1013 reiserfs_warning(s,
1014 "reiserfs_parse_options: quotafile must be on filesystem root.");
1015 return 0;
1016 }
1017 REISERFS_SB(s)->s_qf_names[qtype] =
1018 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1019 if (!REISERFS_SB(s)->s_qf_names[qtype]) {
1020 reiserfs_warning(s,
1021 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1022 return 0;
1023 }
1024 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
1025 *mount_options |= 1 << REISERFS_QUOTA;
1026 } else {
1027 if (REISERFS_SB(s)->s_qf_names[qtype]) {
1028 kfree(REISERFS_SB(s)->
1029 s_qf_names[qtype]);
1030 REISERFS_SB(s)->s_qf_names[qtype] =
1031 NULL;
1032 }
1033 }
961 } 1034 }
962 REISERFS_SB(s)->s_qf_names[qtype] = kmalloc(strlen(arg)+1, GFP_KERNEL); 1035 if (c == 'f') {
963 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1036 if (!strcmp(arg, "vfsold"))
964 reiserfs_warning(s, "reiserfs_parse_options: not enough memory for storing quotafile name."); 1037 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
965 return 0; 1038 else if (!strcmp(arg, "vfsv0"))
1039 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
1040 else {
1041 reiserfs_warning(s,
1042 "reiserfs_parse_options: unknown quota format specified.");
1043 return 0;
1044 }
966 } 1045 }
967 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1046#else
968 *mount_options |= 1<<REISERFS_QUOTA; 1047 if (c == 'u' || c == 'g' || c == 'f') {
969 } 1048 reiserfs_warning(s,
970 else { 1049 "reiserfs_parse_options: journalled quota options not supported.");
971 if (REISERFS_SB(s)->s_qf_names[qtype]) { 1050 return 0;
972 kfree(REISERFS_SB(s)->s_qf_names[qtype]);
973 REISERFS_SB(s)->s_qf_names[qtype] = NULL;
974 } 1051 }
975 } 1052#endif
976 } 1053 }
977 if (c == 'f') { 1054
978 if (!strcmp(arg, "vfsold")) 1055#ifdef CONFIG_QUOTA
979 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1056 if (!REISERFS_SB(s)->s_jquota_fmt
980 else if (!strcmp(arg, "vfsv0")) 1057 && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
981 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1058 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
982 else { 1059 reiserfs_warning(s,
983 reiserfs_warning(s, "reiserfs_parse_options: unknown quota format specified."); 1060 "reiserfs_parse_options: journalled quota format not specified.");
984 return 0; 1061 return 0;
985 }
986 } 1062 }
987#else 1063 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
988 if (c == 'u' || c == 'g' || c == 'f') { 1064 if (!(*mount_options & (1 << REISERFS_QUOTA))
989 reiserfs_warning(s, "reiserfs_parse_options: journalled quota options not supported."); 1065 && sb_any_quota_enabled(s)) {
990 return 0; 1066 reiserfs_warning(s,
1067 "reiserfs_parse_options: quota options must be present when quota is turned on.");
1068 return 0;
991 } 1069 }
992#endif 1070#endif
993 }
994
995#ifdef CONFIG_QUOTA
996 if (!REISERFS_SB(s)->s_jquota_fmt && (REISERFS_SB(s)->s_qf_names[USRQUOTA] || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
997 reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
998 return 0;
999 }
1000 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
1001 if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
1002 reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
1003 return 0;
1004 }
1005#endif
1006 1071
1007 return 1; 1072 return 1;
1008} 1073}
1009 1074
1010static void switch_data_mode(struct super_block *s, unsigned long mode) { 1075static void switch_data_mode(struct super_block *s, unsigned long mode)
1011 REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) | 1076{
1012 (1 << REISERFS_DATA_ORDERED) | 1077 REISERFS_SB(s)->s_mount_opt &= ~((1 << REISERFS_DATA_LOG) |
1013 (1 << REISERFS_DATA_WRITEBACK)); 1078 (1 << REISERFS_DATA_ORDERED) |
1014 REISERFS_SB(s)->s_mount_opt |= (1 << mode); 1079 (1 << REISERFS_DATA_WRITEBACK));
1080 REISERFS_SB(s)->s_mount_opt |= (1 << mode);
1015} 1081}
1016 1082
1017static void handle_data_mode(struct super_block *s, unsigned long mount_options) 1083static void handle_data_mode(struct super_block *s, unsigned long mount_options)
1018{ 1084{
1019 if (mount_options & (1 << REISERFS_DATA_LOG)) { 1085 if (mount_options & (1 << REISERFS_DATA_LOG)) {
1020 if (!reiserfs_data_log(s)) { 1086 if (!reiserfs_data_log(s)) {
1021 switch_data_mode(s, REISERFS_DATA_LOG); 1087 switch_data_mode(s, REISERFS_DATA_LOG);
1022 reiserfs_info (s, "switching to journaled data mode\n"); 1088 reiserfs_info(s, "switching to journaled data mode\n");
1023 } 1089 }
1024 } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) { 1090 } else if (mount_options & (1 << REISERFS_DATA_ORDERED)) {
1025 if (!reiserfs_data_ordered(s)) { 1091 if (!reiserfs_data_ordered(s)) {
1026 switch_data_mode(s, REISERFS_DATA_ORDERED); 1092 switch_data_mode(s, REISERFS_DATA_ORDERED);
1027 reiserfs_info (s, "switching to ordered data mode\n"); 1093 reiserfs_info(s, "switching to ordered data mode\n");
1028 } 1094 }
1029 } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) { 1095 } else if (mount_options & (1 << REISERFS_DATA_WRITEBACK)) {
1030 if (!reiserfs_data_writeback(s)) { 1096 if (!reiserfs_data_writeback(s)) {
1031 switch_data_mode(s, REISERFS_DATA_WRITEBACK); 1097 switch_data_mode(s, REISERFS_DATA_WRITEBACK);
1032 reiserfs_info (s, "switching to writeback data mode\n"); 1098 reiserfs_info(s, "switching to writeback data mode\n");
1033 } 1099 }
1034 } 1100 }
1035} 1101}
1036 1102
1037static void handle_barrier_mode(struct super_block *s, unsigned long bits) { 1103static void handle_barrier_mode(struct super_block *s, unsigned long bits)
1038 int flush = (1 << REISERFS_BARRIER_FLUSH); 1104{
1039 int none = (1 << REISERFS_BARRIER_NONE); 1105 int flush = (1 << REISERFS_BARRIER_FLUSH);
1040 int all_barrier = flush | none; 1106 int none = (1 << REISERFS_BARRIER_NONE);
1041 1107 int all_barrier = flush | none;
1042 if (bits & all_barrier) { 1108
1043 REISERFS_SB(s)->s_mount_opt &= ~all_barrier; 1109 if (bits & all_barrier) {
1044 if (bits & flush) { 1110 REISERFS_SB(s)->s_mount_opt &= ~all_barrier;
1045 REISERFS_SB(s)->s_mount_opt |= flush; 1111 if (bits & flush) {
1046 printk("reiserfs: enabling write barrier flush mode\n"); 1112 REISERFS_SB(s)->s_mount_opt |= flush;
1047 } else if (bits & none) { 1113 printk("reiserfs: enabling write barrier flush mode\n");
1048 REISERFS_SB(s)->s_mount_opt |= none; 1114 } else if (bits & none) {
1049 printk("reiserfs: write barriers turned off\n"); 1115 REISERFS_SB(s)->s_mount_opt |= none;
1050 } 1116 printk("reiserfs: write barriers turned off\n");
1051 } 1117 }
1118 }
1052} 1119}
1053 1120
1054static void handle_attrs( struct super_block *s ) 1121static void handle_attrs(struct super_block *s)
1055{ 1122{
1056 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 1123 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1057 1124
1058 if( reiserfs_attrs( s ) ) { 1125 if (reiserfs_attrs(s)) {
1059 if( old_format_only(s) ) { 1126 if (old_format_only(s)) {
1060 reiserfs_warning(s, "reiserfs: cannot support attributes on 3.5.x disk format" ); 1127 reiserfs_warning(s,
1061 REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); 1128 "reiserfs: cannot support attributes on 3.5.x disk format");
1129 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1062 return; 1130 return;
1063 } 1131 }
1064 if( !( le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared ) ) { 1132 if (!(le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared)) {
1065 reiserfs_warning(s, "reiserfs: cannot support attributes until flag is set in super-block" ); 1133 reiserfs_warning(s,
1066 REISERFS_SB(s) -> s_mount_opt &= ~ ( 1 << REISERFS_ATTRS ); 1134 "reiserfs: cannot support attributes until flag is set in super-block");
1135 REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_ATTRS);
1067 } 1136 }
1068 } else if (le32_to_cpu( rs -> s_flags ) & reiserfs_attrs_cleared) { 1137 } else if (le32_to_cpu(rs->s_flags) & reiserfs_attrs_cleared) {
1069 REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS; 1138 REISERFS_SB(s)->s_mount_opt |= REISERFS_ATTRS;
1070 } 1139 }
1071} 1140}
1072 1141
1073static int reiserfs_remount (struct super_block * s, int * mount_flags, char * arg) 1142static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1074{ 1143{
1075 struct reiserfs_super_block * rs; 1144 struct reiserfs_super_block *rs;
1076 struct reiserfs_transaction_handle th ; 1145 struct reiserfs_transaction_handle th;
1077 unsigned long blocks; 1146 unsigned long blocks;
1078 unsigned long mount_options = REISERFS_SB(s)->s_mount_opt; 1147 unsigned long mount_options = REISERFS_SB(s)->s_mount_opt;
1079 unsigned long safe_mask = 0; 1148 unsigned long safe_mask = 0;
1080 unsigned int commit_max_age = (unsigned int)-1; 1149 unsigned int commit_max_age = (unsigned int)-1;
1081 struct reiserfs_journal *journal = SB_JOURNAL(s); 1150 struct reiserfs_journal *journal = SB_JOURNAL(s);
1082 int err; 1151 int err;
1083#ifdef CONFIG_QUOTA 1152#ifdef CONFIG_QUOTA
1084 int i; 1153 int i;
1085#endif 1154#endif
1086 1155
1087 rs = SB_DISK_SUPER_BLOCK (s); 1156 rs = SB_DISK_SUPER_BLOCK(s);
1088 1157
1089 if (!reiserfs_parse_options(s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1158 if (!reiserfs_parse_options
1159 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
1090#ifdef CONFIG_QUOTA 1160#ifdef CONFIG_QUOTA
1091 for (i = 0; i < MAXQUOTAS; i++) 1161 for (i = 0; i < MAXQUOTAS; i++)
1092 if (REISERFS_SB(s)->s_qf_names[i]) { 1162 if (REISERFS_SB(s)->s_qf_names[i]) {
1093 kfree(REISERFS_SB(s)->s_qf_names[i]); 1163 kfree(REISERFS_SB(s)->s_qf_names[i]);
1094 REISERFS_SB(s)->s_qf_names[i] = NULL; 1164 REISERFS_SB(s)->s_qf_names[i] = NULL;
1095 } 1165 }
1096#endif 1166#endif
1097 return -EINVAL; 1167 return -EINVAL;
1098 } 1168 }
1099 1169
1100 handle_attrs(s); 1170 handle_attrs(s);
1101 1171
1102 /* Add options that are safe here */ 1172 /* Add options that are safe here */
1103 safe_mask |= 1 << REISERFS_SMALLTAIL; 1173 safe_mask |= 1 << REISERFS_SMALLTAIL;
1104 safe_mask |= 1 << REISERFS_LARGETAIL; 1174 safe_mask |= 1 << REISERFS_LARGETAIL;
1105 safe_mask |= 1 << REISERFS_NO_BORDER; 1175 safe_mask |= 1 << REISERFS_NO_BORDER;
1106 safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION; 1176 safe_mask |= 1 << REISERFS_NO_UNHASHED_RELOCATION;
1107 safe_mask |= 1 << REISERFS_HASHED_RELOCATION; 1177 safe_mask |= 1 << REISERFS_HASHED_RELOCATION;
1108 safe_mask |= 1 << REISERFS_TEST4; 1178 safe_mask |= 1 << REISERFS_TEST4;
1109 safe_mask |= 1 << REISERFS_ATTRS; 1179 safe_mask |= 1 << REISERFS_ATTRS;
1110 safe_mask |= 1 << REISERFS_XATTRS_USER; 1180 safe_mask |= 1 << REISERFS_XATTRS_USER;
1111 safe_mask |= 1 << REISERFS_POSIXACL; 1181 safe_mask |= 1 << REISERFS_POSIXACL;
1112 safe_mask |= 1 << REISERFS_BARRIER_FLUSH; 1182 safe_mask |= 1 << REISERFS_BARRIER_FLUSH;
1113 safe_mask |= 1 << REISERFS_BARRIER_NONE; 1183 safe_mask |= 1 << REISERFS_BARRIER_NONE;
1114 safe_mask |= 1 << REISERFS_ERROR_RO; 1184 safe_mask |= 1 << REISERFS_ERROR_RO;
1115 safe_mask |= 1 << REISERFS_ERROR_CONTINUE; 1185 safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
1116 safe_mask |= 1 << REISERFS_ERROR_PANIC; 1186 safe_mask |= 1 << REISERFS_ERROR_PANIC;
1117 safe_mask |= 1 << REISERFS_QUOTA; 1187 safe_mask |= 1 << REISERFS_QUOTA;
1118 1188
1119 /* Update the bitmask, taking care to keep 1189 /* Update the bitmask, taking care to keep
1120 * the bits we're not allowed to change here */ 1190 * the bits we're not allowed to change here */
1121 REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)->s_mount_opt & ~safe_mask) | (mount_options & safe_mask); 1191 REISERFS_SB(s)->s_mount_opt =
1122 1192 (REISERFS_SB(s)->
1123 if(commit_max_age != 0 && commit_max_age != (unsigned int)-1) { 1193 s_mount_opt & ~safe_mask) | (mount_options & safe_mask);
1124 journal->j_max_commit_age = commit_max_age; 1194
1125 journal->j_max_trans_age = commit_max_age; 1195 if (commit_max_age != 0 && commit_max_age != (unsigned int)-1) {
1126 } 1196 journal->j_max_commit_age = commit_max_age;
1127 else if(commit_max_age == 0) 1197 journal->j_max_trans_age = commit_max_age;
1128 { 1198 } else if (commit_max_age == 0) {
1129 /* 0 means restore defaults. */ 1199 /* 0 means restore defaults. */
1130 journal->j_max_commit_age = journal->j_default_max_commit_age; 1200 journal->j_max_commit_age = journal->j_default_max_commit_age;
1131 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE; 1201 journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
1132 } 1202 }
1133 1203
1134 if(blocks) { 1204 if (blocks) {
1135 int rc = reiserfs_resize(s, blocks); 1205 int rc = reiserfs_resize(s, blocks);
1136 if (rc != 0) 1206 if (rc != 0)
1137 return rc; 1207 return rc;
1138 } 1208 }
1139 1209
1140 if (*mount_flags & MS_RDONLY) { 1210 if (*mount_flags & MS_RDONLY) {
1141 reiserfs_xattr_init (s, *mount_flags); 1211 reiserfs_xattr_init(s, *mount_flags);
1142 /* remount read-only */ 1212 /* remount read-only */
1143 if (s->s_flags & MS_RDONLY) 1213 if (s->s_flags & MS_RDONLY)
1144 /* it is read-only already */ 1214 /* it is read-only already */
1145 return 0; 1215 return 0;
1146 /* try to remount file system with read-only permissions */ 1216 /* try to remount file system with read-only permissions */
1147 if (sb_umount_state(rs) == REISERFS_VALID_FS || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) { 1217 if (sb_umount_state(rs) == REISERFS_VALID_FS
1148 return 0; 1218 || REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
1149 } 1219 return 0;
1150 1220 }
1151 err = journal_begin(&th, s, 10) ; 1221
1152 if (err) 1222 err = journal_begin(&th, s, 10);
1153 return err; 1223 if (err)
1154 1224 return err;
1155 /* Mounting a rw partition read-only. */ 1225
1156 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1226 /* Mounting a rw partition read-only. */
1157 set_sb_umount_state( rs, REISERFS_SB(s)->s_mount_state ); 1227 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1158 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1228 set_sb_umount_state(rs, REISERFS_SB(s)->s_mount_state);
1159 } else { 1229 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1160 /* remount read-write */ 1230 } else {
1161 if (!(s->s_flags & MS_RDONLY)) { 1231 /* remount read-write */
1162 reiserfs_xattr_init (s, *mount_flags); 1232 if (!(s->s_flags & MS_RDONLY)) {
1163 return 0; /* We are read-write already */ 1233 reiserfs_xattr_init(s, *mount_flags);
1164 } 1234 return 0; /* We are read-write already */
1165 1235 }
1166 if (reiserfs_is_journal_aborted (journal)) 1236
1167 return journal->j_errno; 1237 if (reiserfs_is_journal_aborted(journal))
1168 1238 return journal->j_errno;
1169 handle_data_mode(s, mount_options); 1239
1170 handle_barrier_mode(s, mount_options); 1240 handle_data_mode(s, mount_options);
1171 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs) ; 1241 handle_barrier_mode(s, mount_options);
1172 s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ 1242 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1173 err = journal_begin(&th, s, 10) ; 1243 s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
1174 if (err) 1244 err = journal_begin(&th, s, 10);
1175 return err; 1245 if (err)
1176 1246 return err;
1177 /* Mount a partition which is read-only, read-write */ 1247
1178 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1248 /* Mount a partition which is read-only, read-write */
1179 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs); 1249 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1180 s->s_flags &= ~MS_RDONLY; 1250 REISERFS_SB(s)->s_mount_state = sb_umount_state(rs);
1181 set_sb_umount_state( rs, REISERFS_ERROR_FS ); 1251 s->s_flags &= ~MS_RDONLY;
1182 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ 1252 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1183 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1253 /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */
1184 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS ; 1254 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1185 } 1255 REISERFS_SB(s)->s_mount_state = REISERFS_VALID_FS;
1186 /* this will force a full flush of all journal lists */ 1256 }
1187 SB_JOURNAL(s)->j_must_wait = 1 ; 1257 /* this will force a full flush of all journal lists */
1188 err = journal_end(&th, s, 10) ; 1258 SB_JOURNAL(s)->j_must_wait = 1;
1189 if (err) 1259 err = journal_end(&th, s, 10);
1190 return err; 1260 if (err)
1191 s->s_dirt = 0; 1261 return err;
1192 1262 s->s_dirt = 0;
1193 if (!( *mount_flags & MS_RDONLY ) ) { 1263
1194 finish_unfinished( s ); 1264 if (!(*mount_flags & MS_RDONLY)) {
1195 reiserfs_xattr_init (s, *mount_flags); 1265 finish_unfinished(s);
1196 } 1266 reiserfs_xattr_init(s, *mount_flags);
1197 1267 }
1198 return 0; 1268
1269 return 0;
1199} 1270}
1200 1271
1201/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk. 1272/* load_bitmap_info_data - Sets up the reiserfs_bitmap_info structure from disk.
@@ -1214,761 +1285,829 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
1214 * free blocks at all. 1285 * free blocks at all.
1215 */ 1286 */
1216 1287
1217static void load_bitmap_info_data (struct super_block *sb, 1288static void load_bitmap_info_data(struct super_block *sb,
1218 struct reiserfs_bitmap_info *bi) 1289 struct reiserfs_bitmap_info *bi)
1219{ 1290{
1220 unsigned long *cur = (unsigned long *)bi->bh->b_data; 1291 unsigned long *cur = (unsigned long *)bi->bh->b_data;
1221 1292
1222 while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) { 1293 while ((char *)cur < (bi->bh->b_data + sb->s_blocksize)) {
1223 1294
1224 /* No need to scan if all 0's or all 1's. 1295 /* No need to scan if all 0's or all 1's.
1225 * Since we're only counting 0's, we can simply ignore all 1's */ 1296 * Since we're only counting 0's, we can simply ignore all 1's */
1226 if (*cur == 0) { 1297 if (*cur == 0) {
1227 if (bi->first_zero_hint == 0) { 1298 if (bi->first_zero_hint == 0) {
1228 bi->first_zero_hint = ((char *)cur - bi->bh->b_data) << 3; 1299 bi->first_zero_hint =
1229 } 1300 ((char *)cur - bi->bh->b_data) << 3;
1230 bi->free_count += sizeof(unsigned long)*8; 1301 }
1231 } else if (*cur != ~0L) { 1302 bi->free_count += sizeof(unsigned long) * 8;
1232 int b; 1303 } else if (*cur != ~0L) {
1233 for (b = 0; b < sizeof(unsigned long)*8; b++) { 1304 int b;
1234 if (!reiserfs_test_le_bit (b, cur)) { 1305 for (b = 0; b < sizeof(unsigned long) * 8; b++) {
1235 bi->free_count ++; 1306 if (!reiserfs_test_le_bit(b, cur)) {
1236 if (bi->first_zero_hint == 0) 1307 bi->free_count++;
1237 bi->first_zero_hint = 1308 if (bi->first_zero_hint == 0)
1238 (((char *)cur - bi->bh->b_data) << 3) + b; 1309 bi->first_zero_hint =
1239 } 1310 (((char *)cur -
1311 bi->bh->b_data) << 3) + b;
1312 }
1313 }
1240 } 1314 }
1241 } 1315 cur++;
1242 cur ++; 1316 }
1243 }
1244 1317
1245#ifdef CONFIG_REISERFS_CHECK 1318#ifdef CONFIG_REISERFS_CHECK
1246// This outputs a lot of unneded info on big FSes 1319// This outputs a lot of unneded info on big FSes
1247// reiserfs_warning ("bitmap loaded from block %d: %d free blocks", 1320// reiserfs_warning ("bitmap loaded from block %d: %d free blocks",
1248// bi->bh->b_blocknr, bi->free_count); 1321// bi->bh->b_blocknr, bi->free_count);
1249#endif 1322#endif
1250} 1323}
1251 1324
1252static int read_bitmaps (struct super_block * s) 1325static int read_bitmaps(struct super_block *s)
1253{ 1326{
1254 int i, bmap_nr; 1327 int i, bmap_nr;
1328
1329 SB_AP_BITMAP(s) =
1330 vmalloc(sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
1331 if (SB_AP_BITMAP(s) == 0)
1332 return 1;
1333 memset(SB_AP_BITMAP(s), 0,
1334 sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
1335 for (i = 0, bmap_nr =
1336 REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1;
1337 i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) {
1338 SB_AP_BITMAP(s)[i].bh = sb_getblk(s, bmap_nr);
1339 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh))
1340 ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh);
1341 }
1342 for (i = 0; i < SB_BMAP_NR(s); i++) {
1343 wait_on_buffer(SB_AP_BITMAP(s)[i].bh);
1344 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1345 reiserfs_warning(s, "sh-2029: reiserfs read_bitmaps: "
1346 "bitmap block (#%lu) reading failed",
1347 SB_AP_BITMAP(s)[i].bh->b_blocknr);
1348 for (i = 0; i < SB_BMAP_NR(s); i++)
1349 brelse(SB_AP_BITMAP(s)[i].bh);
1350 vfree(SB_AP_BITMAP(s));
1351 SB_AP_BITMAP(s) = NULL;
1352 return 1;
1353 }
1354 load_bitmap_info_data(s, SB_AP_BITMAP(s) + i);
1355 }
1356 return 0;
1357}
1255 1358
1256 SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 1359static int read_old_bitmaps(struct super_block *s)
1257 if (SB_AP_BITMAP (s) == 0) 1360{
1258 return 1; 1361 int i;
1259 memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); 1362 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1260 for (i = 0, bmap_nr = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; 1363 int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */
1261 i < SB_BMAP_NR(s); i++, bmap_nr = s->s_blocksize * 8 * i) { 1364
1262 SB_AP_BITMAP (s)[i].bh = sb_getblk(s, bmap_nr); 1365 /* read true bitmap */
1263 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) 1366 SB_AP_BITMAP(s) =
1264 ll_rw_block(READ, 1, &SB_AP_BITMAP(s)[i].bh); 1367 vmalloc(sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
1265 } 1368 if (SB_AP_BITMAP(s) == 0)
1266 for (i = 0; i < SB_BMAP_NR(s); i++) { 1369 return 1;
1267 wait_on_buffer(SB_AP_BITMAP (s)[i].bh); 1370
1268 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) { 1371 memset(SB_AP_BITMAP(s), 0,
1269 reiserfs_warning(s,"sh-2029: reiserfs read_bitmaps: " 1372 sizeof(struct reiserfs_buffer_info *) * sb_bmap_nr(rs));
1270 "bitmap block (#%lu) reading failed", 1373
1271 SB_AP_BITMAP(s)[i].bh->b_blocknr); 1374 for (i = 0; i < sb_bmap_nr(rs); i++) {
1272 for (i = 0; i < SB_BMAP_NR(s); i++) 1375 SB_AP_BITMAP(s)[i].bh = sb_bread(s, bmp1 + i);
1273 brelse(SB_AP_BITMAP(s)[i].bh); 1376 if (!SB_AP_BITMAP(s)[i].bh)
1274 vfree(SB_AP_BITMAP(s)); 1377 return 1;
1275 SB_AP_BITMAP(s) = NULL; 1378 load_bitmap_info_data(s, SB_AP_BITMAP(s) + i);
1276 return 1;
1277 } 1379 }
1278 load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); 1380
1279 } 1381 return 0;
1280 return 0;
1281} 1382}
1282 1383
1283static int read_old_bitmaps (struct super_block * s) 1384static int read_super_block(struct super_block *s, int offset)
1284{ 1385{
1285 int i ; 1386 struct buffer_head *bh;
1286 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); 1387 struct reiserfs_super_block *rs;
1287 int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ 1388 int fs_blocksize;
1288 1389
1289 /* read true bitmap */ 1390 bh = sb_bread(s, offset / s->s_blocksize);
1290 SB_AP_BITMAP (s) = vmalloc (sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); 1391 if (!bh) {
1291 if (SB_AP_BITMAP (s) == 0) 1392 reiserfs_warning(s, "sh-2006: read_super_block: "
1292 return 1; 1393 "bread failed (dev %s, block %lu, size %lu)",
1394 reiserfs_bdevname(s), offset / s->s_blocksize,
1395 s->s_blocksize);
1396 return 1;
1397 }
1293 1398
1294 memset (SB_AP_BITMAP (s), 0, sizeof (struct reiserfs_buffer_info *) * sb_bmap_nr(rs)); 1399 rs = (struct reiserfs_super_block *)bh->b_data;
1400 if (!is_any_reiserfs_magic_string(rs)) {
1401 brelse(bh);
1402 return 1;
1403 }
1404 //
1405 // ok, reiserfs signature (old or new) found in at the given offset
1406 //
1407 fs_blocksize = sb_blocksize(rs);
1408 brelse(bh);
1409 sb_set_blocksize(s, fs_blocksize);
1295 1410
1296 for (i = 0; i < sb_bmap_nr(rs); i ++) { 1411 bh = sb_bread(s, offset / s->s_blocksize);
1297 SB_AP_BITMAP (s)[i].bh = sb_bread (s, bmp1 + i); 1412 if (!bh) {
1298 if (!SB_AP_BITMAP (s)[i].bh) 1413 reiserfs_warning(s, "sh-2007: read_super_block: "
1299 return 1; 1414 "bread failed (dev %s, block %lu, size %lu)\n",
1300 load_bitmap_info_data (s, SB_AP_BITMAP (s) + i); 1415 reiserfs_bdevname(s), offset / s->s_blocksize,
1301 } 1416 s->s_blocksize);
1417 return 1;
1418 }
1302 1419
1303 return 0; 1420 rs = (struct reiserfs_super_block *)bh->b_data;
1304} 1421 if (sb_blocksize(rs) != s->s_blocksize) {
1422 reiserfs_warning(s, "sh-2011: read_super_block: "
1423 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n",
1424 reiserfs_bdevname(s),
1425 (unsigned long long)bh->b_blocknr,
1426 s->s_blocksize);
1427 brelse(bh);
1428 return 1;
1429 }
1305 1430
1306static int read_super_block (struct super_block * s, int offset) 1431 if (rs->s_v1.s_root_block == cpu_to_le32(-1)) {
1307{ 1432 brelse(bh);
1308 struct buffer_head * bh; 1433 reiserfs_warning(s,
1309 struct reiserfs_super_block * rs; 1434 "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
1310 int fs_blocksize; 1435 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
1311 1436 "get newer reiserfsprogs package");
1312 1437 return 1;
1313 bh = sb_bread (s, offset / s->s_blocksize);
1314 if (!bh) {
1315 reiserfs_warning (s, "sh-2006: read_super_block: "
1316 "bread failed (dev %s, block %lu, size %lu)",
1317 reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize);
1318 return 1;
1319 }
1320
1321 rs = (struct reiserfs_super_block *)bh->b_data;
1322 if (!is_any_reiserfs_magic_string (rs)) {
1323 brelse (bh);
1324 return 1;
1325 }
1326
1327 //
1328 // ok, reiserfs signature (old or new) found in at the given offset
1329 //
1330 fs_blocksize = sb_blocksize(rs);
1331 brelse (bh);
1332 sb_set_blocksize (s, fs_blocksize);
1333
1334 bh = sb_bread (s, offset / s->s_blocksize);
1335 if (!bh) {
1336 reiserfs_warning (s, "sh-2007: read_super_block: "
1337 "bread failed (dev %s, block %lu, size %lu)\n",
1338 reiserfs_bdevname (s), offset / s->s_blocksize, s->s_blocksize);
1339 return 1;
1340 }
1341
1342 rs = (struct reiserfs_super_block *)bh->b_data;
1343 if (sb_blocksize(rs) != s->s_blocksize) {
1344 reiserfs_warning (s, "sh-2011: read_super_block: "
1345 "can't find a reiserfs filesystem on (dev %s, block %Lu, size %lu)\n",
1346 reiserfs_bdevname (s), (unsigned long long)bh->b_blocknr, s->s_blocksize);
1347 brelse (bh);
1348 return 1;
1349 }
1350
1351 if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) {
1352 brelse(bh) ;
1353 reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
1354 "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
1355 "get newer reiserfsprogs package");
1356 return 1;
1357 }
1358
1359 SB_BUFFER_WITH_SB (s) = bh;
1360 SB_DISK_SUPER_BLOCK (s) = rs;
1361
1362 if (is_reiserfs_jr (rs)) {
1363 /* magic is of non-standard journal filesystem, look at s_version to
1364 find which format is in use */
1365 if (sb_version(rs) == REISERFS_VERSION_2)
1366 reiserfs_warning (s, "read_super_block: found reiserfs format \"3.6\""
1367 " with non-standard journal");
1368 else if (sb_version(rs) == REISERFS_VERSION_1)
1369 reiserfs_warning (s, "read_super_block: found reiserfs format \"3.5\""
1370 " with non-standard journal");
1371 else {
1372 reiserfs_warning (s, "sh-2012: read_super_block: found unknown "
1373 "format \"%u\" of reiserfs with non-standard magic",
1374 sb_version(rs));
1375 return 1;
1376 } 1438 }
1377 }
1378 else
1379 /* s_version of standard format may contain incorrect information,
1380 so we just look at the magic string */
1381 reiserfs_info (s, "found reiserfs format \"%s\" with standard journal\n",
1382 is_reiserfs_3_5 (rs) ? "3.5" : "3.6");
1383 1439
1384 s->s_op = &reiserfs_sops; 1440 SB_BUFFER_WITH_SB(s) = bh;
1385 s->s_export_op = &reiserfs_export_ops; 1441 SB_DISK_SUPER_BLOCK(s) = rs;
1442
1443 if (is_reiserfs_jr(rs)) {
1444 /* magic is of non-standard journal filesystem, look at s_version to
1445 find which format is in use */
1446 if (sb_version(rs) == REISERFS_VERSION_2)
1447 reiserfs_warning(s,
1448 "read_super_block: found reiserfs format \"3.6\""
1449 " with non-standard journal");
1450 else if (sb_version(rs) == REISERFS_VERSION_1)
1451 reiserfs_warning(s,
1452 "read_super_block: found reiserfs format \"3.5\""
1453 " with non-standard journal");
1454 else {
1455 reiserfs_warning(s,
1456 "sh-2012: read_super_block: found unknown "
1457 "format \"%u\" of reiserfs with non-standard magic",
1458 sb_version(rs));
1459 return 1;
1460 }
1461 } else
1462 /* s_version of standard format may contain incorrect information,
1463 so we just look at the magic string */
1464 reiserfs_info(s,
1465 "found reiserfs format \"%s\" with standard journal\n",
1466 is_reiserfs_3_5(rs) ? "3.5" : "3.6");
1467
1468 s->s_op = &reiserfs_sops;
1469 s->s_export_op = &reiserfs_export_ops;
1386#ifdef CONFIG_QUOTA 1470#ifdef CONFIG_QUOTA
1387 s->s_qcop = &reiserfs_qctl_operations; 1471 s->s_qcop = &reiserfs_qctl_operations;
1388 s->dq_op = &reiserfs_quota_operations; 1472 s->dq_op = &reiserfs_quota_operations;
1389#endif 1473#endif
1390 1474
1391 /* new format is limited by the 32 bit wide i_blocks field, want to 1475 /* new format is limited by the 32 bit wide i_blocks field, want to
1392 ** be one full block below that. 1476 ** be one full block below that.
1393 */ 1477 */
1394 s->s_maxbytes = (512LL << 32) - s->s_blocksize ; 1478 s->s_maxbytes = (512LL << 32) - s->s_blocksize;
1395 return 0; 1479 return 0;
1396} 1480}
1397 1481
1398
1399
1400/* after journal replay, reread all bitmap and super blocks */ 1482/* after journal replay, reread all bitmap and super blocks */
1401static int reread_meta_blocks(struct super_block *s) { 1483static int reread_meta_blocks(struct super_block *s)
1402 int i ; 1484{
1403 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))) ; 1485 int i;
1404 wait_on_buffer(SB_BUFFER_WITH_SB(s)) ; 1486 ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s)));
1405 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { 1487 wait_on_buffer(SB_BUFFER_WITH_SB(s));
1406 reiserfs_warning (s, "reread_meta_blocks, error reading the super") ; 1488 if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
1407 return 1 ; 1489 reiserfs_warning(s,
1408 } 1490 "reread_meta_blocks, error reading the super");
1409 1491 return 1;
1410 for (i = 0; i < SB_BMAP_NR(s) ; i++) { 1492 }
1411 ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh)) ;
1412 wait_on_buffer(SB_AP_BITMAP(s)[i].bh) ;
1413 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1414 reiserfs_warning (s, "reread_meta_blocks, error reading bitmap block number %d at %llu",
1415 i, (unsigned long long)SB_AP_BITMAP(s)[i].bh->b_blocknr) ;
1416 return 1 ;
1417 }
1418 }
1419 return 0 ;
1420 1493
1421} 1494 for (i = 0; i < SB_BMAP_NR(s); i++) {
1495 ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i].bh));
1496 wait_on_buffer(SB_AP_BITMAP(s)[i].bh);
1497 if (!buffer_uptodate(SB_AP_BITMAP(s)[i].bh)) {
1498 reiserfs_warning(s,
1499 "reread_meta_blocks, error reading bitmap block number %d at %llu",
1500 i,
1501 (unsigned long long)SB_AP_BITMAP(s)[i].
1502 bh->b_blocknr);
1503 return 1;
1504 }
1505 }
1506 return 0;
1422 1507
1508}
1423 1509
1424///////////////////////////////////////////////////// 1510/////////////////////////////////////////////////////
1425// hash detection stuff 1511// hash detection stuff
1426 1512
1427
1428// if root directory is empty - we set default - Yura's - hash and 1513// if root directory is empty - we set default - Yura's - hash and
1429// warn about it 1514// warn about it
1430// FIXME: we look for only one name in a directory. If tea and yura 1515// FIXME: we look for only one name in a directory. If tea and yura
1431// bith have the same value - we ask user to send report to the 1516// bith have the same value - we ask user to send report to the
1432// mailing list 1517// mailing list
1433static __u32 find_hash_out (struct super_block * s) 1518static __u32 find_hash_out(struct super_block *s)
1434{ 1519{
1435 int retval; 1520 int retval;
1436 struct inode * inode; 1521 struct inode *inode;
1437 struct cpu_key key; 1522 struct cpu_key key;
1438 INITIALIZE_PATH (path); 1523 INITIALIZE_PATH(path);
1439 struct reiserfs_dir_entry de; 1524 struct reiserfs_dir_entry de;
1440 __u32 hash = DEFAULT_HASH; 1525 __u32 hash = DEFAULT_HASH;
1441 1526
1442 inode = s->s_root->d_inode; 1527 inode = s->s_root->d_inode;
1443 1528
1444 do { // Some serious "goto"-hater was there ;) 1529 do { // Some serious "goto"-hater was there ;)
1445 u32 teahash, r5hash, yurahash; 1530 u32 teahash, r5hash, yurahash;
1446 1531
1447 make_cpu_key (&key, inode, ~0, TYPE_DIRENTRY, 3); 1532 make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
1448 retval = search_by_entry_key (s, &key, &path, &de); 1533 retval = search_by_entry_key(s, &key, &path, &de);
1449 if (retval == IO_ERROR) { 1534 if (retval == IO_ERROR) {
1450 pathrelse (&path); 1535 pathrelse(&path);
1451 return UNSET_HASH ; 1536 return UNSET_HASH;
1452 } 1537 }
1453 if (retval == NAME_NOT_FOUND) 1538 if (retval == NAME_NOT_FOUND)
1454 de.de_entry_num --; 1539 de.de_entry_num--;
1455 set_de_name_and_namelen (&de); 1540 set_de_name_and_namelen(&de);
1456 if (deh_offset( &(de.de_deh[de.de_entry_num]) ) == DOT_DOT_OFFSET) { 1541 if (deh_offset(&(de.de_deh[de.de_entry_num])) == DOT_DOT_OFFSET) {
1457 /* allow override in this case */ 1542 /* allow override in this case */
1458 if (reiserfs_rupasov_hash(s)) { 1543 if (reiserfs_rupasov_hash(s)) {
1459 hash = YURA_HASH ; 1544 hash = YURA_HASH;
1460 } 1545 }
1461 reiserfs_warning(s,"FS seems to be empty, autodetect " 1546 reiserfs_warning(s, "FS seems to be empty, autodetect "
1462 "is using the default hash"); 1547 "is using the default hash");
1463 break; 1548 break;
1464 } 1549 }
1465 r5hash=GET_HASH_VALUE (r5_hash (de.de_name, de.de_namelen)); 1550 r5hash = GET_HASH_VALUE(r5_hash(de.de_name, de.de_namelen));
1466 teahash=GET_HASH_VALUE (keyed_hash (de.de_name, de.de_namelen)); 1551 teahash = GET_HASH_VALUE(keyed_hash(de.de_name, de.de_namelen));
1467 yurahash=GET_HASH_VALUE (yura_hash (de.de_name, de.de_namelen)); 1552 yurahash = GET_HASH_VALUE(yura_hash(de.de_name, de.de_namelen));
1468 if ( ( (teahash == r5hash) && (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash) ) || 1553 if (((teahash == r5hash)
1469 ( (teahash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) || 1554 &&
1470 ( (r5hash == yurahash) && (yurahash == GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])))) ) ) { 1555 (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num])))
1471 reiserfs_warning(s,"Unable to automatically detect hash function. " 1556 == r5hash)) || ((teahash == yurahash)
1472 "Please mount with -o hash={tea,rupasov,r5}", 1557 && (yurahash ==
1473 reiserfs_bdevname (s)); 1558 GET_HASH_VALUE(deh_offset
1474 hash = UNSET_HASH; 1559 (&
1475 break; 1560 (de.
1476 } 1561 de_deh[de.
1477 if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == yurahash) 1562 de_entry_num])))))
1478 hash = YURA_HASH; 1563 || ((r5hash == yurahash)
1479 else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == teahash) 1564 && (yurahash ==
1480 hash = TEA_HASH; 1565 GET_HASH_VALUE(deh_offset
1481 else if (GET_HASH_VALUE( deh_offset(&(de.de_deh[de.de_entry_num])) ) == r5hash) 1566 (&(de.de_deh[de.de_entry_num])))))) {
1482 hash = R5_HASH; 1567 reiserfs_warning(s,
1483 else { 1568 "Unable to automatically detect hash function. "
1484 reiserfs_warning (s,"Unrecognised hash function"); 1569 "Please mount with -o hash={tea,rupasov,r5}",
1485 hash = UNSET_HASH; 1570 reiserfs_bdevname(s));
1486 } 1571 hash = UNSET_HASH;
1487 } while (0); 1572 break;
1488 1573 }
1489 pathrelse (&path); 1574 if (GET_HASH_VALUE(deh_offset(&(de.de_deh[de.de_entry_num]))) ==
1490 return hash; 1575 yurahash)
1576 hash = YURA_HASH;
1577 else if (GET_HASH_VALUE
1578 (deh_offset(&(de.de_deh[de.de_entry_num]))) == teahash)
1579 hash = TEA_HASH;
1580 else if (GET_HASH_VALUE
1581 (deh_offset(&(de.de_deh[de.de_entry_num]))) == r5hash)
1582 hash = R5_HASH;
1583 else {
1584 reiserfs_warning(s, "Unrecognised hash function");
1585 hash = UNSET_HASH;
1586 }
1587 } while (0);
1588
1589 pathrelse(&path);
1590 return hash;
1491} 1591}
1492 1592
1493// finds out which hash names are sorted with 1593// finds out which hash names are sorted with
1494static int what_hash (struct super_block * s) 1594static int what_hash(struct super_block *s)
1495{ 1595{
1496 __u32 code; 1596 __u32 code;
1497 1597
1498 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s)); 1598 code = sb_hash_function_code(SB_DISK_SUPER_BLOCK(s));
1499 1599
1500 /* reiserfs_hash_detect() == true if any of the hash mount options 1600 /* reiserfs_hash_detect() == true if any of the hash mount options
1501 ** were used. We must check them to make sure the user isn't 1601 ** were used. We must check them to make sure the user isn't
1502 ** using a bad hash value 1602 ** using a bad hash value
1503 */ 1603 */
1504 if (code == UNSET_HASH || reiserfs_hash_detect(s)) 1604 if (code == UNSET_HASH || reiserfs_hash_detect(s))
1505 code = find_hash_out (s); 1605 code = find_hash_out(s);
1506 1606
1507 if (code != UNSET_HASH && reiserfs_hash_detect(s)) { 1607 if (code != UNSET_HASH && reiserfs_hash_detect(s)) {
1508 /* detection has found the hash, and we must check against the 1608 /* detection has found the hash, and we must check against the
1509 ** mount options 1609 ** mount options
1510 */ 1610 */
1511 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { 1611 if (reiserfs_rupasov_hash(s) && code != YURA_HASH) {
1512 reiserfs_warning (s, "Error, %s hash detected, " 1612 reiserfs_warning(s, "Error, %s hash detected, "
1513 "unable to force rupasov hash", reiserfs_hashname(code)) ; 1613 "unable to force rupasov hash",
1514 code = UNSET_HASH ; 1614 reiserfs_hashname(code));
1515 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { 1615 code = UNSET_HASH;
1516 reiserfs_warning (s, "Error, %s hash detected, " 1616 } else if (reiserfs_tea_hash(s) && code != TEA_HASH) {
1517 "unable to force tea hash", reiserfs_hashname(code)) ; 1617 reiserfs_warning(s, "Error, %s hash detected, "
1518 code = UNSET_HASH ; 1618 "unable to force tea hash",
1519 } else if (reiserfs_r5_hash(s) && code != R5_HASH) { 1619 reiserfs_hashname(code));
1520 reiserfs_warning (s, "Error, %s hash detected, " 1620 code = UNSET_HASH;
1521 "unable to force r5 hash", reiserfs_hashname(code)) ; 1621 } else if (reiserfs_r5_hash(s) && code != R5_HASH) {
1522 code = UNSET_HASH ; 1622 reiserfs_warning(s, "Error, %s hash detected, "
1523 } 1623 "unable to force r5 hash",
1524 } else { 1624 reiserfs_hashname(code));
1525 /* find_hash_out was not called or could not determine the hash */ 1625 code = UNSET_HASH;
1526 if (reiserfs_rupasov_hash(s)) { 1626 }
1527 code = YURA_HASH ; 1627 } else {
1528 } else if (reiserfs_tea_hash(s)) { 1628 /* find_hash_out was not called or could not determine the hash */
1529 code = TEA_HASH ; 1629 if (reiserfs_rupasov_hash(s)) {
1530 } else if (reiserfs_r5_hash(s)) { 1630 code = YURA_HASH;
1531 code = R5_HASH ; 1631 } else if (reiserfs_tea_hash(s)) {
1532 } 1632 code = TEA_HASH;
1533 } 1633 } else if (reiserfs_r5_hash(s)) {
1534 1634 code = R5_HASH;
1535 /* if we are mounted RW, and we have a new valid hash code, update 1635 }
1536 ** the super 1636 }
1537 */ 1637
1538 if (code != UNSET_HASH && 1638 /* if we are mounted RW, and we have a new valid hash code, update
1539 !(s->s_flags & MS_RDONLY) && 1639 ** the super
1540 code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) { 1640 */
1541 set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code); 1641 if (code != UNSET_HASH &&
1542 } 1642 !(s->s_flags & MS_RDONLY) &&
1543 return code; 1643 code != sb_hash_function_code(SB_DISK_SUPER_BLOCK(s))) {
1644 set_sb_hash_function_code(SB_DISK_SUPER_BLOCK(s), code);
1645 }
1646 return code;
1544} 1647}
1545 1648
1546// return pointer to appropriate function 1649// return pointer to appropriate function
1547static hashf_t hash_function (struct super_block * s) 1650static hashf_t hash_function(struct super_block *s)
1548{ 1651{
1549 switch (what_hash (s)) { 1652 switch (what_hash(s)) {
1550 case TEA_HASH: 1653 case TEA_HASH:
1551 reiserfs_info (s, "Using tea hash to sort names\n"); 1654 reiserfs_info(s, "Using tea hash to sort names\n");
1552 return keyed_hash; 1655 return keyed_hash;
1553 case YURA_HASH: 1656 case YURA_HASH:
1554 reiserfs_info (s, "Using rupasov hash to sort names\n"); 1657 reiserfs_info(s, "Using rupasov hash to sort names\n");
1555 return yura_hash; 1658 return yura_hash;
1556 case R5_HASH: 1659 case R5_HASH:
1557 reiserfs_info (s, "Using r5 hash to sort names\n"); 1660 reiserfs_info(s, "Using r5 hash to sort names\n");
1558 return r5_hash; 1661 return r5_hash;
1559 } 1662 }
1560 return NULL; 1663 return NULL;
1561} 1664}
1562 1665
1563// this is used to set up correct value for old partitions 1666// this is used to set up correct value for old partitions
1564static int function2code (hashf_t func) 1667static int function2code(hashf_t func)
1565{ 1668{
1566 if (func == keyed_hash) 1669 if (func == keyed_hash)
1567 return TEA_HASH; 1670 return TEA_HASH;
1568 if (func == yura_hash) 1671 if (func == yura_hash)
1569 return YURA_HASH; 1672 return YURA_HASH;
1570 if (func == r5_hash) 1673 if (func == r5_hash)
1571 return R5_HASH; 1674 return R5_HASH;
1572 1675
1573 BUG() ; // should never happen 1676 BUG(); // should never happen
1574 1677
1575 return 0; 1678 return 0;
1576} 1679}
1577 1680
1578#define SWARN(silent, s, ...) \ 1681#define SWARN(silent, s, ...) \
1579 if (!(silent)) \ 1682 if (!(silent)) \
1580 reiserfs_warning (s, __VA_ARGS__) 1683 reiserfs_warning (s, __VA_ARGS__)
1581 1684
1582static int reiserfs_fill_super (struct super_block * s, void * data, int silent) 1685static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1583{ 1686{
1584 struct inode *root_inode; 1687 struct inode *root_inode;
1585 int j; 1688 int j;
1586 struct reiserfs_transaction_handle th ; 1689 struct reiserfs_transaction_handle th;
1587 int old_format = 0; 1690 int old_format = 0;
1588 unsigned long blocks; 1691 unsigned long blocks;
1589 unsigned int commit_max_age = 0; 1692 unsigned int commit_max_age = 0;
1590 int jinit_done = 0 ; 1693 int jinit_done = 0;
1591 struct reiserfs_iget_args args ; 1694 struct reiserfs_iget_args args;
1592 struct reiserfs_super_block * rs; 1695 struct reiserfs_super_block *rs;
1593 char *jdev_name; 1696 char *jdev_name;
1594 struct reiserfs_sb_info *sbi; 1697 struct reiserfs_sb_info *sbi;
1595 int errval = -EINVAL; 1698 int errval = -EINVAL;
1596 1699
1597 sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL); 1700 sbi = kmalloc(sizeof(struct reiserfs_sb_info), GFP_KERNEL);
1598 if (!sbi) { 1701 if (!sbi) {
1599 errval = -ENOMEM; 1702 errval = -ENOMEM;
1600 goto error; 1703 goto error;
1601 } 1704 }
1602 s->s_fs_info = sbi; 1705 s->s_fs_info = sbi;
1603 memset (sbi, 0, sizeof (struct reiserfs_sb_info)); 1706 memset(sbi, 0, sizeof(struct reiserfs_sb_info));
1604 /* Set default values for options: non-aggressive tails, RO on errors */ 1707 /* Set default values for options: non-aggressive tails, RO on errors */
1605 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1708 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1606 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1709 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1607 /* no preallocation minimum, be smart in 1710 /* no preallocation minimum, be smart in
1608 reiserfs_file_write instead */ 1711 reiserfs_file_write instead */
1609 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1712 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
1610 /* Preallocate by 16 blocks (17-1) at once */ 1713 /* Preallocate by 16 blocks (17-1) at once */
1611 REISERFS_SB(s)->s_alloc_options.preallocsize = 17; 1714 REISERFS_SB(s)->s_alloc_options.preallocsize = 17;
1612 /* Initialize the rwsem for xattr dir */ 1715 /* Initialize the rwsem for xattr dir */
1613 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem); 1716 init_rwsem(&REISERFS_SB(s)->xattr_dir_sem);
1614 1717
1615 /* setup default block allocator options */ 1718 /* setup default block allocator options */
1616 reiserfs_init_alloc_options(s); 1719 reiserfs_init_alloc_options(s);
1617 1720
1618 jdev_name = NULL; 1721 jdev_name = NULL;
1619 if (reiserfs_parse_options (s, (char *) data, &(sbi->s_mount_opt), &blocks, &jdev_name, &commit_max_age) == 0) { 1722 if (reiserfs_parse_options
1620 goto error; 1723 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1621 } 1724 &commit_max_age) == 0) {
1622 1725 goto error;
1623 if (blocks) { 1726 }
1624 SWARN (silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1727
1625 "for remount only"); 1728 if (blocks) {
1626 goto error; 1729 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
1627 } 1730 "for remount only");
1628 1731 goto error;
1629 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */ 1732 }
1630 if (!read_super_block (s, REISERFS_OLD_DISK_OFFSET_IN_BYTES)) 1733
1631 old_format = 1; 1734 /* try old format (undistributed bitmap, super block in 8-th 1k block of a device) */
1632 /* try new format (64-th 1k block), which can contain reiserfs super block */ 1735 if (!read_super_block(s, REISERFS_OLD_DISK_OFFSET_IN_BYTES))
1633 else if (read_super_block (s, REISERFS_DISK_OFFSET_IN_BYTES)) { 1736 old_format = 1;
1634 SWARN(silent, s, "sh-2021: reiserfs_fill_super: can not find reiserfs on %s", reiserfs_bdevname (s)); 1737 /* try new format (64-th 1k block), which can contain reiserfs super block */
1635 goto error; 1738 else if (read_super_block(s, REISERFS_DISK_OFFSET_IN_BYTES)) {
1636 } 1739 SWARN(silent, s,
1637 1740 "sh-2021: reiserfs_fill_super: can not find reiserfs on %s",
1638 rs = SB_DISK_SUPER_BLOCK (s); 1741 reiserfs_bdevname(s));
1639 /* Let's do basic sanity check to verify that underlying device is not 1742 goto error;
1640 smaller than the filesystem. If the check fails then abort and scream, 1743 }
1641 because bad stuff will happen otherwise. */ 1744
1642 if ( s->s_bdev && s->s_bdev->bd_inode && i_size_read(s->s_bdev->bd_inode) < sb_block_count(rs)*sb_blocksize(rs)) { 1745 rs = SB_DISK_SUPER_BLOCK(s);
1643 SWARN (silent, s, "Filesystem on %s cannot be mounted because it is bigger than the device", reiserfs_bdevname(s)); 1746 /* Let's do basic sanity check to verify that underlying device is not
1644 SWARN(silent, s, "You may need to run fsck or increase size of your LVM partition"); 1747 smaller than the filesystem. If the check fails then abort and scream,
1645 SWARN(silent, s, "Or may be you forgot to reboot after fdisk when it told you to"); 1748 because bad stuff will happen otherwise. */
1646 goto error; 1749 if (s->s_bdev && s->s_bdev->bd_inode
1647 } 1750 && i_size_read(s->s_bdev->bd_inode) <
1648 1751 sb_block_count(rs) * sb_blocksize(rs)) {
1649 sbi->s_mount_state = SB_REISERFS_STATE(s); 1752 SWARN(silent, s,
1650 sbi->s_mount_state = REISERFS_VALID_FS ; 1753 "Filesystem on %s cannot be mounted because it is bigger than the device",
1651 1754 reiserfs_bdevname(s));
1652 if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { 1755 SWARN(silent, s,
1653 SWARN(silent, s, "jmacd-8: reiserfs_fill_super: unable to read bitmap"); 1756 "You may need to run fsck or increase size of your LVM partition");
1654 goto error; 1757 SWARN(silent, s,
1655 } 1758 "Or may be you forgot to reboot after fdisk when it told you to");
1759 goto error;
1760 }
1761
1762 sbi->s_mount_state = SB_REISERFS_STATE(s);
1763 sbi->s_mount_state = REISERFS_VALID_FS;
1764
1765 if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) {
1766 SWARN(silent, s,
1767 "jmacd-8: reiserfs_fill_super: unable to read bitmap");
1768 goto error;
1769 }
1656#ifdef CONFIG_REISERFS_CHECK 1770#ifdef CONFIG_REISERFS_CHECK
1657 SWARN (silent, s, "CONFIG_REISERFS_CHECK is set ON"); 1771 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
1658 SWARN (silent, s, "- it is slow mode for debugging."); 1772 SWARN(silent, s, "- it is slow mode for debugging.");
1659#endif 1773#endif
1660 1774
1661 /* make data=ordered the default */ 1775 /* make data=ordered the default */
1662 if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && 1776 if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) &&
1663 !reiserfs_data_writeback(s)) 1777 !reiserfs_data_writeback(s)) {
1664 { 1778 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED);
1665 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); 1779 }
1666 } 1780
1667 1781 if (reiserfs_data_log(s)) {
1668 if (reiserfs_data_log(s)) { 1782 reiserfs_info(s, "using journaled data mode\n");
1669 reiserfs_info (s, "using journaled data mode\n"); 1783 } else if (reiserfs_data_ordered(s)) {
1670 } else if (reiserfs_data_ordered(s)) { 1784 reiserfs_info(s, "using ordered data mode\n");
1671 reiserfs_info (s, "using ordered data mode\n"); 1785 } else {
1672 } else { 1786 reiserfs_info(s, "using writeback data mode\n");
1673 reiserfs_info (s, "using writeback data mode\n"); 1787 }
1674 } 1788 if (reiserfs_barrier_flush(s)) {
1675 if (reiserfs_barrier_flush(s)) { 1789 printk("reiserfs: using flush barriers\n");
1676 printk("reiserfs: using flush barriers\n"); 1790 }
1677 } 1791 // set_device_ro(s->s_dev, 1) ;
1678 1792 if (journal_init(s, jdev_name, old_format, commit_max_age)) {
1679 // set_device_ro(s->s_dev, 1) ; 1793 SWARN(silent, s,
1680 if( journal_init(s, jdev_name, old_format, commit_max_age) ) { 1794 "sh-2022: reiserfs_fill_super: unable to initialize journal space");
1681 SWARN(silent, s, "sh-2022: reiserfs_fill_super: unable to initialize journal space") ; 1795 goto error;
1682 goto error ; 1796 } else {
1683 } else { 1797 jinit_done = 1; /* once this is set, journal_release must be called
1684 jinit_done = 1 ; /* once this is set, journal_release must be called 1798 ** if we error out of the mount
1685 ** if we error out of the mount 1799 */
1686 */ 1800 }
1687 } 1801 if (reread_meta_blocks(s)) {
1688 if (reread_meta_blocks(s)) { 1802 SWARN(silent, s,
1689 SWARN(silent, s, "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init") ; 1803 "jmacd-9: reiserfs_fill_super: unable to reread meta blocks after journal init");
1690 goto error ; 1804 goto error;
1691 } 1805 }
1692 1806
1693 if (replay_only (s)) 1807 if (replay_only(s))
1694 goto error; 1808 goto error;
1695 1809
1696 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) { 1810 if (bdev_read_only(s->s_bdev) && !(s->s_flags & MS_RDONLY)) {
1697 SWARN(silent, s, "clm-7000: Detected readonly device, marking FS readonly") ; 1811 SWARN(silent, s,
1698 s->s_flags |= MS_RDONLY ; 1812 "clm-7000: Detected readonly device, marking FS readonly");
1699 } 1813 s->s_flags |= MS_RDONLY;
1700 args.objectid = REISERFS_ROOT_OBJECTID ; 1814 }
1701 args.dirid = REISERFS_ROOT_PARENT_OBJECTID ; 1815 args.objectid = REISERFS_ROOT_OBJECTID;
1702 root_inode = iget5_locked (s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); 1816 args.dirid = REISERFS_ROOT_PARENT_OBJECTID;
1703 if (!root_inode) { 1817 root_inode =
1704 SWARN(silent, s, "jmacd-10: reiserfs_fill_super: get root inode failed"); 1818 iget5_locked(s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor,
1705 goto error; 1819 reiserfs_init_locked_inode, (void *)(&args));
1706 } 1820 if (!root_inode) {
1707 1821 SWARN(silent, s,
1708 if (root_inode->i_state & I_NEW) { 1822 "jmacd-10: reiserfs_fill_super: get root inode failed");
1709 reiserfs_read_locked_inode(root_inode, &args); 1823 goto error;
1710 unlock_new_inode(root_inode); 1824 }
1711 } 1825
1712 1826 if (root_inode->i_state & I_NEW) {
1713 s->s_root = d_alloc_root(root_inode); 1827 reiserfs_read_locked_inode(root_inode, &args);
1714 if (!s->s_root) { 1828 unlock_new_inode(root_inode);
1715 iput(root_inode); 1829 }
1716 goto error; 1830
1717 } 1831 s->s_root = d_alloc_root(root_inode);
1718 1832 if (!s->s_root) {
1719 // define and initialize hash function 1833 iput(root_inode);
1720 sbi->s_hash_function = hash_function (s); 1834 goto error;
1721 if (sbi->s_hash_function == NULL) { 1835 }
1722 dput(s->s_root) ; 1836 // define and initialize hash function
1723 s->s_root = NULL ; 1837 sbi->s_hash_function = hash_function(s);
1724 goto error ; 1838 if (sbi->s_hash_function == NULL) {
1725 } 1839 dput(s->s_root);
1726 1840 s->s_root = NULL;
1727 if (is_reiserfs_3_5 (rs) || (is_reiserfs_jr (rs) && SB_VERSION (s) == REISERFS_VERSION_1)) 1841 goto error;
1728 set_bit(REISERFS_3_5, &(sbi->s_properties)); 1842 }
1729 else 1843
1730 set_bit(REISERFS_3_6, &(sbi->s_properties)); 1844 if (is_reiserfs_3_5(rs)
1731 1845 || (is_reiserfs_jr(rs) && SB_VERSION(s) == REISERFS_VERSION_1))
1732 if (!(s->s_flags & MS_RDONLY)) { 1846 set_bit(REISERFS_3_5, &(sbi->s_properties));
1733 1847 else
1734 errval = journal_begin(&th, s, 1) ; 1848 set_bit(REISERFS_3_6, &(sbi->s_properties));
1735 if (errval) { 1849
1736 dput (s->s_root); 1850 if (!(s->s_flags & MS_RDONLY)) {
1737 s->s_root = NULL; 1851
1738 goto error; 1852 errval = journal_begin(&th, s, 1);
1739 } 1853 if (errval) {
1740 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; 1854 dput(s->s_root);
1741 1855 s->s_root = NULL;
1742 set_sb_umount_state( rs, REISERFS_ERROR_FS ); 1856 goto error;
1743 set_sb_fs_state (rs, 0); 1857 }
1744 1858 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
1745 if (old_format_only(s)) { 1859
1746 /* filesystem of format 3.5 either with standard or non-standard 1860 set_sb_umount_state(rs, REISERFS_ERROR_FS);
1747 journal */ 1861 set_sb_fs_state(rs, 0);
1748 if (convert_reiserfs (s)) { 1862
1749 /* and -o conv is given */ 1863 if (old_format_only(s)) {
1750 if(!silent) 1864 /* filesystem of format 3.5 either with standard or non-standard
1751 reiserfs_info (s,"converting 3.5 filesystem to the 3.6 format") ; 1865 journal */
1752 1866 if (convert_reiserfs(s)) {
1753 if (is_reiserfs_3_5 (rs)) 1867 /* and -o conv is given */
1754 /* put magic string of 3.6 format. 2.2 will not be able to 1868 if (!silent)
1755 mount this filesystem anymore */ 1869 reiserfs_info(s,
1756 memcpy (rs->s_v1.s_magic, reiserfs_3_6_magic_string, 1870 "converting 3.5 filesystem to the 3.6 format");
1757 sizeof (reiserfs_3_6_magic_string)); 1871
1758 1872 if (is_reiserfs_3_5(rs))
1759 set_sb_version(rs,REISERFS_VERSION_2); 1873 /* put magic string of 3.6 format. 2.2 will not be able to
1760 reiserfs_convert_objectid_map_v1(s) ; 1874 mount this filesystem anymore */
1761 set_bit(REISERFS_3_6, &(sbi->s_properties)); 1875 memcpy(rs->s_v1.s_magic,
1762 clear_bit(REISERFS_3_5, &(sbi->s_properties)); 1876 reiserfs_3_6_magic_string,
1763 } else if (!silent){ 1877 sizeof
1764 reiserfs_info (s, "using 3.5.x disk format\n") ; 1878 (reiserfs_3_6_magic_string));
1765 } 1879
1766 } 1880 set_sb_version(rs, REISERFS_VERSION_2);
1767 1881 reiserfs_convert_objectid_map_v1(s);
1768 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); 1882 set_bit(REISERFS_3_6, &(sbi->s_properties));
1769 errval = journal_end(&th, s, 1) ; 1883 clear_bit(REISERFS_3_5, &(sbi->s_properties));
1770 if (errval) { 1884 } else if (!silent) {
1771 dput (s->s_root); 1885 reiserfs_info(s, "using 3.5.x disk format\n");
1772 s->s_root = NULL; 1886 }
1773 goto error; 1887 }
1774 } 1888
1775 1889 journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
1776 if ((errval = reiserfs_xattr_init (s, s->s_flags))) { 1890 errval = journal_end(&th, s, 1);
1777 dput (s->s_root); 1891 if (errval) {
1778 s->s_root = NULL; 1892 dput(s->s_root);
1779 goto error; 1893 s->s_root = NULL;
1780 } 1894 goto error;
1781 1895 }
1782 /* look for files which were to be removed in previous session */ 1896
1783 finish_unfinished (s); 1897 if ((errval = reiserfs_xattr_init(s, s->s_flags))) {
1784 } else { 1898 dput(s->s_root);
1785 if ( old_format_only(s) && !silent) { 1899 s->s_root = NULL;
1786 reiserfs_info (s, "using 3.5.x disk format\n") ; 1900 goto error;
1787 } 1901 }
1788 1902
1789 if ((errval = reiserfs_xattr_init (s, s->s_flags))) { 1903 /* look for files which were to be removed in previous session */
1790 dput (s->s_root); 1904 finish_unfinished(s);
1791 s->s_root = NULL; 1905 } else {
1792 goto error; 1906 if (old_format_only(s) && !silent) {
1793 } 1907 reiserfs_info(s, "using 3.5.x disk format\n");
1794 } 1908 }
1795 // mark hash in super block: it could be unset. overwrite should be ok 1909
1796 set_sb_hash_function_code( rs, function2code(sbi->s_hash_function ) ); 1910 if ((errval = reiserfs_xattr_init(s, s->s_flags))) {
1797 1911 dput(s->s_root);
1798 handle_attrs( s ); 1912 s->s_root = NULL;
1799 1913 goto error;
1800 reiserfs_proc_info_init( s ); 1914 }
1801 1915 }
1802 init_waitqueue_head (&(sbi->s_wait)); 1916 // mark hash in super block: it could be unset. overwrite should be ok
1803 spin_lock_init(&sbi->bitmap_lock); 1917 set_sb_hash_function_code(rs, function2code(sbi->s_hash_function));
1804 1918
1805 return (0); 1919 handle_attrs(s);
1806 1920
1807 error: 1921 reiserfs_proc_info_init(s);
1808 if (jinit_done) { /* kill the commit thread, free journal ram */ 1922
1809 journal_release_error(NULL, s) ; 1923 init_waitqueue_head(&(sbi->s_wait));
1810 } 1924 spin_lock_init(&sbi->bitmap_lock);
1811 if (SB_DISK_SUPER_BLOCK (s)) { 1925
1812 for (j = 0; j < SB_BMAP_NR (s); j ++) { 1926 return (0);
1813 if (SB_AP_BITMAP (s)) 1927
1814 brelse (SB_AP_BITMAP (s)[j].bh); 1928 error:
1815 } 1929 if (jinit_done) { /* kill the commit thread, free journal ram */
1816 if (SB_AP_BITMAP (s)) 1930 journal_release_error(NULL, s);
1817 vfree (SB_AP_BITMAP (s)); 1931 }
1818 } 1932 if (SB_DISK_SUPER_BLOCK(s)) {
1819 if (SB_BUFFER_WITH_SB (s)) 1933 for (j = 0; j < SB_BMAP_NR(s); j++) {
1820 brelse(SB_BUFFER_WITH_SB (s)); 1934 if (SB_AP_BITMAP(s))
1935 brelse(SB_AP_BITMAP(s)[j].bh);
1936 }
1937 if (SB_AP_BITMAP(s))
1938 vfree(SB_AP_BITMAP(s));
1939 }
1940 if (SB_BUFFER_WITH_SB(s))
1941 brelse(SB_BUFFER_WITH_SB(s));
1821#ifdef CONFIG_QUOTA 1942#ifdef CONFIG_QUOTA
1822 for (j = 0; j < MAXQUOTAS; j++) { 1943 for (j = 0; j < MAXQUOTAS; j++) {
1823 if (sbi->s_qf_names[j]) 1944 if (sbi->s_qf_names[j])
1824 kfree(sbi->s_qf_names[j]); 1945 kfree(sbi->s_qf_names[j]);
1825 } 1946 }
1826#endif 1947#endif
1827 if (sbi != NULL) { 1948 if (sbi != NULL) {
1828 kfree(sbi); 1949 kfree(sbi);
1829 } 1950 }
1830 1951
1831 s->s_fs_info = NULL; 1952 s->s_fs_info = NULL;
1832 return errval; 1953 return errval;
1833} 1954}
1834 1955
1835 1956static int reiserfs_statfs(struct super_block *s, struct kstatfs *buf)
1836static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
1837{ 1957{
1838 struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); 1958 struct reiserfs_super_block *rs = SB_DISK_SUPER_BLOCK(s);
1839 1959
1840 buf->f_namelen = (REISERFS_MAX_NAME (s->s_blocksize)); 1960 buf->f_namelen = (REISERFS_MAX_NAME(s->s_blocksize));
1841 buf->f_bfree = sb_free_blocks(rs); 1961 buf->f_bfree = sb_free_blocks(rs);
1842 buf->f_bavail = buf->f_bfree; 1962 buf->f_bavail = buf->f_bfree;
1843 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1; 1963 buf->f_blocks = sb_block_count(rs) - sb_bmap_nr(rs) - 1;
1844 buf->f_bsize = s->s_blocksize; 1964 buf->f_bsize = s->s_blocksize;
1845 /* changed to accommodate gcc folks.*/ 1965 /* changed to accommodate gcc folks. */
1846 buf->f_type = REISERFS_SUPER_MAGIC; 1966 buf->f_type = REISERFS_SUPER_MAGIC;
1847 return 0; 1967 return 0;
1848} 1968}
1849 1969
1850#ifdef CONFIG_QUOTA 1970#ifdef CONFIG_QUOTA
1851static int reiserfs_dquot_initialize(struct inode *inode, int type) 1971static int reiserfs_dquot_initialize(struct inode *inode, int type)
1852{ 1972{
1853 struct reiserfs_transaction_handle th; 1973 struct reiserfs_transaction_handle th;
1854 int ret, err; 1974 int ret, err;
1855 1975
1856 /* We may create quota structure so we need to reserve enough blocks */ 1976 /* We may create quota structure so we need to reserve enough blocks */
1857 reiserfs_write_lock(inode->i_sb); 1977 reiserfs_write_lock(inode->i_sb);
1858 ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); 1978 ret =
1859 if (ret) 1979 journal_begin(&th, inode->i_sb,
1860 goto out; 1980 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1861 ret = dquot_initialize(inode, type); 1981 if (ret)
1862 err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)); 1982 goto out;
1863 if (!ret && err) 1983 ret = dquot_initialize(inode, type);
1864 ret = err; 1984 err =
1865out: 1985 journal_end(&th, inode->i_sb,
1866 reiserfs_write_unlock(inode->i_sb); 1986 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1867 return ret; 1987 if (!ret && err)
1988 ret = err;
1989 out:
1990 reiserfs_write_unlock(inode->i_sb);
1991 return ret;
1868} 1992}
1869 1993
1870static int reiserfs_dquot_drop(struct inode *inode) 1994static int reiserfs_dquot_drop(struct inode *inode)
1871{ 1995{
1872 struct reiserfs_transaction_handle th; 1996 struct reiserfs_transaction_handle th;
1873 int ret, err; 1997 int ret, err;
1874 1998
1875 /* We may delete quota structure so we need to reserve enough blocks */ 1999 /* We may delete quota structure so we need to reserve enough blocks */
1876 reiserfs_write_lock(inode->i_sb); 2000 reiserfs_write_lock(inode->i_sb);
1877 ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); 2001 ret =
1878 if (ret) 2002 journal_begin(&th, inode->i_sb,
1879 goto out; 2003 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1880 ret = dquot_drop(inode); 2004 if (ret)
1881 err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)); 2005 goto out;
1882 if (!ret && err) 2006 ret = dquot_drop(inode);
1883 ret = err; 2007 err =
1884out: 2008 journal_end(&th, inode->i_sb,
1885 reiserfs_write_unlock(inode->i_sb); 2009 2 * REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1886 return ret; 2010 if (!ret && err)
2011 ret = err;
2012 out:
2013 reiserfs_write_unlock(inode->i_sb);
2014 return ret;
1887} 2015}
1888 2016
1889static int reiserfs_write_dquot(struct dquot *dquot) 2017static int reiserfs_write_dquot(struct dquot *dquot)
1890{ 2018{
1891 struct reiserfs_transaction_handle th; 2019 struct reiserfs_transaction_handle th;
1892 int ret, err; 2020 int ret, err;
1893 2021
1894 reiserfs_write_lock(dquot->dq_sb); 2022 reiserfs_write_lock(dquot->dq_sb);
1895 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2023 ret =
1896 if (ret) 2024 journal_begin(&th, dquot->dq_sb,
1897 goto out; 2025 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1898 ret = dquot_commit(dquot); 2026 if (ret)
1899 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); 2027 goto out;
1900 if (!ret && err) 2028 ret = dquot_commit(dquot);
1901 ret = err; 2029 err =
1902out: 2030 journal_end(&th, dquot->dq_sb,
1903 reiserfs_write_unlock(dquot->dq_sb); 2031 REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1904 return ret; 2032 if (!ret && err)
2033 ret = err;
2034 out:
2035 reiserfs_write_unlock(dquot->dq_sb);
2036 return ret;
1905} 2037}
1906 2038
1907static int reiserfs_acquire_dquot(struct dquot *dquot) 2039static int reiserfs_acquire_dquot(struct dquot *dquot)
1908{ 2040{
1909 struct reiserfs_transaction_handle th; 2041 struct reiserfs_transaction_handle th;
1910 int ret, err; 2042 int ret, err;
1911 2043
1912 reiserfs_write_lock(dquot->dq_sb); 2044 reiserfs_write_lock(dquot->dq_sb);
1913 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2045 ret =
1914 if (ret) 2046 journal_begin(&th, dquot->dq_sb,
1915 goto out; 2047 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1916 ret = dquot_acquire(dquot); 2048 if (ret)
1917 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); 2049 goto out;
1918 if (!ret && err) 2050 ret = dquot_acquire(dquot);
1919 ret = err; 2051 err =
1920out: 2052 journal_end(&th, dquot->dq_sb,
1921 reiserfs_write_unlock(dquot->dq_sb); 2053 REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1922 return ret; 2054 if (!ret && err)
2055 ret = err;
2056 out:
2057 reiserfs_write_unlock(dquot->dq_sb);
2058 return ret;
1923} 2059}
1924 2060
1925static int reiserfs_release_dquot(struct dquot *dquot) 2061static int reiserfs_release_dquot(struct dquot *dquot)
1926{ 2062{
1927 struct reiserfs_transaction_handle th; 2063 struct reiserfs_transaction_handle th;
1928 int ret, err; 2064 int ret, err;
1929 2065
1930 reiserfs_write_lock(dquot->dq_sb); 2066 reiserfs_write_lock(dquot->dq_sb);
1931 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2067 ret =
1932 if (ret) 2068 journal_begin(&th, dquot->dq_sb,
1933 goto out; 2069 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1934 ret = dquot_release(dquot); 2070 if (ret)
1935 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); 2071 goto out;
1936 if (!ret && err) 2072 ret = dquot_release(dquot);
1937 ret = err; 2073 err =
1938out: 2074 journal_end(&th, dquot->dq_sb,
1939 reiserfs_write_unlock(dquot->dq_sb); 2075 REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1940 return ret; 2076 if (!ret && err)
2077 ret = err;
2078 out:
2079 reiserfs_write_unlock(dquot->dq_sb);
2080 return ret;
1941} 2081}
1942 2082
1943static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2083static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1944{ 2084{
1945 /* Are we journalling quotas? */ 2085 /* Are we journalling quotas? */
1946 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2086 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1947 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2087 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1948 dquot_mark_dquot_dirty(dquot); 2088 dquot_mark_dquot_dirty(dquot);
1949 return reiserfs_write_dquot(dquot); 2089 return reiserfs_write_dquot(dquot);
1950 } 2090 } else
1951 else 2091 return dquot_mark_dquot_dirty(dquot);
1952 return dquot_mark_dquot_dirty(dquot);
1953} 2092}
1954 2093
1955static int reiserfs_write_info(struct super_block *sb, int type) 2094static int reiserfs_write_info(struct super_block *sb, int type)
1956{ 2095{
1957 struct reiserfs_transaction_handle th; 2096 struct reiserfs_transaction_handle th;
1958 int ret, err; 2097 int ret, err;
1959 2098
1960 /* Data block + inode block */ 2099 /* Data block + inode block */
1961 reiserfs_write_lock(sb); 2100 reiserfs_write_lock(sb);
1962 ret = journal_begin(&th, sb, 2); 2101 ret = journal_begin(&th, sb, 2);
1963 if (ret) 2102 if (ret)
1964 goto out; 2103 goto out;
1965 ret = dquot_commit_info(sb, type); 2104 ret = dquot_commit_info(sb, type);
1966 err = journal_end(&th, sb, 2); 2105 err = journal_end(&th, sb, 2);
1967 if (!ret && err) 2106 if (!ret && err)
1968 ret = err; 2107 ret = err;
1969out: 2108 out:
1970 reiserfs_write_unlock(sb); 2109 reiserfs_write_unlock(sb);
1971 return ret; 2110 return ret;
1972} 2111}
1973 2112
1974/* 2113/*
@@ -1977,45 +2116,48 @@ out:
1977static int reiserfs_quota_on_mount(struct super_block *sb, int type) 2116static int reiserfs_quota_on_mount(struct super_block *sb, int type)
1978{ 2117{
1979 return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], 2118 return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
1980 REISERFS_SB(sb)->s_jquota_fmt, type); 2119 REISERFS_SB(sb)->s_jquota_fmt, type);
1981} 2120}
1982 2121
1983/* 2122/*
1984 * Standard function to be called on quota_on 2123 * Standard function to be called on quota_on
1985 */ 2124 */
1986static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, char *path) 2125static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2126 char *path)
1987{ 2127{
1988 int err; 2128 int err;
1989 struct nameidata nd; 2129 struct nameidata nd;
1990 2130
1991 if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA))) 2131 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
1992 return -EINVAL; 2132 return -EINVAL;
1993 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2133 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
1994 if (err) 2134 if (err)
1995 return err; 2135 return err;
1996 /* Quotafile not on the same filesystem? */ 2136 /* Quotafile not on the same filesystem? */
1997 if (nd.mnt->mnt_sb != sb) { 2137 if (nd.mnt->mnt_sb != sb) {
1998 path_release(&nd); 2138 path_release(&nd);
1999 return -EXDEV; 2139 return -EXDEV;
2000 } 2140 }
2001 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2141 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2002 if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) { 2142 if (!REISERFS_I(nd.dentry->d_inode)->i_flags & i_nopack_mask) {
2003 reiserfs_warning(sb, "reiserfs: Quota file must have tail packing disabled."); 2143 reiserfs_warning(sb,
2004 path_release(&nd); 2144 "reiserfs: Quota file must have tail packing disabled.");
2005 return -EINVAL; 2145 path_release(&nd);
2006 } 2146 return -EINVAL;
2007 /* Not journalling quota? No more tests needed... */ 2147 }
2008 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2148 /* Not journalling quota? No more tests needed... */
2009 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2149 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
2150 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
2151 path_release(&nd);
2152 return vfs_quota_on(sb, type, format_id, path);
2153 }
2154 /* Quotafile not of fs root? */
2155 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
2156 reiserfs_warning(sb,
2157 "reiserfs: Quota file not on filesystem root. "
2158 "Journalled quota will not work.");
2010 path_release(&nd); 2159 path_release(&nd);
2011 return vfs_quota_on(sb, type, format_id, path); 2160 return vfs_quota_on(sb, type, format_id, path);
2012 }
2013 /* Quotafile not of fs root? */
2014 if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode)
2015 reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. "
2016 "Journalled quota will not work.");
2017 path_release(&nd);
2018 return vfs_quota_on(sb, type, format_id, path);
2019} 2161}
2020 2162
2021/* Read data from quotafile - avoid pagecache and such because we cannot afford 2163/* Read data from quotafile - avoid pagecache and such because we cannot afford
@@ -2025,42 +2167,44 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
2025static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2167static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2026 size_t len, loff_t off) 2168 size_t len, loff_t off)
2027{ 2169{
2028 struct inode *inode = sb_dqopt(sb)->files[type]; 2170 struct inode *inode = sb_dqopt(sb)->files[type];
2029 unsigned long blk = off >> sb->s_blocksize_bits; 2171 unsigned long blk = off >> sb->s_blocksize_bits;
2030 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; 2172 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
2031 size_t toread; 2173 size_t toread;
2032 struct buffer_head tmp_bh, *bh; 2174 struct buffer_head tmp_bh, *bh;
2033 loff_t i_size = i_size_read(inode); 2175 loff_t i_size = i_size_read(inode);
2034 2176
2035 if (off > i_size) 2177 if (off > i_size)
2036 return 0; 2178 return 0;
2037 if (off+len > i_size) 2179 if (off + len > i_size)
2038 len = i_size-off; 2180 len = i_size - off;
2039 toread = len; 2181 toread = len;
2040 while (toread > 0) { 2182 while (toread > 0) {
2041 tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; 2183 tocopy =
2042 tmp_bh.b_state = 0; 2184 sb->s_blocksize - offset <
2043 /* Quota files are without tails so we can safely use this function */ 2185 toread ? sb->s_blocksize - offset : toread;
2044 reiserfs_write_lock(sb); 2186 tmp_bh.b_state = 0;
2045 err = reiserfs_get_block(inode, blk, &tmp_bh, 0); 2187 /* Quota files are without tails so we can safely use this function */
2046 reiserfs_write_unlock(sb); 2188 reiserfs_write_lock(sb);
2047 if (err) 2189 err = reiserfs_get_block(inode, blk, &tmp_bh, 0);
2048 return err; 2190 reiserfs_write_unlock(sb);
2049 if (!buffer_mapped(&tmp_bh)) /* A hole? */ 2191 if (err)
2050 memset(data, 0, tocopy); 2192 return err;
2051 else { 2193 if (!buffer_mapped(&tmp_bh)) /* A hole? */
2052 bh = sb_bread(sb, tmp_bh.b_blocknr); 2194 memset(data, 0, tocopy);
2053 if (!bh) 2195 else {
2054 return -EIO; 2196 bh = sb_bread(sb, tmp_bh.b_blocknr);
2055 memcpy(data, bh->b_data+offset, tocopy); 2197 if (!bh)
2056 brelse(bh); 2198 return -EIO;
2057 } 2199 memcpy(data, bh->b_data + offset, tocopy);
2058 offset = 0; 2200 brelse(bh);
2059 toread -= tocopy; 2201 }
2060 data += tocopy; 2202 offset = 0;
2061 blk++; 2203 toread -= tocopy;
2062 } 2204 data += tocopy;
2063 return len; 2205 blk++;
2206 }
2207 return len;
2064} 2208}
2065 2209
2066/* Write to quotafile (we know the transaction is already started and has 2210/* Write to quotafile (we know the transaction is already started and has
@@ -2068,117 +2212,116 @@ static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2068static ssize_t reiserfs_quota_write(struct super_block *sb, int type, 2212static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
2069 const char *data, size_t len, loff_t off) 2213 const char *data, size_t len, loff_t off)
2070{ 2214{
2071 struct inode *inode = sb_dqopt(sb)->files[type]; 2215 struct inode *inode = sb_dqopt(sb)->files[type];
2072 unsigned long blk = off >> sb->s_blocksize_bits; 2216 unsigned long blk = off >> sb->s_blocksize_bits;
2073 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy; 2217 int err = 0, offset = off & (sb->s_blocksize - 1), tocopy;
2074 int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL; 2218 int journal_quota = REISERFS_SB(sb)->s_qf_names[type] != NULL;
2075 size_t towrite = len; 2219 size_t towrite = len;
2076 struct buffer_head tmp_bh, *bh; 2220 struct buffer_head tmp_bh, *bh;
2077 2221
2078 down(&inode->i_sem); 2222 down(&inode->i_sem);
2079 while (towrite > 0) { 2223 while (towrite > 0) {
2080 tocopy = sb->s_blocksize - offset < towrite ? 2224 tocopy = sb->s_blocksize - offset < towrite ?
2081 sb->s_blocksize - offset : towrite; 2225 sb->s_blocksize - offset : towrite;
2082 tmp_bh.b_state = 0; 2226 tmp_bh.b_state = 0;
2083 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); 2227 err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
2084 if (err) 2228 if (err)
2085 goto out; 2229 goto out;
2086 if (offset || tocopy != sb->s_blocksize) 2230 if (offset || tocopy != sb->s_blocksize)
2087 bh = sb_bread(sb, tmp_bh.b_blocknr); 2231 bh = sb_bread(sb, tmp_bh.b_blocknr);
2088 else 2232 else
2089 bh = sb_getblk(sb, tmp_bh.b_blocknr); 2233 bh = sb_getblk(sb, tmp_bh.b_blocknr);
2090 if (!bh) { 2234 if (!bh) {
2091 err = -EIO; 2235 err = -EIO;
2092 goto out; 2236 goto out;
2093 } 2237 }
2094 lock_buffer(bh); 2238 lock_buffer(bh);
2095 memcpy(bh->b_data+offset, data, tocopy); 2239 memcpy(bh->b_data + offset, data, tocopy);
2096 flush_dcache_page(bh->b_page); 2240 flush_dcache_page(bh->b_page);
2097 set_buffer_uptodate(bh); 2241 set_buffer_uptodate(bh);
2098 unlock_buffer(bh); 2242 unlock_buffer(bh);
2099 reiserfs_prepare_for_journal(sb, bh, 1); 2243 reiserfs_prepare_for_journal(sb, bh, 1);
2100 journal_mark_dirty(current->journal_info, sb, bh); 2244 journal_mark_dirty(current->journal_info, sb, bh);
2101 if (!journal_quota) 2245 if (!journal_quota)
2102 reiserfs_add_ordered_list(inode, bh); 2246 reiserfs_add_ordered_list(inode, bh);
2103 brelse(bh); 2247 brelse(bh);
2104 offset = 0; 2248 offset = 0;
2105 towrite -= tocopy; 2249 towrite -= tocopy;
2106 data += tocopy; 2250 data += tocopy;
2107 blk++; 2251 blk++;
2108 } 2252 }
2109out: 2253 out:
2110 if (len == towrite) 2254 if (len == towrite)
2111 return err; 2255 return err;
2112 if (inode->i_size < off+len-towrite) 2256 if (inode->i_size < off + len - towrite)
2113 i_size_write(inode, off+len-towrite); 2257 i_size_write(inode, off + len - towrite);
2114 inode->i_version++; 2258 inode->i_version++;
2115 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2259 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2116 mark_inode_dirty(inode); 2260 mark_inode_dirty(inode);
2117 up(&inode->i_sem); 2261 up(&inode->i_sem);
2118 return len - towrite; 2262 return len - towrite;
2119} 2263}
2120 2264
2121#endif 2265#endif
2122 2266
2123static struct super_block* 2267static struct super_block *get_super_block(struct file_system_type *fs_type,
2124get_super_block (struct file_system_type *fs_type, int flags, 2268 int flags, const char *dev_name,
2125 const char *dev_name, void *data) 2269 void *data)
2126{ 2270{
2127 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); 2271 return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
2128} 2272}
2129 2273
2130static int __init 2274static int __init init_reiserfs_fs(void)
2131init_reiserfs_fs ( void )
2132{ 2275{
2133 int ret; 2276 int ret;
2134 2277
2135 if ((ret = init_inodecache ())) { 2278 if ((ret = init_inodecache())) {
2136 return ret; 2279 return ret;
2137 } 2280 }
2138 2281
2139 if ((ret = reiserfs_xattr_register_handlers ())) 2282 if ((ret = reiserfs_xattr_register_handlers()))
2140 goto failed_reiserfs_xattr_register_handlers; 2283 goto failed_reiserfs_xattr_register_handlers;
2141 2284
2142 reiserfs_proc_info_global_init (); 2285 reiserfs_proc_info_global_init();
2143 reiserfs_proc_register_global ("version", reiserfs_global_version_in_proc); 2286 reiserfs_proc_register_global("version",
2287 reiserfs_global_version_in_proc);
2144 2288
2145 ret = register_filesystem (& reiserfs_fs_type); 2289 ret = register_filesystem(&reiserfs_fs_type);
2146 2290
2147 if (ret == 0) { 2291 if (ret == 0) {
2148 return 0; 2292 return 0;
2149 } 2293 }
2150 2294
2151 reiserfs_xattr_unregister_handlers (); 2295 reiserfs_xattr_unregister_handlers();
2152 2296
2153failed_reiserfs_xattr_register_handlers: 2297 failed_reiserfs_xattr_register_handlers:
2154 reiserfs_proc_unregister_global ("version"); 2298 reiserfs_proc_unregister_global("version");
2155 reiserfs_proc_info_global_done (); 2299 reiserfs_proc_info_global_done();
2156 destroy_inodecache (); 2300 destroy_inodecache();
2157 2301
2158 return ret; 2302 return ret;
2159} 2303}
2160 2304
2161static void __exit 2305static void __exit exit_reiserfs_fs(void)
2162exit_reiserfs_fs ( void )
2163{ 2306{
2164 reiserfs_xattr_unregister_handlers (); 2307 reiserfs_xattr_unregister_handlers();
2165 reiserfs_proc_unregister_global ("version"); 2308 reiserfs_proc_unregister_global("version");
2166 reiserfs_proc_info_global_done (); 2309 reiserfs_proc_info_global_done();
2167 unregister_filesystem (& reiserfs_fs_type); 2310 unregister_filesystem(&reiserfs_fs_type);
2168 destroy_inodecache (); 2311 destroy_inodecache();
2169} 2312}
2170 2313
2171struct file_system_type reiserfs_fs_type = { 2314struct file_system_type reiserfs_fs_type = {
2172 .owner = THIS_MODULE, 2315 .owner = THIS_MODULE,
2173 .name = "reiserfs", 2316 .name = "reiserfs",
2174 .get_sb = get_super_block, 2317 .get_sb = get_super_block,
2175 .kill_sb = kill_block_super, 2318 .kill_sb = kill_block_super,
2176 .fs_flags = FS_REQUIRES_DEV, 2319 .fs_flags = FS_REQUIRES_DEV,
2177}; 2320};
2178 2321
2179MODULE_DESCRIPTION ("ReiserFS journaled filesystem"); 2322MODULE_DESCRIPTION("ReiserFS journaled filesystem");
2180MODULE_AUTHOR ("Hans Reiser <reiser@namesys.com>"); 2323MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
2181MODULE_LICENSE ("GPL"); 2324MODULE_LICENSE("GPL");
2182 2325
2183module_init (init_reiserfs_fs); 2326module_init(init_reiserfs_fs);
2184module_exit (exit_reiserfs_fs); 2327module_exit(exit_reiserfs_fs);
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index 6191909d516..c92e124f628 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -11,156 +11,159 @@
11/* access to tail : when one is going to read tail it must make sure, that is not running. 11/* access to tail : when one is going to read tail it must make sure, that is not running.
12 direct2indirect and indirect2direct can not run concurrently */ 12 direct2indirect and indirect2direct can not run concurrently */
13 13
14
15/* Converts direct items to an unformatted node. Panics if file has no 14/* Converts direct items to an unformatted node. Panics if file has no
16 tail. -ENOSPC if no disk space for conversion */ 15 tail. -ENOSPC if no disk space for conversion */
17/* path points to first direct item of the file regarless of how many of 16/* path points to first direct item of the file regarless of how many of
18 them are there */ 17 them are there */
19int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inode, 18int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
20 struct path * path, struct buffer_head * unbh, 19 struct path *path, struct buffer_head *unbh,
21 loff_t tail_offset) 20 loff_t tail_offset)
22{ 21{
23 struct super_block * sb = inode->i_sb; 22 struct super_block *sb = inode->i_sb;
24 struct buffer_head *up_to_date_bh ; 23 struct buffer_head *up_to_date_bh;
25 struct item_head * p_le_ih = PATH_PITEM_HEAD (path); 24 struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
26 unsigned long total_tail = 0 ; 25 unsigned long total_tail = 0;
27 struct cpu_key end_key; /* Key to search for the last byte of the 26 struct cpu_key end_key; /* Key to search for the last byte of the
28 converted item. */ 27 converted item. */
29 struct item_head ind_ih; /* new indirect item to be inserted or 28 struct item_head ind_ih; /* new indirect item to be inserted or
30 key of unfm pointer to be pasted */ 29 key of unfm pointer to be pasted */
31 int n_blk_size, 30 int n_blk_size, n_retval; /* returned value for reiserfs_insert_item and clones */
32 n_retval; /* returned value for reiserfs_insert_item and clones */ 31 unp_t unfm_ptr; /* Handle on an unformatted node
33 unp_t unfm_ptr; /* Handle on an unformatted node 32 that will be inserted in the
34 that will be inserted in the 33 tree. */
35 tree. */ 34
36 35 BUG_ON(!th->t_trans_id);
37 BUG_ON (!th->t_trans_id); 36
38 37 REISERFS_SB(sb)->s_direct2indirect++;
39 REISERFS_SB(sb)->s_direct2indirect ++; 38
40 39 n_blk_size = sb->s_blocksize;
41 n_blk_size = sb->s_blocksize; 40
42 41 /* and key to search for append or insert pointer to the new
43 /* and key to search for append or insert pointer to the new 42 unformatted node. */
44 unformatted node. */ 43 copy_item_head(&ind_ih, p_le_ih);
45 copy_item_head (&ind_ih, p_le_ih); 44 set_le_ih_k_offset(&ind_ih, tail_offset);
46 set_le_ih_k_offset (&ind_ih, tail_offset); 45 set_le_ih_k_type(&ind_ih, TYPE_INDIRECT);
47 set_le_ih_k_type (&ind_ih, TYPE_INDIRECT); 46
48 47 /* Set the key to search for the place for new unfm pointer */
49 /* Set the key to search for the place for new unfm pointer */ 48 make_cpu_key(&end_key, inode, tail_offset, TYPE_INDIRECT, 4);
50 make_cpu_key (&end_key, inode, tail_offset, TYPE_INDIRECT, 4); 49
51 50 // FIXME: we could avoid this
52 // FIXME: we could avoid this 51 if (search_for_position_by_key(sb, &end_key, path) == POSITION_FOUND) {
53 if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) { 52 reiserfs_warning(sb, "PAP-14030: direct2indirect: "
54 reiserfs_warning (sb, "PAP-14030: direct2indirect: " 53 "pasted or inserted byte exists in the tree %K. "
55 "pasted or inserted byte exists in the tree %K. " 54 "Use fsck to repair.", &end_key);
56 "Use fsck to repair.", &end_key); 55 pathrelse(path);
57 pathrelse(path); 56 return -EIO;
58 return -EIO; 57 }
59 } 58
60 59 p_le_ih = PATH_PITEM_HEAD(path);
61 p_le_ih = PATH_PITEM_HEAD (path); 60
62 61 unfm_ptr = cpu_to_le32(unbh->b_blocknr);
63 unfm_ptr = cpu_to_le32 (unbh->b_blocknr); 62
64 63 if (is_statdata_le_ih(p_le_ih)) {
65 if ( is_statdata_le_ih (p_le_ih) ) { 64 /* Insert new indirect item. */
66 /* Insert new indirect item. */ 65 set_ih_free_space(&ind_ih, 0); /* delete at nearest future */
67 set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ 66 put_ih_item_len(&ind_ih, UNFM_P_SIZE);
68 put_ih_item_len( &ind_ih, UNFM_P_SIZE ); 67 PATH_LAST_POSITION(path)++;
69 PATH_LAST_POSITION (path)++; 68 n_retval =
70 n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, inode, 69 reiserfs_insert_item(th, path, &end_key, &ind_ih, inode,
71 (char *)&unfm_ptr); 70 (char *)&unfm_ptr);
72 } else {
73 /* Paste into last indirect item of an object. */
74 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode,
75 (char *)&unfm_ptr, UNFM_P_SIZE);
76 }
77 if ( n_retval ) {
78 return n_retval;
79 }
80
81 // note: from here there are two keys which have matching first
82 // three key components. They only differ by the fourth one.
83
84
85 /* Set the key to search for the direct items of the file */
86 make_cpu_key (&end_key, inode, max_reiserfs_offset (inode), TYPE_DIRECT, 4);
87
88 /* Move bytes from the direct items to the new unformatted node
89 and delete them. */
90 while (1) {
91 int tail_size;
92
93 /* end_key.k_offset is set so, that we will always have found
94 last item of the file */
95 if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND )
96 reiserfs_panic (sb, "PAP-14050: direct2indirect: "
97 "direct item (%K) not found", &end_key);
98 p_le_ih = PATH_PITEM_HEAD (path);
99 RFALSE( !is_direct_le_ih (p_le_ih),
100 "vs-14055: direct item expected(%K), found %h",
101 &end_key, p_le_ih);
102 tail_size = (le_ih_k_offset (p_le_ih) & (n_blk_size - 1))
103 + ih_item_len(p_le_ih) - 1;
104
105 /* we only send the unbh pointer if the buffer is not up to date.
106 ** this avoids overwriting good data from writepage() with old data
107 ** from the disk or buffer cache
108 ** Special case: unbh->b_page will be NULL if we are coming through
109 ** DIRECT_IO handler here.
110 */
111 if (!unbh->b_page || buffer_uptodate(unbh) || PageUptodate(unbh->b_page)) {
112 up_to_date_bh = NULL ;
113 } else { 71 } else {
114 up_to_date_bh = unbh ; 72 /* Paste into last indirect item of an object. */
73 n_retval = reiserfs_paste_into_item(th, path, &end_key, inode,
74 (char *)&unfm_ptr,
75 UNFM_P_SIZE);
115 } 76 }
116 n_retval = reiserfs_delete_item (th, path, &end_key, inode, 77 if (n_retval) {
117 up_to_date_bh) ; 78 return n_retval;
118 79 }
119 total_tail += n_retval ; 80 // note: from here there are two keys which have matching first
120 if (tail_size == n_retval) 81 // three key components. They only differ by the fourth one.
121 // done: file does not have direct items anymore 82
122 break; 83 /* Set the key to search for the direct items of the file */
123 84 make_cpu_key(&end_key, inode, max_reiserfs_offset(inode), TYPE_DIRECT,
124 } 85 4);
125 /* if we've copied bytes from disk into the page, we need to zero 86
126 ** out the unused part of the block (it was not up to date before) 87 /* Move bytes from the direct items to the new unformatted node
127 */ 88 and delete them. */
128 if (up_to_date_bh) { 89 while (1) {
129 unsigned pgoff = (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1); 90 int tail_size;
130 char *kaddr=kmap_atomic(up_to_date_bh->b_page, KM_USER0); 91
131 memset(kaddr + pgoff, 0, n_blk_size - total_tail) ; 92 /* end_key.k_offset is set so, that we will always have found
132 kunmap_atomic(kaddr, KM_USER0); 93 last item of the file */
133 } 94 if (search_for_position_by_key(sb, &end_key, path) ==
134 95 POSITION_FOUND)
135 REISERFS_I(inode)->i_first_direct_byte = U32_MAX; 96 reiserfs_panic(sb,
136 97 "PAP-14050: direct2indirect: "
137 return 0; 98 "direct item (%K) not found", &end_key);
138} 99 p_le_ih = PATH_PITEM_HEAD(path);
100 RFALSE(!is_direct_le_ih(p_le_ih),
101 "vs-14055: direct item expected(%K), found %h",
102 &end_key, p_le_ih);
103 tail_size = (le_ih_k_offset(p_le_ih) & (n_blk_size - 1))
104 + ih_item_len(p_le_ih) - 1;
105
106 /* we only send the unbh pointer if the buffer is not up to date.
107 ** this avoids overwriting good data from writepage() with old data
108 ** from the disk or buffer cache
109 ** Special case: unbh->b_page will be NULL if we are coming through
110 ** DIRECT_IO handler here.
111 */
112 if (!unbh->b_page || buffer_uptodate(unbh)
113 || PageUptodate(unbh->b_page)) {
114 up_to_date_bh = NULL;
115 } else {
116 up_to_date_bh = unbh;
117 }
118 n_retval = reiserfs_delete_item(th, path, &end_key, inode,
119 up_to_date_bh);
120
121 total_tail += n_retval;
122 if (tail_size == n_retval)
123 // done: file does not have direct items anymore
124 break;
139 125
126 }
127 /* if we've copied bytes from disk into the page, we need to zero
128 ** out the unused part of the block (it was not up to date before)
129 */
130 if (up_to_date_bh) {
131 unsigned pgoff =
132 (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
133 char *kaddr = kmap_atomic(up_to_date_bh->b_page, KM_USER0);
134 memset(kaddr + pgoff, 0, n_blk_size - total_tail);
135 kunmap_atomic(kaddr, KM_USER0);
136 }
137
138 REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
139
140 return 0;
141}
140 142
141/* stolen from fs/buffer.c */ 143/* stolen from fs/buffer.c */
142void reiserfs_unmap_buffer(struct buffer_head *bh) { 144void reiserfs_unmap_buffer(struct buffer_head *bh)
143 lock_buffer(bh) ; 145{
144 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { 146 lock_buffer(bh);
145 BUG() ; 147 if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
146 } 148 BUG();
147 clear_buffer_dirty(bh) ; 149 }
148 /* Remove the buffer from whatever list it belongs to. We are mostly 150 clear_buffer_dirty(bh);
149 interested in removing it from per-sb j_dirty_buffers list, to avoid 151 /* Remove the buffer from whatever list it belongs to. We are mostly
150 BUG() on attempt to write not mapped buffer */ 152 interested in removing it from per-sb j_dirty_buffers list, to avoid
151 if ( (!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) { 153 BUG() on attempt to write not mapped buffer */
152 struct inode *inode = bh->b_page->mapping->host; 154 if ((!list_empty(&bh->b_assoc_buffers) || bh->b_private) && bh->b_page) {
153 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb); 155 struct inode *inode = bh->b_page->mapping->host;
154 spin_lock(&j->j_dirty_buffers_lock); 156 struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
155 list_del_init(&bh->b_assoc_buffers); 157 spin_lock(&j->j_dirty_buffers_lock);
156 reiserfs_free_jh(bh); 158 list_del_init(&bh->b_assoc_buffers);
157 spin_unlock(&j->j_dirty_buffers_lock); 159 reiserfs_free_jh(bh);
158 } 160 spin_unlock(&j->j_dirty_buffers_lock);
159 clear_buffer_mapped(bh) ; 161 }
160 clear_buffer_req(bh) ; 162 clear_buffer_mapped(bh);
161 clear_buffer_new(bh); 163 clear_buffer_req(bh);
162 bh->b_bdev = NULL; 164 clear_buffer_new(bh);
163 unlock_buffer(bh) ; 165 bh->b_bdev = NULL;
166 unlock_buffer(bh);
164} 167}
165 168
166/* this first locks inode (neither reads nor sync are permitted), 169/* this first locks inode (neither reads nor sync are permitted),
@@ -169,108 +172,108 @@ void reiserfs_unmap_buffer(struct buffer_head *bh) {
169 what we expect from it (number of cut bytes). But when tail remains 172 what we expect from it (number of cut bytes). But when tail remains
170 in the unformatted node, we set mode to SKIP_BALANCING and unlock 173 in the unformatted node, we set mode to SKIP_BALANCING and unlock
171 inode */ 174 inode */
172int indirect2direct (struct reiserfs_transaction_handle *th, 175int indirect2direct(struct reiserfs_transaction_handle *th, struct inode *p_s_inode, struct page *page, struct path *p_s_path, /* path to the indirect item. */
173 struct inode * p_s_inode, 176 const struct cpu_key *p_s_item_key, /* Key to look for unformatted node pointer to be cut. */
174 struct page *page, 177 loff_t n_new_file_size, /* New file size. */
175 struct path * p_s_path, /* path to the indirect item. */ 178 char *p_c_mode)
176 const struct cpu_key * p_s_item_key, /* Key to look for unformatted node pointer to be cut. */
177 loff_t n_new_file_size, /* New file size. */
178 char * p_c_mode)
179{ 179{
180 struct super_block * p_s_sb = p_s_inode->i_sb; 180 struct super_block *p_s_sb = p_s_inode->i_sb;
181 struct item_head s_ih; 181 struct item_head s_ih;
182 unsigned long n_block_size = p_s_sb->s_blocksize; 182 unsigned long n_block_size = p_s_sb->s_blocksize;
183 char * tail; 183 char *tail;
184 int tail_len, round_tail_len; 184 int tail_len, round_tail_len;
185 loff_t pos, pos1; /* position of first byte of the tail */ 185 loff_t pos, pos1; /* position of first byte of the tail */
186 struct cpu_key key; 186 struct cpu_key key;
187 187
188 BUG_ON (!th->t_trans_id); 188 BUG_ON(!th->t_trans_id);
189 189
190 REISERFS_SB(p_s_sb)->s_indirect2direct ++; 190 REISERFS_SB(p_s_sb)->s_indirect2direct++;
191 191
192 *p_c_mode = M_SKIP_BALANCING; 192 *p_c_mode = M_SKIP_BALANCING;
193 193
194 /* store item head path points to. */ 194 /* store item head path points to. */
195 copy_item_head (&s_ih, PATH_PITEM_HEAD(p_s_path));
196
197 tail_len = (n_new_file_size & (n_block_size - 1));
198 if (get_inode_sd_version (p_s_inode) == STAT_DATA_V2)
199 round_tail_len = ROUND_UP (tail_len);
200 else
201 round_tail_len = tail_len;
202
203 pos = le_ih_k_offset (&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize;
204 pos1 = pos;
205
206 // we are protected by i_sem. The tail can not disapper, not
207 // append can be done either
208 // we are in truncate or packing tail in file_release
209
210 tail = (char *)kmap(page) ; /* this can schedule */
211
212 if (path_changed (&s_ih, p_s_path)) {
213 /* re-search indirect item */
214 if ( search_for_position_by_key (p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND )
215 reiserfs_panic(p_s_sb, "PAP-5520: indirect2direct: "
216 "item to be converted %K does not exist", p_s_item_key);
217 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); 195 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
196
197 tail_len = (n_new_file_size & (n_block_size - 1));
198 if (get_inode_sd_version(p_s_inode) == STAT_DATA_V2)
199 round_tail_len = ROUND_UP(tail_len);
200 else
201 round_tail_len = tail_len;
202
203 pos =
204 le_ih_k_offset(&s_ih) - 1 + (ih_item_len(&s_ih) / UNFM_P_SIZE -
205 1) * p_s_sb->s_blocksize;
206 pos1 = pos;
207
208 // we are protected by i_sem. The tail can not disapper, not
209 // append can be done either
210 // we are in truncate or packing tail in file_release
211
212 tail = (char *)kmap(page); /* this can schedule */
213
214 if (path_changed(&s_ih, p_s_path)) {
215 /* re-search indirect item */
216 if (search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path)
217 == POSITION_NOT_FOUND)
218 reiserfs_panic(p_s_sb,
219 "PAP-5520: indirect2direct: "
220 "item to be converted %K does not exist",
221 p_s_item_key);
222 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
218#ifdef CONFIG_REISERFS_CHECK 223#ifdef CONFIG_REISERFS_CHECK
219 pos = le_ih_k_offset (&s_ih) - 1 + 224 pos = le_ih_k_offset(&s_ih) - 1 +
220 (ih_item_len(&s_ih) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; 225 (ih_item_len(&s_ih) / UNFM_P_SIZE -
221 if (pos != pos1) 226 1) * p_s_sb->s_blocksize;
222 reiserfs_panic (p_s_sb, "vs-5530: indirect2direct: " 227 if (pos != pos1)
223 "tail position changed while we were reading it"); 228 reiserfs_panic(p_s_sb, "vs-5530: indirect2direct: "
229 "tail position changed while we were reading it");
224#endif 230#endif
225 } 231 }
226
227
228 /* Set direct item header to insert. */
229 make_le_item_head (&s_ih, NULL, get_inode_item_key_version (p_s_inode), pos1 + 1,
230 TYPE_DIRECT, round_tail_len, 0xffff/*ih_free_space*/);
231
232 /* we want a pointer to the first byte of the tail in the page.
233 ** the page was locked and this part of the page was up to date when
234 ** indirect2direct was called, so we know the bytes are still valid
235 */
236 tail = tail + (pos & (PAGE_CACHE_SIZE - 1)) ;
237
238 PATH_LAST_POSITION(p_s_path)++;
239
240 key = *p_s_item_key;
241 set_cpu_key_k_type (&key, TYPE_DIRECT);
242 key.key_length = 4;
243 /* Insert tail as new direct item in the tree */
244 if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode,
245 tail ? tail : NULL) < 0 ) {
246 /* No disk memory. So we can not convert last unformatted node
247 to the direct item. In this case we used to adjust
248 indirect items's ih_free_space. Now ih_free_space is not
249 used, it would be ideal to write zeros to corresponding
250 unformatted node. For now i_size is considered as guard for
251 going out of file size */
252 kunmap(page) ;
253 return n_block_size - round_tail_len;
254 }
255 kunmap(page) ;
256
257 /* make sure to get the i_blocks changes from reiserfs_insert_item */
258 reiserfs_update_sd(th, p_s_inode);
259 232
260 // note: we have now the same as in above direct2indirect 233 /* Set direct item header to insert. */
261 // conversion: there are two keys which have matching first three 234 make_le_item_head(&s_ih, NULL, get_inode_item_key_version(p_s_inode),
262 // key components. They only differ by the fouhth one. 235 pos1 + 1, TYPE_DIRECT, round_tail_len,
236 0xffff /*ih_free_space */ );
237
238 /* we want a pointer to the first byte of the tail in the page.
239 ** the page was locked and this part of the page was up to date when
240 ** indirect2direct was called, so we know the bytes are still valid
241 */
242 tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
243
244 PATH_LAST_POSITION(p_s_path)++;
245
246 key = *p_s_item_key;
247 set_cpu_key_k_type(&key, TYPE_DIRECT);
248 key.key_length = 4;
249 /* Insert tail as new direct item in the tree */
250 if (reiserfs_insert_item(th, p_s_path, &key, &s_ih, p_s_inode,
251 tail ? tail : NULL) < 0) {
252 /* No disk memory. So we can not convert last unformatted node
253 to the direct item. In this case we used to adjust
254 indirect items's ih_free_space. Now ih_free_space is not
255 used, it would be ideal to write zeros to corresponding
256 unformatted node. For now i_size is considered as guard for
257 going out of file size */
258 kunmap(page);
259 return n_block_size - round_tail_len;
260 }
261 kunmap(page);
263 262
264 /* We have inserted new direct item and must remove last 263 /* make sure to get the i_blocks changes from reiserfs_insert_item */
265 unformatted node. */ 264 reiserfs_update_sd(th, p_s_inode);
266 *p_c_mode = M_CUT;
267 265
268 /* we store position of first direct item in the in-core inode */ 266 // note: we have now the same as in above direct2indirect
269 //mark_file_with_tail (p_s_inode, pos1 + 1); 267 // conversion: there are two keys which have matching first three
270 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1; 268 // key components. They only differ by the fouhth one.
271
272 return n_block_size - round_tail_len;
273}
274 269
270 /* We have inserted new direct item and must remove last
271 unformatted node. */
272 *p_c_mode = M_CUT;
275 273
274 /* we store position of first direct item in the in-core inode */
275 //mark_file_with_tail (p_s_inode, pos1 + 1);
276 REISERFS_I(p_s_inode)->i_first_direct_byte = pos1 + 1;
276 277
278 return n_block_size - round_tail_len;
279}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 45582fe8b46..e386d3db305 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -51,67 +51,68 @@
51#define PRIVROOT_NAME ".reiserfs_priv" 51#define PRIVROOT_NAME ".reiserfs_priv"
52#define XAROOT_NAME "xattrs" 52#define XAROOT_NAME "xattrs"
53 53
54static struct reiserfs_xattr_handler *find_xattr_handler_prefix (const char *prefix); 54static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
55 *prefix);
55 56
56static struct dentry * 57static struct dentry *create_xa_root(struct super_block *sb)
57create_xa_root (struct super_block *sb)
58{ 58{
59 struct dentry *privroot = dget (REISERFS_SB(sb)->priv_root); 59 struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root);
60 struct dentry *xaroot; 60 struct dentry *xaroot;
61 61
62 /* This needs to be created at mount-time */ 62 /* This needs to be created at mount-time */
63 if (!privroot) 63 if (!privroot)
64 return ERR_PTR(-EOPNOTSUPP); 64 return ERR_PTR(-EOPNOTSUPP);
65 65
66 xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); 66 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
67 if (IS_ERR (xaroot)) { 67 if (IS_ERR(xaroot)) {
68 goto out; 68 goto out;
69 } else if (!xaroot->d_inode) { 69 } else if (!xaroot->d_inode) {
70 int err; 70 int err;
71 down (&privroot->d_inode->i_sem); 71 down(&privroot->d_inode->i_sem);
72 err = privroot->d_inode->i_op->mkdir (privroot->d_inode, xaroot, 0700); 72 err =
73 up (&privroot->d_inode->i_sem); 73 privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
74 74 0700);
75 if (err) { 75 up(&privroot->d_inode->i_sem);
76 dput (xaroot); 76
77 dput (privroot); 77 if (err) {
78 return ERR_PTR (err); 78 dput(xaroot);
79 } 79 dput(privroot);
80 REISERFS_SB(sb)->xattr_root = dget (xaroot); 80 return ERR_PTR(err);
81 } 81 }
82 82 REISERFS_SB(sb)->xattr_root = dget(xaroot);
83out: 83 }
84 dput (privroot); 84
85 return xaroot; 85 out:
86 dput(privroot);
87 return xaroot;
86} 88}
87 89
88/* This will return a dentry, or error, refering to the xa root directory. 90/* This will return a dentry, or error, refering to the xa root directory.
89 * If the xa root doesn't exist yet, the dentry will be returned without 91 * If the xa root doesn't exist yet, the dentry will be returned without
90 * an associated inode. This dentry can be used with ->mkdir to create 92 * an associated inode. This dentry can be used with ->mkdir to create
91 * the xa directory. */ 93 * the xa directory. */
92static struct dentry * 94static struct dentry *__get_xa_root(struct super_block *s)
93__get_xa_root (struct super_block *s)
94{ 95{
95 struct dentry *privroot = dget (REISERFS_SB(s)->priv_root); 96 struct dentry *privroot = dget(REISERFS_SB(s)->priv_root);
96 struct dentry *xaroot = NULL; 97 struct dentry *xaroot = NULL;
97 98
98 if (IS_ERR (privroot) || !privroot) 99 if (IS_ERR(privroot) || !privroot)
99 return privroot; 100 return privroot;
100 101
101 xaroot = lookup_one_len (XAROOT_NAME, privroot, strlen (XAROOT_NAME)); 102 xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
102 if (IS_ERR (xaroot)) { 103 if (IS_ERR(xaroot)) {
103 goto out; 104 goto out;
104 } else if (!xaroot->d_inode) { 105 } else if (!xaroot->d_inode) {
105 dput (xaroot); 106 dput(xaroot);
106 xaroot = NULL; 107 xaroot = NULL;
107 goto out; 108 goto out;
108 } 109 }
109 110
110 REISERFS_SB(s)->xattr_root = dget (xaroot); 111 REISERFS_SB(s)->xattr_root = dget(xaroot);
111 112
112out: 113 out:
113 dput (privroot); 114 dput(privroot);
114 return xaroot; 115 return xaroot;
115} 116}
116 117
117/* Returns the dentry (or NULL) referring to the root of the extended 118/* Returns the dentry (or NULL) referring to the root of the extended
@@ -119,147 +120,145 @@ out:
119 * Otherwise, we attempt to retreive it from disk. It may also return 120 * Otherwise, we attempt to retreive it from disk. It may also return
120 * a pointer-encoded error. 121 * a pointer-encoded error.
121 */ 122 */
122static inline struct dentry * 123static inline struct dentry *get_xa_root(struct super_block *s)
123get_xa_root (struct super_block *s)
124{ 124{
125 struct dentry *dentry = dget (REISERFS_SB(s)->xattr_root); 125 struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root);
126 126
127 if (!dentry) 127 if (!dentry)
128 dentry = __get_xa_root (s); 128 dentry = __get_xa_root(s);
129 129
130 return dentry; 130 return dentry;
131} 131}
132 132
133/* Opens the directory corresponding to the inode's extended attribute store. 133/* Opens the directory corresponding to the inode's extended attribute store.
134 * If flags allow, the tree to the directory may be created. If creation is 134 * If flags allow, the tree to the directory may be created. If creation is
135 * prohibited, -ENODATA is returned. */ 135 * prohibited, -ENODATA is returned. */
136static struct dentry * 136static struct dentry *open_xa_dir(const struct inode *inode, int flags)
137open_xa_dir (const struct inode *inode, int flags)
138{ 137{
139 struct dentry *xaroot, *xadir; 138 struct dentry *xaroot, *xadir;
140 char namebuf[17]; 139 char namebuf[17];
141 140
142 xaroot = get_xa_root (inode->i_sb); 141 xaroot = get_xa_root(inode->i_sb);
143 if (IS_ERR (xaroot)) { 142 if (IS_ERR(xaroot)) {
144 return xaroot; 143 return xaroot;
145 } else if (!xaroot) { 144 } else if (!xaroot) {
146 if (flags == 0 || flags & XATTR_CREATE) { 145 if (flags == 0 || flags & XATTR_CREATE) {
147 xaroot = create_xa_root (inode->i_sb); 146 xaroot = create_xa_root(inode->i_sb);
148 if (IS_ERR (xaroot)) 147 if (IS_ERR(xaroot))
149 return xaroot; 148 return xaroot;
150 } 149 }
151 if (!xaroot) 150 if (!xaroot)
152 return ERR_PTR (-ENODATA); 151 return ERR_PTR(-ENODATA);
153 } 152 }
154 153
155 /* ok, we have xaroot open */ 154 /* ok, we have xaroot open */
156 155
157 snprintf (namebuf, sizeof (namebuf), "%X.%X", 156 snprintf(namebuf, sizeof(namebuf), "%X.%X",
158 le32_to_cpu (INODE_PKEY (inode)->k_objectid), 157 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
159 inode->i_generation); 158 inode->i_generation);
160 xadir = lookup_one_len (namebuf, xaroot, strlen (namebuf)); 159 xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
161 if (IS_ERR (xadir)) { 160 if (IS_ERR(xadir)) {
162 dput (xaroot); 161 dput(xaroot);
163 return xadir; 162 return xadir;
164 } 163 }
165 164
166 if (!xadir->d_inode) { 165 if (!xadir->d_inode) {
167 int err; 166 int err;
168 if (flags == 0 || flags & XATTR_CREATE) { 167 if (flags == 0 || flags & XATTR_CREATE) {
169 /* Although there is nothing else trying to create this directory, 168 /* Although there is nothing else trying to create this directory,
170 * another directory with the same hash may be created, so we need 169 * another directory with the same hash may be created, so we need
171 * to protect against that */ 170 * to protect against that */
172 err = xaroot->d_inode->i_op->mkdir (xaroot->d_inode, xadir, 0700); 171 err =
173 if (err) { 172 xaroot->d_inode->i_op->mkdir(xaroot->d_inode, xadir,
174 dput (xaroot); 173 0700);
175 dput (xadir); 174 if (err) {
176 return ERR_PTR (err); 175 dput(xaroot);
177 } 176 dput(xadir);
178 } 177 return ERR_PTR(err);
179 if (!xadir->d_inode) { 178 }
180 dput (xaroot); 179 }
181 dput (xadir); 180 if (!xadir->d_inode) {
182 return ERR_PTR (-ENODATA); 181 dput(xaroot);
183 } 182 dput(xadir);
184 } 183 return ERR_PTR(-ENODATA);
185 184 }
186 dput (xaroot); 185 }
187 return xadir; 186
187 dput(xaroot);
188 return xadir;
188} 189}
189 190
190/* Returns a dentry corresponding to a specific extended attribute file 191/* Returns a dentry corresponding to a specific extended attribute file
191 * for the inode. If flags allow, the file is created. Otherwise, a 192 * for the inode. If flags allow, the file is created. Otherwise, a
192 * valid or negative dentry, or an error is returned. */ 193 * valid or negative dentry, or an error is returned. */
193static struct dentry * 194static struct dentry *get_xa_file_dentry(const struct inode *inode,
194get_xa_file_dentry (const struct inode *inode, const char *name, int flags) 195 const char *name, int flags)
195{ 196{
196 struct dentry *xadir, *xafile; 197 struct dentry *xadir, *xafile;
197 int err = 0; 198 int err = 0;
198 199
199 xadir = open_xa_dir (inode, flags); 200 xadir = open_xa_dir(inode, flags);
200 if (IS_ERR (xadir)) { 201 if (IS_ERR(xadir)) {
201 return ERR_PTR (PTR_ERR (xadir)); 202 return ERR_PTR(PTR_ERR(xadir));
202 } else if (xadir && !xadir->d_inode) { 203 } else if (xadir && !xadir->d_inode) {
203 dput (xadir); 204 dput(xadir);
204 return ERR_PTR (-ENODATA); 205 return ERR_PTR(-ENODATA);
205 } 206 }
206 207
207 xafile = lookup_one_len (name, xadir, strlen (name)); 208 xafile = lookup_one_len(name, xadir, strlen(name));
208 if (IS_ERR (xafile)) { 209 if (IS_ERR(xafile)) {
209 dput (xadir); 210 dput(xadir);
210 return ERR_PTR (PTR_ERR (xafile)); 211 return ERR_PTR(PTR_ERR(xafile));
211 } 212 }
212 213
213 if (xafile->d_inode) { /* file exists */ 214 if (xafile->d_inode) { /* file exists */
214 if (flags & XATTR_CREATE) { 215 if (flags & XATTR_CREATE) {
215 err = -EEXIST; 216 err = -EEXIST;
216 dput (xafile); 217 dput(xafile);
217 goto out; 218 goto out;
218 } 219 }
219 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) { 220 } else if (flags & XATTR_REPLACE || flags & FL_READONLY) {
220 goto out; 221 goto out;
221 } else { 222 } else {
222 /* inode->i_sem is down, so nothing else can try to create 223 /* inode->i_sem is down, so nothing else can try to create
223 * the same xattr */ 224 * the same xattr */
224 err = xadir->d_inode->i_op->create (xadir->d_inode, xafile, 225 err = xadir->d_inode->i_op->create(xadir->d_inode, xafile,
225 0700|S_IFREG, NULL); 226 0700 | S_IFREG, NULL);
226 227
227 if (err) { 228 if (err) {
228 dput (xafile); 229 dput(xafile);
229 goto out; 230 goto out;
230 } 231 }
231 } 232 }
232
233out:
234 dput (xadir);
235 if (err)
236 xafile = ERR_PTR (err);
237 return xafile;
238}
239 233
234 out:
235 dput(xadir);
236 if (err)
237 xafile = ERR_PTR(err);
238 return xafile;
239}
240 240
241/* Opens a file pointer to the attribute associated with inode */ 241/* Opens a file pointer to the attribute associated with inode */
242static struct file * 242static struct file *open_xa_file(const struct inode *inode, const char *name,
243open_xa_file (const struct inode *inode, const char *name, int flags) 243 int flags)
244{ 244{
245 struct dentry *xafile; 245 struct dentry *xafile;
246 struct file *fp; 246 struct file *fp;
247 247
248 xafile = get_xa_file_dentry (inode, name, flags); 248 xafile = get_xa_file_dentry(inode, name, flags);
249 if (IS_ERR (xafile)) 249 if (IS_ERR(xafile))
250 return ERR_PTR (PTR_ERR (xafile)); 250 return ERR_PTR(PTR_ERR(xafile));
251 else if (!xafile->d_inode) { 251 else if (!xafile->d_inode) {
252 dput (xafile); 252 dput(xafile);
253 return ERR_PTR (-ENODATA); 253 return ERR_PTR(-ENODATA);
254 } 254 }
255 255
256 fp = dentry_open (xafile, NULL, O_RDWR); 256 fp = dentry_open(xafile, NULL, O_RDWR);
257 /* dentry_open dputs the dentry if it fails */ 257 /* dentry_open dputs the dentry if it fails */
258 258
259 return fp; 259 return fp;
260} 260}
261 261
262
263/* 262/*
264 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but 263 * this is very similar to fs/reiserfs/dir.c:reiserfs_readdir, but
265 * we need to drop the path before calling the filldir struct. That 264 * we need to drop the path before calling the filldir struct. That
@@ -273,139 +272,146 @@ open_xa_file (const struct inode *inode, const char *name, int flags)
273 * we're called with i_sem held, so there are no worries about the directory 272 * we're called with i_sem held, so there are no worries about the directory
274 * changing underneath us. 273 * changing underneath us.
275 */ 274 */
276static int __xattr_readdir(struct file * filp, void * dirent, filldir_t filldir) 275static int __xattr_readdir(struct file *filp, void *dirent, filldir_t filldir)
277{ 276{
278 struct inode *inode = filp->f_dentry->d_inode; 277 struct inode *inode = filp->f_dentry->d_inode;
279 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ 278 struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */
280 INITIALIZE_PATH (path_to_entry); 279 INITIALIZE_PATH(path_to_entry);
281 struct buffer_head * bh; 280 struct buffer_head *bh;
282 int entry_num; 281 int entry_num;
283 struct item_head * ih, tmp_ih; 282 struct item_head *ih, tmp_ih;
284 int search_res; 283 int search_res;
285 char * local_buf; 284 char *local_buf;
286 loff_t next_pos; 285 loff_t next_pos;
287 char small_buf[32] ; /* avoid kmalloc if we can */ 286 char small_buf[32]; /* avoid kmalloc if we can */
288 struct reiserfs_de_head *deh; 287 struct reiserfs_de_head *deh;
289 int d_reclen; 288 int d_reclen;
290 char * d_name; 289 char *d_name;
291 off_t d_off; 290 off_t d_off;
292 ino_t d_ino; 291 ino_t d_ino;
293 struct reiserfs_dir_entry de; 292 struct reiserfs_dir_entry de;
294 293
295 294 /* form key for search the next directory entry using f_pos field of
296 /* form key for search the next directory entry using f_pos field of 295 file structure */
297 file structure */ 296 next_pos = max_reiserfs_offset(inode);
298 next_pos = max_reiserfs_offset(inode); 297
299 298 while (1) {
300 while (1) { 299 research:
301research: 300 if (next_pos <= DOT_DOT_OFFSET)
302 if (next_pos <= DOT_DOT_OFFSET) 301 break;
303 break; 302 make_cpu_key(&pos_key, inode, next_pos, TYPE_DIRENTRY, 3);
304 make_cpu_key (&pos_key, inode, next_pos, TYPE_DIRENTRY, 3); 303
305 304 search_res =
306 search_res = search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry, &de); 305 search_by_entry_key(inode->i_sb, &pos_key, &path_to_entry,
307 if (search_res == IO_ERROR) { 306 &de);
308 // FIXME: we could just skip part of directory which could 307 if (search_res == IO_ERROR) {
309 // not be read 308 // FIXME: we could just skip part of directory which could
310 pathrelse(&path_to_entry); 309 // not be read
311 return -EIO; 310 pathrelse(&path_to_entry);
312 } 311 return -EIO;
313 312 }
314 if (search_res == NAME_NOT_FOUND)
315 de.de_entry_num--;
316 313
317 set_de_name_and_namelen(&de); 314 if (search_res == NAME_NOT_FOUND)
318 entry_num = de.de_entry_num; 315 de.de_entry_num--;
319 deh = &(de.de_deh[entry_num]);
320 316
321 bh = de.de_bh; 317 set_de_name_and_namelen(&de);
322 ih = de.de_ih; 318 entry_num = de.de_entry_num;
319 deh = &(de.de_deh[entry_num]);
323 320
324 if (!is_direntry_le_ih(ih)) { 321 bh = de.de_bh;
325 reiserfs_warning(inode->i_sb, "not direntry %h", ih); 322 ih = de.de_ih;
326 break;
327 }
328 copy_item_head(&tmp_ih, ih);
329 323
330 /* we must have found item, that is item of this directory, */ 324 if (!is_direntry_le_ih(ih)) {
331 RFALSE( COMP_SHORT_KEYS (&(ih->ih_key), &pos_key), 325 reiserfs_warning(inode->i_sb, "not direntry %h", ih);
332 "vs-9000: found item %h does not match to dir we readdir %K", 326 break;
333 ih, &pos_key); 327 }
328 copy_item_head(&tmp_ih, ih);
334 329
335 if (deh_offset(deh) <= DOT_DOT_OFFSET) { 330 /* we must have found item, that is item of this directory, */
336 break; 331 RFALSE(COMP_SHORT_KEYS(&(ih->ih_key), &pos_key),
337 } 332 "vs-9000: found item %h does not match to dir we readdir %K",
333 ih, &pos_key);
338 334
339 /* look for the previous entry in the directory */ 335 if (deh_offset(deh) <= DOT_DOT_OFFSET) {
340 next_pos = deh_offset (deh) - 1; 336 break;
337 }
341 338
342 if (!de_visible (deh)) 339 /* look for the previous entry in the directory */
343 /* it is hidden entry */ 340 next_pos = deh_offset(deh) - 1;
344 continue;
345 341
346 d_reclen = entry_length(bh, ih, entry_num); 342 if (!de_visible(deh))
347 d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); 343 /* it is hidden entry */
348 d_off = deh_offset (deh); 344 continue;
349 d_ino = deh_objectid (deh);
350 345
351 if (!d_name[d_reclen - 1]) 346 d_reclen = entry_length(bh, ih, entry_num);
352 d_reclen = strlen (d_name); 347 d_name = B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh);
348 d_off = deh_offset(deh);
349 d_ino = deh_objectid(deh);
353 350
354 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)){ 351 if (!d_name[d_reclen - 1])
355 /* too big to send back to VFS */ 352 d_reclen = strlen(d_name);
356 continue ;
357 }
358 353
359 /* Ignore the .reiserfs_priv entry */ 354 if (d_reclen > REISERFS_MAX_NAME(inode->i_sb->s_blocksize)) {
360 if (reiserfs_xattrs (inode->i_sb) && 355 /* too big to send back to VFS */
361 !old_format_only(inode->i_sb) && 356 continue;
362 deh_objectid (deh) == le32_to_cpu (INODE_PKEY(REISERFS_SB(inode->i_sb)->priv_root->d_inode)->k_objectid)) 357 }
363 continue;
364 358
365 if (d_reclen <= 32) { 359 /* Ignore the .reiserfs_priv entry */
366 local_buf = small_buf ; 360 if (reiserfs_xattrs(inode->i_sb) &&
367 } else { 361 !old_format_only(inode->i_sb) &&
368 local_buf = reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb) ; 362 deh_objectid(deh) ==
369 if (!local_buf) { 363 le32_to_cpu(INODE_PKEY
370 pathrelse (&path_to_entry); 364 (REISERFS_SB(inode->i_sb)->priv_root->d_inode)->
371 return -ENOMEM ; 365 k_objectid))
372 } 366 continue;
373 if (item_moved (&tmp_ih, &path_to_entry)) { 367
374 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 368 if (d_reclen <= 32) {
375 369 local_buf = small_buf;
376 /* sigh, must retry. Do this same offset again */ 370 } else {
377 next_pos = d_off; 371 local_buf =
378 goto research; 372 reiserfs_kmalloc(d_reclen, GFP_NOFS, inode->i_sb);
379 } 373 if (!local_buf) {
380 } 374 pathrelse(&path_to_entry);
375 return -ENOMEM;
376 }
377 if (item_moved(&tmp_ih, &path_to_entry)) {
378 reiserfs_kfree(local_buf, d_reclen,
379 inode->i_sb);
380
381 /* sigh, must retry. Do this same offset again */
382 next_pos = d_off;
383 goto research;
384 }
385 }
381 386
382 // Note, that we copy name to user space via temporary 387 // Note, that we copy name to user space via temporary
383 // buffer (local_buf) because filldir will block if 388 // buffer (local_buf) because filldir will block if
384 // user space buffer is swapped out. At that time 389 // user space buffer is swapped out. At that time
385 // entry can move to somewhere else 390 // entry can move to somewhere else
386 memcpy (local_buf, d_name, d_reclen); 391 memcpy(local_buf, d_name, d_reclen);
387 392
388 /* the filldir function might need to start transactions, 393 /* the filldir function might need to start transactions,
389 * or do who knows what. Release the path now that we've 394 * or do who knows what. Release the path now that we've
390 * copied all the important stuff out of the deh 395 * copied all the important stuff out of the deh
391 */ 396 */
392 pathrelse (&path_to_entry); 397 pathrelse(&path_to_entry);
393 398
394 if (filldir (dirent, local_buf, d_reclen, d_off, d_ino, 399 if (filldir(dirent, local_buf, d_reclen, d_off, d_ino,
395 DT_UNKNOWN) < 0) { 400 DT_UNKNOWN) < 0) {
396 if (local_buf != small_buf) { 401 if (local_buf != small_buf) {
397 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 402 reiserfs_kfree(local_buf, d_reclen,
398 } 403 inode->i_sb);
399 goto end; 404 }
400 } 405 goto end;
401 if (local_buf != small_buf) { 406 }
402 reiserfs_kfree(local_buf, d_reclen, inode->i_sb) ; 407 if (local_buf != small_buf) {
403 } 408 reiserfs_kfree(local_buf, d_reclen, inode->i_sb);
404 } /* while */ 409 }
410 } /* while */
405 411
406end: 412 end:
407 pathrelse (&path_to_entry); 413 pathrelse(&path_to_entry);
408 return 0; 414 return 0;
409} 415}
410 416
411/* 417/*
@@ -417,63 +423,59 @@ end:
417static 423static
418int xattr_readdir(struct file *file, filldir_t filler, void *buf) 424int xattr_readdir(struct file *file, filldir_t filler, void *buf)
419{ 425{
420 struct inode *inode = file->f_dentry->d_inode; 426 struct inode *inode = file->f_dentry->d_inode;
421 int res = -ENOTDIR; 427 int res = -ENOTDIR;
422 if (!file->f_op || !file->f_op->readdir) 428 if (!file->f_op || !file->f_op->readdir)
423 goto out; 429 goto out;
424 down(&inode->i_sem); 430 down(&inode->i_sem);
425// down(&inode->i_zombie); 431// down(&inode->i_zombie);
426 res = -ENOENT; 432 res = -ENOENT;
427 if (!IS_DEADDIR(inode)) { 433 if (!IS_DEADDIR(inode)) {
428 lock_kernel(); 434 lock_kernel();
429 res = __xattr_readdir(file, buf, filler); 435 res = __xattr_readdir(file, buf, filler);
430 unlock_kernel(); 436 unlock_kernel();
431 } 437 }
432// up(&inode->i_zombie); 438// up(&inode->i_zombie);
433 up(&inode->i_sem); 439 up(&inode->i_sem);
434out: 440 out:
435 return res; 441 return res;
436} 442}
437 443
438
439/* Internal operations on file data */ 444/* Internal operations on file data */
440static inline void 445static inline void reiserfs_put_page(struct page *page)
441reiserfs_put_page(struct page *page)
442{ 446{
443 kunmap(page); 447 kunmap(page);
444 page_cache_release(page); 448 page_cache_release(page);
445} 449}
446 450
447static struct page * 451static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
448reiserfs_get_page(struct inode *dir, unsigned long n)
449{ 452{
450 struct address_space *mapping = dir->i_mapping; 453 struct address_space *mapping = dir->i_mapping;
451 struct page *page; 454 struct page *page;
452 /* We can deadlock if we try to free dentries, 455 /* We can deadlock if we try to free dentries,
453 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 456 and an unlink/rmdir has just occured - GFP_NOFS avoids this */
454 mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS; 457 mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS;
455 page = read_cache_page (mapping, n, 458 page = read_cache_page(mapping, n,
456 (filler_t*)mapping->a_ops->readpage, NULL); 459 (filler_t *) mapping->a_ops->readpage, NULL);
457 if (!IS_ERR(page)) { 460 if (!IS_ERR(page)) {
458 wait_on_page_locked(page); 461 wait_on_page_locked(page);
459 kmap(page); 462 kmap(page);
460 if (!PageUptodate(page)) 463 if (!PageUptodate(page))
461 goto fail; 464 goto fail;
462 465
463 if (PageError(page)) 466 if (PageError(page))
464 goto fail; 467 goto fail;
465 } 468 }
466 return page; 469 return page;
467 470
468fail: 471 fail:
469 reiserfs_put_page(page); 472 reiserfs_put_page(page);
470 return ERR_PTR(-EIO); 473 return ERR_PTR(-EIO);
471} 474}
472 475
473static inline __u32 476static inline __u32 xattr_hash(const char *msg, int len)
474xattr_hash (const char *msg, int len)
475{ 477{
476 return csum_partial (msg, len, 0); 478 return csum_partial(msg, len, 0);
477} 479}
478 480
479/* Generic extended attribute operations that can be used by xa plugins */ 481/* Generic extended attribute operations that can be used by xa plugins */
@@ -482,294 +484,300 @@ xattr_hash (const char *msg, int len)
482 * inode->i_sem: down 484 * inode->i_sem: down
483 */ 485 */
484int 486int
485reiserfs_xattr_set (struct inode *inode, const char *name, const void *buffer, 487reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
486 size_t buffer_size, int flags) 488 size_t buffer_size, int flags)
487{ 489{
488 int err = 0; 490 int err = 0;
489 struct file *fp; 491 struct file *fp;
490 struct page *page; 492 struct page *page;
491 char *data; 493 char *data;
492 struct address_space *mapping; 494 struct address_space *mapping;
493 size_t file_pos = 0; 495 size_t file_pos = 0;
494 size_t buffer_pos = 0; 496 size_t buffer_pos = 0;
495 struct inode *xinode; 497 struct inode *xinode;
496 struct iattr newattrs; 498 struct iattr newattrs;
497 __u32 xahash = 0; 499 __u32 xahash = 0;
498 500
499 if (IS_RDONLY (inode)) 501 if (IS_RDONLY(inode))
500 return -EROFS; 502 return -EROFS;
501 503
502 if (IS_IMMUTABLE (inode) || IS_APPEND (inode)) 504 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
503 return -EPERM; 505 return -EPERM;
504 506
505 if (get_inode_sd_version (inode) == STAT_DATA_V1) 507 if (get_inode_sd_version(inode) == STAT_DATA_V1)
506 return -EOPNOTSUPP; 508 return -EOPNOTSUPP;
507 509
508 /* Empty xattrs are ok, they're just empty files, no hash */ 510 /* Empty xattrs are ok, they're just empty files, no hash */
509 if (buffer && buffer_size) 511 if (buffer && buffer_size)
510 xahash = xattr_hash (buffer, buffer_size); 512 xahash = xattr_hash(buffer, buffer_size);
511 513
512open_file: 514 open_file:
513 fp = open_xa_file (inode, name, flags); 515 fp = open_xa_file(inode, name, flags);
514 if (IS_ERR (fp)) { 516 if (IS_ERR(fp)) {
515 err = PTR_ERR (fp); 517 err = PTR_ERR(fp);
516 goto out; 518 goto out;
517 } 519 }
518 520
519 xinode = fp->f_dentry->d_inode; 521 xinode = fp->f_dentry->d_inode;
520 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 522 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
521 523
522 /* we need to copy it off.. */ 524 /* we need to copy it off.. */
523 if (xinode->i_nlink > 1) { 525 if (xinode->i_nlink > 1) {
524 fput(fp); 526 fput(fp);
525 err = reiserfs_xattr_del (inode, name); 527 err = reiserfs_xattr_del(inode, name);
526 if (err < 0) 528 if (err < 0)
527 goto out; 529 goto out;
528 /* We just killed the old one, we're not replacing anymore */ 530 /* We just killed the old one, we're not replacing anymore */
529 if (flags & XATTR_REPLACE) 531 if (flags & XATTR_REPLACE)
530 flags &= ~XATTR_REPLACE; 532 flags &= ~XATTR_REPLACE;
531 goto open_file; 533 goto open_file;
532 } 534 }
533 535
534 /* Resize it so we're ok to write there */ 536 /* Resize it so we're ok to write there */
535 newattrs.ia_size = buffer_size; 537 newattrs.ia_size = buffer_size;
536 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; 538 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
537 down (&xinode->i_sem); 539 down(&xinode->i_sem);
538 err = notify_change(fp->f_dentry, &newattrs); 540 err = notify_change(fp->f_dentry, &newattrs);
539 if (err) 541 if (err)
540 goto out_filp; 542 goto out_filp;
541 543
542 mapping = xinode->i_mapping; 544 mapping = xinode->i_mapping;
543 while (buffer_pos < buffer_size || buffer_pos == 0) { 545 while (buffer_pos < buffer_size || buffer_pos == 0) {
544 size_t chunk; 546 size_t chunk;
545 size_t skip = 0; 547 size_t skip = 0;
546 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1)); 548 size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
547 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE) 549 if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
548 chunk = PAGE_CACHE_SIZE; 550 chunk = PAGE_CACHE_SIZE;
549 else 551 else
550 chunk = buffer_size - buffer_pos; 552 chunk = buffer_size - buffer_pos;
551 553
552 page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); 554 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT);
553 if (IS_ERR (page)) { 555 if (IS_ERR(page)) {
554 err = PTR_ERR (page); 556 err = PTR_ERR(page);
555 goto out_filp; 557 goto out_filp;
556 } 558 }
557 559
558 lock_page (page); 560 lock_page(page);
559 data = page_address (page); 561 data = page_address(page);
560 562
561 if (file_pos == 0) { 563 if (file_pos == 0) {
562 struct reiserfs_xattr_header *rxh; 564 struct reiserfs_xattr_header *rxh;
563 skip = file_pos = sizeof (struct reiserfs_xattr_header); 565 skip = file_pos = sizeof(struct reiserfs_xattr_header);
564 if (chunk + skip > PAGE_CACHE_SIZE) 566 if (chunk + skip > PAGE_CACHE_SIZE)
565 chunk = PAGE_CACHE_SIZE - skip; 567 chunk = PAGE_CACHE_SIZE - skip;
566 rxh = (struct reiserfs_xattr_header *)data; 568 rxh = (struct reiserfs_xattr_header *)data;
567 rxh->h_magic = cpu_to_le32 (REISERFS_XATTR_MAGIC); 569 rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
568 rxh->h_hash = cpu_to_le32 (xahash); 570 rxh->h_hash = cpu_to_le32(xahash);
569 } 571 }
570 572
571 err = mapping->a_ops->prepare_write (fp, page, page_offset, 573 err = mapping->a_ops->prepare_write(fp, page, page_offset,
572 page_offset + chunk + skip); 574 page_offset + chunk + skip);
573 if (!err) { 575 if (!err) {
574 if (buffer) 576 if (buffer)
575 memcpy (data + skip, buffer + buffer_pos, chunk); 577 memcpy(data + skip, buffer + buffer_pos, chunk);
576 err = mapping->a_ops->commit_write (fp, page, page_offset, 578 err =
577 page_offset + chunk + skip); 579 mapping->a_ops->commit_write(fp, page, page_offset,
580 page_offset + chunk +
581 skip);
582 }
583 unlock_page(page);
584 reiserfs_put_page(page);
585 buffer_pos += chunk;
586 file_pos += chunk;
587 skip = 0;
588 if (err || buffer_size == 0 || !buffer)
589 break;
590 }
591
592 /* We can't mark the inode dirty if it's not hashed. This is the case
593 * when we're inheriting the default ACL. If we dirty it, the inode
594 * gets marked dirty, but won't (ever) make it onto the dirty list until
595 * it's synced explicitly to clear I_DIRTY. This is bad. */
596 if (!hlist_unhashed(&inode->i_hash)) {
597 inode->i_ctime = CURRENT_TIME_SEC;
598 mark_inode_dirty(inode);
578 } 599 }
579 unlock_page (page); 600
580 reiserfs_put_page (page); 601 out_filp:
581 buffer_pos += chunk; 602 up(&xinode->i_sem);
582 file_pos += chunk; 603 fput(fp);
583 skip = 0; 604
584 if (err || buffer_size == 0 || !buffer) 605 out:
585 break; 606 return err;
586 }
587
588 /* We can't mark the inode dirty if it's not hashed. This is the case
589 * when we're inheriting the default ACL. If we dirty it, the inode
590 * gets marked dirty, but won't (ever) make it onto the dirty list until
591 * it's synced explicitly to clear I_DIRTY. This is bad. */
592 if (!hlist_unhashed(&inode->i_hash)) {
593 inode->i_ctime = CURRENT_TIME_SEC;
594 mark_inode_dirty (inode);
595 }
596
597out_filp:
598 up (&xinode->i_sem);
599 fput(fp);
600
601out:
602 return err;
603} 607}
604 608
605/* 609/*
606 * inode->i_sem: down 610 * inode->i_sem: down
607 */ 611 */
608int 612int
609reiserfs_xattr_get (const struct inode *inode, const char *name, void *buffer, 613reiserfs_xattr_get(const struct inode *inode, const char *name, void *buffer,
610 size_t buffer_size) 614 size_t buffer_size)
611{ 615{
612 ssize_t err = 0; 616 ssize_t err = 0;
613 struct file *fp; 617 struct file *fp;
614 size_t isize; 618 size_t isize;
615 size_t file_pos = 0; 619 size_t file_pos = 0;
616 size_t buffer_pos = 0; 620 size_t buffer_pos = 0;
617 struct page *page; 621 struct page *page;
618 struct inode *xinode; 622 struct inode *xinode;
619 __u32 hash = 0; 623 __u32 hash = 0;
620 624
621 if (name == NULL) 625 if (name == NULL)
622 return -EINVAL; 626 return -EINVAL;
623 627
624 /* We can't have xattrs attached to v1 items since they don't have 628 /* We can't have xattrs attached to v1 items since they don't have
625 * generation numbers */ 629 * generation numbers */
626 if (get_inode_sd_version (inode) == STAT_DATA_V1) 630 if (get_inode_sd_version(inode) == STAT_DATA_V1)
627 return -EOPNOTSUPP; 631 return -EOPNOTSUPP;
628 632
629 fp = open_xa_file (inode, name, FL_READONLY); 633 fp = open_xa_file(inode, name, FL_READONLY);
630 if (IS_ERR (fp)) { 634 if (IS_ERR(fp)) {
631 err = PTR_ERR (fp); 635 err = PTR_ERR(fp);
632 goto out; 636 goto out;
633 } 637 }
634 638
635 xinode = fp->f_dentry->d_inode; 639 xinode = fp->f_dentry->d_inode;
636 isize = xinode->i_size; 640 isize = xinode->i_size;
637 REISERFS_I(inode)->i_flags |= i_has_xattr_dir; 641 REISERFS_I(inode)->i_flags |= i_has_xattr_dir;
638 642
639 /* Just return the size needed */ 643 /* Just return the size needed */
640 if (buffer == NULL) { 644 if (buffer == NULL) {
641 err = isize - sizeof (struct reiserfs_xattr_header); 645 err = isize - sizeof(struct reiserfs_xattr_header);
642 goto out_dput; 646 goto out_dput;
643 } 647 }
644 648
645 if (buffer_size < isize - sizeof (struct reiserfs_xattr_header)) { 649 if (buffer_size < isize - sizeof(struct reiserfs_xattr_header)) {
646 err = -ERANGE; 650 err = -ERANGE;
647 goto out_dput; 651 goto out_dput;
648 } 652 }
649 653
650 while (file_pos < isize) { 654 while (file_pos < isize) {
651 size_t chunk; 655 size_t chunk;
652 char *data; 656 char *data;
653 size_t skip = 0; 657 size_t skip = 0;
654 if (isize - file_pos > PAGE_CACHE_SIZE) 658 if (isize - file_pos > PAGE_CACHE_SIZE)
655 chunk = PAGE_CACHE_SIZE; 659 chunk = PAGE_CACHE_SIZE;
656 else 660 else
657 chunk = isize - file_pos; 661 chunk = isize - file_pos;
658 662
659 page = reiserfs_get_page (xinode, file_pos >> PAGE_CACHE_SHIFT); 663 page = reiserfs_get_page(xinode, file_pos >> PAGE_CACHE_SHIFT);
660 if (IS_ERR (page)) { 664 if (IS_ERR(page)) {
661 err = PTR_ERR (page); 665 err = PTR_ERR(page);
662 goto out_dput; 666 goto out_dput;
663 } 667 }
664 668
665 lock_page (page); 669 lock_page(page);
666 data = page_address (page); 670 data = page_address(page);
667 if (file_pos == 0) { 671 if (file_pos == 0) {
668 struct reiserfs_xattr_header *rxh = 672 struct reiserfs_xattr_header *rxh =
669 (struct reiserfs_xattr_header *)data; 673 (struct reiserfs_xattr_header *)data;
670 skip = file_pos = sizeof (struct reiserfs_xattr_header); 674 skip = file_pos = sizeof(struct reiserfs_xattr_header);
671 chunk -= skip; 675 chunk -= skip;
672 /* Magic doesn't match up.. */ 676 /* Magic doesn't match up.. */
673 if (rxh->h_magic != cpu_to_le32 (REISERFS_XATTR_MAGIC)) { 677 if (rxh->h_magic != cpu_to_le32(REISERFS_XATTR_MAGIC)) {
674 unlock_page (page); 678 unlock_page(page);
675 reiserfs_put_page (page); 679 reiserfs_put_page(page);
676 reiserfs_warning (inode->i_sb, "Invalid magic for xattr (%s) " 680 reiserfs_warning(inode->i_sb,
677 "associated with %k", name, 681 "Invalid magic for xattr (%s) "
678 INODE_PKEY (inode)); 682 "associated with %k", name,
679 err = -EIO; 683 INODE_PKEY(inode));
680 goto out_dput; 684 err = -EIO;
681 } 685 goto out_dput;
682 hash = le32_to_cpu (rxh->h_hash); 686 }
683 } 687 hash = le32_to_cpu(rxh->h_hash);
684 memcpy (buffer + buffer_pos, data + skip, chunk); 688 }
685 unlock_page (page); 689 memcpy(buffer + buffer_pos, data + skip, chunk);
686 reiserfs_put_page (page); 690 unlock_page(page);
687 file_pos += chunk; 691 reiserfs_put_page(page);
688 buffer_pos += chunk; 692 file_pos += chunk;
689 skip = 0; 693 buffer_pos += chunk;
690 } 694 skip = 0;
691 err = isize - sizeof (struct reiserfs_xattr_header); 695 }
692 696 err = isize - sizeof(struct reiserfs_xattr_header);
693 if (xattr_hash (buffer, isize - sizeof (struct reiserfs_xattr_header)) != hash) { 697
694 reiserfs_warning (inode->i_sb, "Invalid hash for xattr (%s) associated " 698 if (xattr_hash(buffer, isize - sizeof(struct reiserfs_xattr_header)) !=
695 "with %k", name, INODE_PKEY (inode)); 699 hash) {
696 err = -EIO; 700 reiserfs_warning(inode->i_sb,
697 } 701 "Invalid hash for xattr (%s) associated "
698 702 "with %k", name, INODE_PKEY(inode));
699out_dput: 703 err = -EIO;
700 fput(fp); 704 }
701 705
702out: 706 out_dput:
703 return err; 707 fput(fp);
708
709 out:
710 return err;
704} 711}
705 712
706static int 713static int
707__reiserfs_xattr_del (struct dentry *xadir, const char *name, int namelen) 714__reiserfs_xattr_del(struct dentry *xadir, const char *name, int namelen)
708{ 715{
709 struct dentry *dentry; 716 struct dentry *dentry;
710 struct inode *dir = xadir->d_inode; 717 struct inode *dir = xadir->d_inode;
711 int err = 0; 718 int err = 0;
712 719
713 dentry = lookup_one_len (name, xadir, namelen); 720 dentry = lookup_one_len(name, xadir, namelen);
714 if (IS_ERR (dentry)) { 721 if (IS_ERR(dentry)) {
715 err = PTR_ERR (dentry); 722 err = PTR_ERR(dentry);
716 goto out; 723 goto out;
717 } else if (!dentry->d_inode) { 724 } else if (!dentry->d_inode) {
718 err = -ENODATA; 725 err = -ENODATA;
719 goto out_file; 726 goto out_file;
720 } 727 }
721 728
722 /* Skip directories.. */ 729 /* Skip directories.. */
723 if (S_ISDIR (dentry->d_inode->i_mode)) 730 if (S_ISDIR(dentry->d_inode->i_mode))
724 goto out_file; 731 goto out_file;
725 732
726 if (!is_reiserfs_priv_object (dentry->d_inode)) { 733 if (!is_reiserfs_priv_object(dentry->d_inode)) {
727 reiserfs_warning (dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have " 734 reiserfs_warning(dir->i_sb, "OID %08x [%.*s/%.*s] doesn't have "
728 "priv flag set [parent is %sset].", 735 "priv flag set [parent is %sset].",
729 le32_to_cpu (INODE_PKEY (dentry->d_inode)->k_objectid), 736 le32_to_cpu(INODE_PKEY(dentry->d_inode)->
730 xadir->d_name.len, xadir->d_name.name, namelen, name, 737 k_objectid), xadir->d_name.len,
731 is_reiserfs_priv_object (xadir->d_inode) ? "" : "not "); 738 xadir->d_name.name, namelen, name,
732 dput (dentry); 739 is_reiserfs_priv_object(xadir->
733 return -EIO; 740 d_inode) ? "" :
734 } 741 "not ");
735 742 dput(dentry);
736 err = dir->i_op->unlink (dir, dentry); 743 return -EIO;
737 if (!err) 744 }
738 d_delete (dentry);
739
740out_file:
741 dput (dentry);
742
743out:
744 return err;
745}
746 745
746 err = dir->i_op->unlink(dir, dentry);
747 if (!err)
748 d_delete(dentry);
747 749
748int 750 out_file:
749reiserfs_xattr_del (struct inode *inode, const char *name) 751 dput(dentry);
752
753 out:
754 return err;
755}
756
757int reiserfs_xattr_del(struct inode *inode, const char *name)
750{ 758{
751 struct dentry *dir; 759 struct dentry *dir;
752 int err; 760 int err;
753 761
754 if (IS_RDONLY (inode)) 762 if (IS_RDONLY(inode))
755 return -EROFS; 763 return -EROFS;
756 764
757 dir = open_xa_dir (inode, FL_READONLY); 765 dir = open_xa_dir(inode, FL_READONLY);
758 if (IS_ERR (dir)) { 766 if (IS_ERR(dir)) {
759 err = PTR_ERR (dir); 767 err = PTR_ERR(dir);
760 goto out; 768 goto out;
761 } 769 }
762 770
763 err = __reiserfs_xattr_del (dir, name, strlen (name)); 771 err = __reiserfs_xattr_del(dir, name, strlen(name));
764 dput (dir); 772 dput(dir);
765 773
766 if (!err) { 774 if (!err) {
767 inode->i_ctime = CURRENT_TIME_SEC; 775 inode->i_ctime = CURRENT_TIME_SEC;
768 mark_inode_dirty (inode); 776 mark_inode_dirty(inode);
769 } 777 }
770 778
771out: 779 out:
772 return err; 780 return err;
773} 781}
774 782
775/* The following are side effects of other operations that aren't explicitly 783/* The following are side effects of other operations that aren't explicitly
@@ -777,167 +785,163 @@ out:
777 * or ownership changes, object deletions, etc. */ 785 * or ownership changes, object deletions, etc. */
778 786
779static int 787static int
780reiserfs_delete_xattrs_filler (void *buf, const char *name, int namelen, 788reiserfs_delete_xattrs_filler(void *buf, const char *name, int namelen,
781 loff_t offset, ino_t ino, unsigned int d_type) 789 loff_t offset, ino_t ino, unsigned int d_type)
782{ 790{
783 struct dentry *xadir = (struct dentry *)buf; 791 struct dentry *xadir = (struct dentry *)buf;
784 792
785 return __reiserfs_xattr_del (xadir, name, namelen); 793 return __reiserfs_xattr_del(xadir, name, namelen);
786 794
787} 795}
788 796
789/* This is called w/ inode->i_sem downed */ 797/* This is called w/ inode->i_sem downed */
790int 798int reiserfs_delete_xattrs(struct inode *inode)
791reiserfs_delete_xattrs (struct inode *inode)
792{ 799{
793 struct file *fp; 800 struct file *fp;
794 struct dentry *dir, *root; 801 struct dentry *dir, *root;
795 int err = 0; 802 int err = 0;
796 803
797 /* Skip out, an xattr has no xattrs associated with it */ 804 /* Skip out, an xattr has no xattrs associated with it */
798 if (is_reiserfs_priv_object (inode) || 805 if (is_reiserfs_priv_object(inode) ||
799 get_inode_sd_version (inode) == STAT_DATA_V1 || 806 get_inode_sd_version(inode) == STAT_DATA_V1 ||
800 !reiserfs_xattrs(inode->i_sb)) 807 !reiserfs_xattrs(inode->i_sb)) {
801 { 808 return 0;
802 return 0; 809 }
803 } 810 reiserfs_read_lock_xattrs(inode->i_sb);
804 reiserfs_read_lock_xattrs (inode->i_sb); 811 dir = open_xa_dir(inode, FL_READONLY);
805 dir = open_xa_dir (inode, FL_READONLY); 812 reiserfs_read_unlock_xattrs(inode->i_sb);
806 reiserfs_read_unlock_xattrs (inode->i_sb); 813 if (IS_ERR(dir)) {
807 if (IS_ERR (dir)) { 814 err = PTR_ERR(dir);
808 err = PTR_ERR (dir); 815 goto out;
809 goto out; 816 } else if (!dir->d_inode) {
810 } else if (!dir->d_inode) { 817 dput(dir);
811 dput (dir); 818 return 0;
812 return 0; 819 }
813 } 820
814 821 fp = dentry_open(dir, NULL, O_RDWR);
815 fp = dentry_open (dir, NULL, O_RDWR); 822 if (IS_ERR(fp)) {
816 if (IS_ERR (fp)) { 823 err = PTR_ERR(fp);
817 err = PTR_ERR (fp); 824 /* dentry_open dputs the dentry if it fails */
818 /* dentry_open dputs the dentry if it fails */ 825 goto out;
819 goto out; 826 }
820 } 827
821 828 lock_kernel();
822 lock_kernel (); 829 err = xattr_readdir(fp, reiserfs_delete_xattrs_filler, dir);
823 err = xattr_readdir (fp, reiserfs_delete_xattrs_filler, dir); 830 if (err) {
824 if (err) { 831 unlock_kernel();
825 unlock_kernel (); 832 goto out_dir;
826 goto out_dir; 833 }
827 } 834
828 835 /* Leftovers besides . and .. -- that's not good. */
829 /* Leftovers besides . and .. -- that's not good. */ 836 if (dir->d_inode->i_nlink <= 2) {
830 if (dir->d_inode->i_nlink <= 2) { 837 root = get_xa_root(inode->i_sb);
831 root = get_xa_root (inode->i_sb); 838 reiserfs_write_lock_xattrs(inode->i_sb);
832 reiserfs_write_lock_xattrs (inode->i_sb); 839 err = vfs_rmdir(root->d_inode, dir);
833 err = vfs_rmdir (root->d_inode, dir); 840 reiserfs_write_unlock_xattrs(inode->i_sb);
834 reiserfs_write_unlock_xattrs (inode->i_sb); 841 dput(root);
835 dput (root); 842 } else {
836 } else { 843 reiserfs_warning(inode->i_sb,
837 reiserfs_warning (inode->i_sb, 844 "Couldn't remove all entries in directory");
838 "Couldn't remove all entries in directory"); 845 }
839 } 846 unlock_kernel();
840 unlock_kernel (); 847
841 848 out_dir:
842out_dir: 849 fput(fp);
843 fput(fp); 850
844 851 out:
845out: 852 if (!err)
846 if (!err) 853 REISERFS_I(inode)->i_flags =
847 REISERFS_I(inode)->i_flags = REISERFS_I(inode)->i_flags & ~i_has_xattr_dir; 854 REISERFS_I(inode)->i_flags & ~i_has_xattr_dir;
848 return err; 855 return err;
849} 856}
850 857
851struct reiserfs_chown_buf { 858struct reiserfs_chown_buf {
852 struct inode *inode; 859 struct inode *inode;
853 struct dentry *xadir; 860 struct dentry *xadir;
854 struct iattr *attrs; 861 struct iattr *attrs;
855}; 862};
856 863
857/* XXX: If there is a better way to do this, I'd love to hear about it */ 864/* XXX: If there is a better way to do this, I'd love to hear about it */
858static int 865static int
859reiserfs_chown_xattrs_filler (void *buf, const char *name, int namelen, 866reiserfs_chown_xattrs_filler(void *buf, const char *name, int namelen,
860 loff_t offset, ino_t ino, unsigned int d_type) 867 loff_t offset, ino_t ino, unsigned int d_type)
861{ 868{
862 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf; 869 struct reiserfs_chown_buf *chown_buf = (struct reiserfs_chown_buf *)buf;
863 struct dentry *xafile, *xadir = chown_buf->xadir; 870 struct dentry *xafile, *xadir = chown_buf->xadir;
864 struct iattr *attrs = chown_buf->attrs; 871 struct iattr *attrs = chown_buf->attrs;
865 int err = 0; 872 int err = 0;
866 873
867 xafile = lookup_one_len (name, xadir, namelen); 874 xafile = lookup_one_len(name, xadir, namelen);
868 if (IS_ERR (xafile)) 875 if (IS_ERR(xafile))
869 return PTR_ERR (xafile); 876 return PTR_ERR(xafile);
870 else if (!xafile->d_inode) { 877 else if (!xafile->d_inode) {
871 dput (xafile); 878 dput(xafile);
872 return -ENODATA; 879 return -ENODATA;
873 } 880 }
874 881
875 if (!S_ISDIR (xafile->d_inode->i_mode)) 882 if (!S_ISDIR(xafile->d_inode->i_mode))
876 err = notify_change (xafile, attrs); 883 err = notify_change(xafile, attrs);
877 dput (xafile); 884 dput(xafile);
878 885
879 return err; 886 return err;
880} 887}
881 888
882int 889int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs)
883reiserfs_chown_xattrs (struct inode *inode, struct iattr *attrs)
884{ 890{
885 struct file *fp; 891 struct file *fp;
886 struct dentry *dir; 892 struct dentry *dir;
887 int err = 0; 893 int err = 0;
888 struct reiserfs_chown_buf buf; 894 struct reiserfs_chown_buf buf;
889 unsigned int ia_valid = attrs->ia_valid; 895 unsigned int ia_valid = attrs->ia_valid;
890 896
891 /* Skip out, an xattr has no xattrs associated with it */ 897 /* Skip out, an xattr has no xattrs associated with it */
892 if (is_reiserfs_priv_object (inode) || 898 if (is_reiserfs_priv_object(inode) ||
893 get_inode_sd_version (inode) == STAT_DATA_V1 || 899 get_inode_sd_version(inode) == STAT_DATA_V1 ||
894 !reiserfs_xattrs(inode->i_sb)) 900 !reiserfs_xattrs(inode->i_sb)) {
895 { 901 return 0;
896 return 0; 902 }
897 } 903 reiserfs_read_lock_xattrs(inode->i_sb);
898 reiserfs_read_lock_xattrs (inode->i_sb); 904 dir = open_xa_dir(inode, FL_READONLY);
899 dir = open_xa_dir (inode, FL_READONLY); 905 reiserfs_read_unlock_xattrs(inode->i_sb);
900 reiserfs_read_unlock_xattrs (inode->i_sb); 906 if (IS_ERR(dir)) {
901 if (IS_ERR (dir)) { 907 if (PTR_ERR(dir) != -ENODATA)
902 if (PTR_ERR (dir) != -ENODATA) 908 err = PTR_ERR(dir);
903 err = PTR_ERR (dir); 909 goto out;
904 goto out; 910 } else if (!dir->d_inode) {
905 } else if (!dir->d_inode) { 911 dput(dir);
906 dput (dir); 912 goto out;
907 goto out; 913 }
908 } 914
909 915 fp = dentry_open(dir, NULL, O_RDWR);
910 fp = dentry_open (dir, NULL, O_RDWR); 916 if (IS_ERR(fp)) {
911 if (IS_ERR (fp)) { 917 err = PTR_ERR(fp);
912 err = PTR_ERR (fp); 918 /* dentry_open dputs the dentry if it fails */
913 /* dentry_open dputs the dentry if it fails */ 919 goto out;
914 goto out; 920 }
915 }
916
917 lock_kernel ();
918
919 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
920 buf.xadir = dir;
921 buf.attrs = attrs;
922 buf.inode = inode;
923
924 err = xattr_readdir (fp, reiserfs_chown_xattrs_filler, &buf);
925 if (err) {
926 unlock_kernel ();
927 goto out_dir;
928 }
929
930 err = notify_change (dir, attrs);
931 unlock_kernel ();
932
933out_dir:
934 fput(fp);
935
936out:
937 attrs->ia_valid = ia_valid;
938 return err;
939}
940 921
922 lock_kernel();
923
924 attrs->ia_valid &= (ATTR_UID | ATTR_GID | ATTR_CTIME);
925 buf.xadir = dir;
926 buf.attrs = attrs;
927 buf.inode = inode;
928
929 err = xattr_readdir(fp, reiserfs_chown_xattrs_filler, &buf);
930 if (err) {
931 unlock_kernel();
932 goto out_dir;
933 }
934
935 err = notify_change(dir, attrs);
936 unlock_kernel();
937
938 out_dir:
939 fput(fp);
940
941 out:
942 attrs->ia_valid = ia_valid;
943 return err;
944}
941 945
942/* Actual operations that are exported to VFS-land */ 946/* Actual operations that are exported to VFS-land */
943 947
@@ -946,61 +950,60 @@ out:
946 * Preliminary locking: we down dentry->d_inode->i_sem 950 * Preliminary locking: we down dentry->d_inode->i_sem
947 */ 951 */
948ssize_t 952ssize_t
949reiserfs_getxattr (struct dentry *dentry, const char *name, void *buffer, 953reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
950 size_t size) 954 size_t size)
951{ 955{
952 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 956 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
953 int err; 957 int err;
954 958
955 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 959 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
956 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 960 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
957 return -EOPNOTSUPP; 961 return -EOPNOTSUPP;
958 962
959 reiserfs_read_lock_xattr_i (dentry->d_inode); 963 reiserfs_read_lock_xattr_i(dentry->d_inode);
960 reiserfs_read_lock_xattrs (dentry->d_sb); 964 reiserfs_read_lock_xattrs(dentry->d_sb);
961 err = xah->get (dentry->d_inode, name, buffer, size); 965 err = xah->get(dentry->d_inode, name, buffer, size);
962 reiserfs_read_unlock_xattrs (dentry->d_sb); 966 reiserfs_read_unlock_xattrs(dentry->d_sb);
963 reiserfs_read_unlock_xattr_i (dentry->d_inode); 967 reiserfs_read_unlock_xattr_i(dentry->d_inode);
964 return err; 968 return err;
965} 969}
966 970
967
968/* 971/*
969 * Inode operation setxattr() 972 * Inode operation setxattr()
970 * 973 *
971 * dentry->d_inode->i_sem down 974 * dentry->d_inode->i_sem down
972 */ 975 */
973int 976int
974reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value, 977reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
975 size_t size, int flags) 978 size_t size, int flags)
976{ 979{
977 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 980 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
978 int err; 981 int err;
979 int lock; 982 int lock;
980 983
981 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 984 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
982 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 985 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
983 return -EOPNOTSUPP; 986 return -EOPNOTSUPP;
984 987
985 if (IS_RDONLY (dentry->d_inode)) 988 if (IS_RDONLY(dentry->d_inode))
986 return -EROFS; 989 return -EROFS;
987 990
988 if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) 991 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
989 return -EROFS; 992 return -EROFS;
990 993
991 reiserfs_write_lock_xattr_i (dentry->d_inode); 994 reiserfs_write_lock_xattr_i(dentry->d_inode);
992 lock = !has_xattr_dir (dentry->d_inode); 995 lock = !has_xattr_dir(dentry->d_inode);
993 if (lock) 996 if (lock)
994 reiserfs_write_lock_xattrs (dentry->d_sb); 997 reiserfs_write_lock_xattrs(dentry->d_sb);
995 else 998 else
996 reiserfs_read_lock_xattrs (dentry->d_sb); 999 reiserfs_read_lock_xattrs(dentry->d_sb);
997 err = xah->set (dentry->d_inode, name, value, size, flags); 1000 err = xah->set(dentry->d_inode, name, value, size, flags);
998 if (lock) 1001 if (lock)
999 reiserfs_write_unlock_xattrs (dentry->d_sb); 1002 reiserfs_write_unlock_xattrs(dentry->d_sb);
1000 else 1003 else
1001 reiserfs_read_unlock_xattrs (dentry->d_sb); 1004 reiserfs_read_unlock_xattrs(dentry->d_sb);
1002 reiserfs_write_unlock_xattr_i (dentry->d_inode); 1005 reiserfs_write_unlock_xattr_i(dentry->d_inode);
1003 return err; 1006 return err;
1004} 1007}
1005 1008
1006/* 1009/*
@@ -1008,344 +1011,343 @@ reiserfs_setxattr (struct dentry *dentry, const char *name, const void *value,
1008 * 1011 *
1009 * dentry->d_inode->i_sem down 1012 * dentry->d_inode->i_sem down
1010 */ 1013 */
1011int 1014int reiserfs_removexattr(struct dentry *dentry, const char *name)
1012reiserfs_removexattr (struct dentry *dentry, const char *name)
1013{ 1015{
1014 int err; 1016 int err;
1015 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 1017 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix(name);
1016 1018
1017 if (!xah || !reiserfs_xattrs(dentry->d_sb) || 1019 if (!xah || !reiserfs_xattrs(dentry->d_sb) ||
1018 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 1020 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
1019 return -EOPNOTSUPP; 1021 return -EOPNOTSUPP;
1020 1022
1021 if (IS_RDONLY (dentry->d_inode)) 1023 if (IS_RDONLY(dentry->d_inode))
1022 return -EROFS; 1024 return -EROFS;
1023 1025
1024 if (IS_IMMUTABLE (dentry->d_inode) || IS_APPEND (dentry->d_inode)) 1026 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode))
1025 return -EPERM; 1027 return -EPERM;
1026 1028
1027 reiserfs_write_lock_xattr_i (dentry->d_inode); 1029 reiserfs_write_lock_xattr_i(dentry->d_inode);
1028 reiserfs_read_lock_xattrs (dentry->d_sb); 1030 reiserfs_read_lock_xattrs(dentry->d_sb);
1029 1031
1030 /* Deletion pre-operation */ 1032 /* Deletion pre-operation */
1031 if (xah->del) { 1033 if (xah->del) {
1032 err = xah->del (dentry->d_inode, name); 1034 err = xah->del(dentry->d_inode, name);
1033 if (err) 1035 if (err)
1034 goto out; 1036 goto out;
1035 } 1037 }
1036 1038
1037 err = reiserfs_xattr_del (dentry->d_inode, name); 1039 err = reiserfs_xattr_del(dentry->d_inode, name);
1038 1040
1039 dentry->d_inode->i_ctime = CURRENT_TIME_SEC; 1041 dentry->d_inode->i_ctime = CURRENT_TIME_SEC;
1040 mark_inode_dirty (dentry->d_inode); 1042 mark_inode_dirty(dentry->d_inode);
1041 1043
1042out: 1044 out:
1043 reiserfs_read_unlock_xattrs (dentry->d_sb); 1045 reiserfs_read_unlock_xattrs(dentry->d_sb);
1044 reiserfs_write_unlock_xattr_i (dentry->d_inode); 1046 reiserfs_write_unlock_xattr_i(dentry->d_inode);
1045 return err; 1047 return err;
1046} 1048}
1047 1049
1048
1049/* This is what filldir will use: 1050/* This is what filldir will use:
1050 * r_pos will always contain the amount of space required for the entire 1051 * r_pos will always contain the amount of space required for the entire
1051 * list. If r_pos becomes larger than r_size, we need more space and we 1052 * list. If r_pos becomes larger than r_size, we need more space and we
1052 * return an error indicating this. If r_pos is less than r_size, then we've 1053 * return an error indicating this. If r_pos is less than r_size, then we've
1053 * filled the buffer successfully and we return success */ 1054 * filled the buffer successfully and we return success */
1054struct reiserfs_listxattr_buf { 1055struct reiserfs_listxattr_buf {
1055 int r_pos; 1056 int r_pos;
1056 int r_size; 1057 int r_size;
1057 char *r_buf; 1058 char *r_buf;
1058 struct inode *r_inode; 1059 struct inode *r_inode;
1059}; 1060};
1060 1061
1061static int 1062static int
1062reiserfs_listxattr_filler (void *buf, const char *name, int namelen, 1063reiserfs_listxattr_filler(void *buf, const char *name, int namelen,
1063 loff_t offset, ino_t ino, unsigned int d_type) 1064 loff_t offset, ino_t ino, unsigned int d_type)
1064{ 1065{
1065 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf; 1066 struct reiserfs_listxattr_buf *b = (struct reiserfs_listxattr_buf *)buf;
1066 int len = 0; 1067 int len = 0;
1067 if (name[0] != '.' || (namelen != 1 && (name[1] != '.' || namelen != 2))) { 1068 if (name[0] != '.'
1068 struct reiserfs_xattr_handler *xah = find_xattr_handler_prefix (name); 1069 || (namelen != 1 && (name[1] != '.' || namelen != 2))) {
1069 if (!xah) return 0; /* Unsupported xattr name, skip it */ 1070 struct reiserfs_xattr_handler *xah =
1070 1071 find_xattr_handler_prefix(name);
1071 /* We call ->list() twice because the operation isn't required to just 1072 if (!xah)
1072 * return the name back - we want to make sure we have enough space */ 1073 return 0; /* Unsupported xattr name, skip it */
1073 len += xah->list (b->r_inode, name, namelen, NULL); 1074
1074 1075 /* We call ->list() twice because the operation isn't required to just
1075 if (len) { 1076 * return the name back - we want to make sure we have enough space */
1076 if (b->r_pos + len + 1 <= b->r_size) { 1077 len += xah->list(b->r_inode, name, namelen, NULL);
1077 char *p = b->r_buf + b->r_pos; 1078
1078 p += xah->list (b->r_inode, name, namelen, p); 1079 if (len) {
1079 *p++ = '\0'; 1080 if (b->r_pos + len + 1 <= b->r_size) {
1080 } 1081 char *p = b->r_buf + b->r_pos;
1081 b->r_pos += len + 1; 1082 p += xah->list(b->r_inode, name, namelen, p);
1082 } 1083 *p++ = '\0';
1083 } 1084 }
1084 1085 b->r_pos += len + 1;
1085 return 0; 1086 }
1087 }
1088
1089 return 0;
1086} 1090}
1091
1087/* 1092/*
1088 * Inode operation listxattr() 1093 * Inode operation listxattr()
1089 * 1094 *
1090 * Preliminary locking: we down dentry->d_inode->i_sem 1095 * Preliminary locking: we down dentry->d_inode->i_sem
1091 */ 1096 */
1092ssize_t 1097ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
1093reiserfs_listxattr (struct dentry *dentry, char *buffer, size_t size)
1094{ 1098{
1095 struct file *fp; 1099 struct file *fp;
1096 struct dentry *dir; 1100 struct dentry *dir;
1097 int err = 0; 1101 int err = 0;
1098 struct reiserfs_listxattr_buf buf; 1102 struct reiserfs_listxattr_buf buf;
1099 1103
1100 if (!dentry->d_inode) 1104 if (!dentry->d_inode)
1101 return -EINVAL; 1105 return -EINVAL;
1102 1106
1103 if (!reiserfs_xattrs(dentry->d_sb) || 1107 if (!reiserfs_xattrs(dentry->d_sb) ||
1104 get_inode_sd_version (dentry->d_inode) == STAT_DATA_V1) 1108 get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
1105 return -EOPNOTSUPP; 1109 return -EOPNOTSUPP;
1106 1110
1107 reiserfs_read_lock_xattr_i (dentry->d_inode); 1111 reiserfs_read_lock_xattr_i(dentry->d_inode);
1108 reiserfs_read_lock_xattrs (dentry->d_sb); 1112 reiserfs_read_lock_xattrs(dentry->d_sb);
1109 dir = open_xa_dir (dentry->d_inode, FL_READONLY); 1113 dir = open_xa_dir(dentry->d_inode, FL_READONLY);
1110 reiserfs_read_unlock_xattrs (dentry->d_sb); 1114 reiserfs_read_unlock_xattrs(dentry->d_sb);
1111 if (IS_ERR (dir)) { 1115 if (IS_ERR(dir)) {
1112 err = PTR_ERR (dir); 1116 err = PTR_ERR(dir);
1113 if (err == -ENODATA) 1117 if (err == -ENODATA)
1114 err = 0; /* Not an error if there aren't any xattrs */ 1118 err = 0; /* Not an error if there aren't any xattrs */
1115 goto out; 1119 goto out;
1116 } 1120 }
1117 1121
1118 fp = dentry_open (dir, NULL, O_RDWR); 1122 fp = dentry_open(dir, NULL, O_RDWR);
1119 if (IS_ERR (fp)) { 1123 if (IS_ERR(fp)) {
1120 err = PTR_ERR (fp); 1124 err = PTR_ERR(fp);
1121 /* dentry_open dputs the dentry if it fails */ 1125 /* dentry_open dputs the dentry if it fails */
1122 goto out; 1126 goto out;
1123 } 1127 }
1124 1128
1125 buf.r_buf = buffer; 1129 buf.r_buf = buffer;
1126 buf.r_size = buffer ? size : 0; 1130 buf.r_size = buffer ? size : 0;
1127 buf.r_pos = 0; 1131 buf.r_pos = 0;
1128 buf.r_inode = dentry->d_inode; 1132 buf.r_inode = dentry->d_inode;
1129 1133
1130 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir; 1134 REISERFS_I(dentry->d_inode)->i_flags |= i_has_xattr_dir;
1131 1135
1132 err = xattr_readdir (fp, reiserfs_listxattr_filler, &buf); 1136 err = xattr_readdir(fp, reiserfs_listxattr_filler, &buf);
1133 if (err) 1137 if (err)
1134 goto out_dir; 1138 goto out_dir;
1135 1139
1136 if (buf.r_pos > buf.r_size && buffer != NULL) 1140 if (buf.r_pos > buf.r_size && buffer != NULL)
1137 err = -ERANGE; 1141 err = -ERANGE;
1138 else 1142 else
1139 err = buf.r_pos; 1143 err = buf.r_pos;
1140 1144
1141out_dir: 1145 out_dir:
1142 fput(fp); 1146 fput(fp);
1143 1147
1144out: 1148 out:
1145 reiserfs_read_unlock_xattr_i (dentry->d_inode); 1149 reiserfs_read_unlock_xattr_i(dentry->d_inode);
1146 return err; 1150 return err;
1147} 1151}
1148 1152
1149/* This is the implementation for the xattr plugin infrastructure */ 1153/* This is the implementation for the xattr plugin infrastructure */
1150static struct list_head xattr_handlers = LIST_HEAD_INIT (xattr_handlers); 1154static struct list_head xattr_handlers = LIST_HEAD_INIT(xattr_handlers);
1151static DEFINE_RWLOCK(handler_lock); 1155static DEFINE_RWLOCK(handler_lock);
1152 1156
1153static struct reiserfs_xattr_handler * 1157static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
1154find_xattr_handler_prefix (const char *prefix) 1158 *prefix)
1155{ 1159{
1156 struct reiserfs_xattr_handler *xah = NULL; 1160 struct reiserfs_xattr_handler *xah = NULL;
1157 struct list_head *p; 1161 struct list_head *p;
1158 1162
1159 read_lock (&handler_lock); 1163 read_lock(&handler_lock);
1160 list_for_each (p, &xattr_handlers) { 1164 list_for_each(p, &xattr_handlers) {
1161 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1165 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1162 if (strncmp (xah->prefix, prefix, strlen (xah->prefix)) == 0) 1166 if (strncmp(xah->prefix, prefix, strlen(xah->prefix)) == 0)
1163 break; 1167 break;
1164 xah = NULL; 1168 xah = NULL;
1165 } 1169 }
1166 1170
1167 read_unlock (&handler_lock); 1171 read_unlock(&handler_lock);
1168 return xah; 1172 return xah;
1169} 1173}
1170 1174
1171static void 1175static void __unregister_handlers(void)
1172__unregister_handlers (void)
1173{ 1176{
1174 struct reiserfs_xattr_handler *xah; 1177 struct reiserfs_xattr_handler *xah;
1175 struct list_head *p, *tmp; 1178 struct list_head *p, *tmp;
1176 1179
1177 list_for_each_safe (p, tmp, &xattr_handlers) { 1180 list_for_each_safe(p, tmp, &xattr_handlers) {
1178 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1181 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1179 if (xah->exit) 1182 if (xah->exit)
1180 xah->exit(); 1183 xah->exit();
1181 1184
1182 list_del_init (p); 1185 list_del_init(p);
1183 } 1186 }
1184 INIT_LIST_HEAD (&xattr_handlers); 1187 INIT_LIST_HEAD(&xattr_handlers);
1185} 1188}
1186 1189
1187int __init 1190int __init reiserfs_xattr_register_handlers(void)
1188reiserfs_xattr_register_handlers (void)
1189{ 1191{
1190 int err = 0; 1192 int err = 0;
1191 struct reiserfs_xattr_handler *xah; 1193 struct reiserfs_xattr_handler *xah;
1192 struct list_head *p; 1194 struct list_head *p;
1193 1195
1194 write_lock (&handler_lock); 1196 write_lock(&handler_lock);
1195 1197
1196 /* If we're already initialized, nothing to do */ 1198 /* If we're already initialized, nothing to do */
1197 if (!list_empty (&xattr_handlers)) { 1199 if (!list_empty(&xattr_handlers)) {
1198 write_unlock (&handler_lock); 1200 write_unlock(&handler_lock);
1199 return 0; 1201 return 0;
1200 } 1202 }
1201 1203
1202 /* Add the handlers */ 1204 /* Add the handlers */
1203 list_add_tail (&user_handler.handlers, &xattr_handlers); 1205 list_add_tail(&user_handler.handlers, &xattr_handlers);
1204 list_add_tail (&trusted_handler.handlers, &xattr_handlers); 1206 list_add_tail(&trusted_handler.handlers, &xattr_handlers);
1205#ifdef CONFIG_REISERFS_FS_SECURITY 1207#ifdef CONFIG_REISERFS_FS_SECURITY
1206 list_add_tail (&security_handler.handlers, &xattr_handlers); 1208 list_add_tail(&security_handler.handlers, &xattr_handlers);
1207#endif 1209#endif
1208#ifdef CONFIG_REISERFS_FS_POSIX_ACL 1210#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1209 list_add_tail (&posix_acl_access_handler.handlers, &xattr_handlers); 1211 list_add_tail(&posix_acl_access_handler.handlers, &xattr_handlers);
1210 list_add_tail (&posix_acl_default_handler.handlers, &xattr_handlers); 1212 list_add_tail(&posix_acl_default_handler.handlers, &xattr_handlers);
1211#endif 1213#endif
1212 1214
1213 /* Run initializers, if available */ 1215 /* Run initializers, if available */
1214 list_for_each (p, &xattr_handlers) { 1216 list_for_each(p, &xattr_handlers) {
1215 xah = list_entry (p, struct reiserfs_xattr_handler, handlers); 1217 xah = list_entry(p, struct reiserfs_xattr_handler, handlers);
1216 if (xah->init) { 1218 if (xah->init) {
1217 err = xah->init (); 1219 err = xah->init();
1218 if (err) { 1220 if (err) {
1219 list_del_init (p); 1221 list_del_init(p);
1220 break; 1222 break;
1221 } 1223 }
1222 } 1224 }
1223 } 1225 }
1224 1226
1225 /* Clean up other handlers, if any failed */ 1227 /* Clean up other handlers, if any failed */
1226 if (err) 1228 if (err)
1227 __unregister_handlers (); 1229 __unregister_handlers();
1228 1230
1229 write_unlock (&handler_lock); 1231 write_unlock(&handler_lock);
1230 return err; 1232 return err;
1231} 1233}
1232 1234
1233void 1235void reiserfs_xattr_unregister_handlers(void)
1234reiserfs_xattr_unregister_handlers (void)
1235{ 1236{
1236 write_lock (&handler_lock); 1237 write_lock(&handler_lock);
1237 __unregister_handlers (); 1238 __unregister_handlers();
1238 write_unlock (&handler_lock); 1239 write_unlock(&handler_lock);
1239} 1240}
1240 1241
1241/* This will catch lookups from the fs root to .reiserfs_priv */ 1242/* This will catch lookups from the fs root to .reiserfs_priv */
1242static int 1243static int
1243xattr_lookup_poison (struct dentry *dentry, struct qstr *q1, struct qstr *name) 1244xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name)
1244{ 1245{
1245 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; 1246 struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root;
1246 if (name->len == priv_root->d_name.len && 1247 if (name->len == priv_root->d_name.len &&
1247 name->hash == priv_root->d_name.hash && 1248 name->hash == priv_root->d_name.hash &&
1248 !memcmp (name->name, priv_root->d_name.name, name->len)) { 1249 !memcmp(name->name, priv_root->d_name.name, name->len)) {
1249 return -ENOENT; 1250 return -ENOENT;
1250 } else if (q1->len == name->len && 1251 } else if (q1->len == name->len &&
1251 !memcmp(q1->name, name->name, name->len)) 1252 !memcmp(q1->name, name->name, name->len))
1252 return 0; 1253 return 0;
1253 return 1; 1254 return 1;
1254} 1255}
1255 1256
1256static struct dentry_operations xattr_lookup_poison_ops = { 1257static struct dentry_operations xattr_lookup_poison_ops = {
1257 .d_compare = xattr_lookup_poison, 1258 .d_compare = xattr_lookup_poison,
1258}; 1259};
1259 1260
1260
1261/* We need to take a copy of the mount flags since things like 1261/* We need to take a copy of the mount flags since things like
1262 * MS_RDONLY don't get set until *after* we're called. 1262 * MS_RDONLY don't get set until *after* we're called.
1263 * mount_flags != mount_options */ 1263 * mount_flags != mount_options */
1264int 1264int reiserfs_xattr_init(struct super_block *s, int mount_flags)
1265reiserfs_xattr_init (struct super_block *s, int mount_flags)
1266{ 1265{
1267 int err = 0; 1266 int err = 0;
1268 1267
1269 /* We need generation numbers to ensure that the oid mapping is correct 1268 /* We need generation numbers to ensure that the oid mapping is correct
1270 * v3.5 filesystems don't have them. */ 1269 * v3.5 filesystems don't have them. */
1271 if (!old_format_only (s)) { 1270 if (!old_format_only(s)) {
1272 set_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1271 set_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1273 } else if (reiserfs_xattrs_optional (s)) { 1272 } else if (reiserfs_xattrs_optional(s)) {
1274 /* Old format filesystem, but optional xattrs have been enabled 1273 /* Old format filesystem, but optional xattrs have been enabled
1275 * at mount time. Error out. */ 1274 * at mount time. Error out. */
1276 reiserfs_warning (s, "xattrs/ACLs not supported on pre v3.6 " 1275 reiserfs_warning(s, "xattrs/ACLs not supported on pre v3.6 "
1277 "format filesystem. Failing mount."); 1276 "format filesystem. Failing mount.");
1278 err = -EOPNOTSUPP; 1277 err = -EOPNOTSUPP;
1279 goto error; 1278 goto error;
1280 } else { 1279 } else {
1281 /* Old format filesystem, but no optional xattrs have been enabled. This 1280 /* Old format filesystem, but no optional xattrs have been enabled. This
1282 * means we silently disable xattrs on the filesystem. */ 1281 * means we silently disable xattrs on the filesystem. */
1283 clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1282 clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1284 } 1283 }
1285 1284
1286 /* If we don't have the privroot located yet - go find it */ 1285 /* If we don't have the privroot located yet - go find it */
1287 if (reiserfs_xattrs (s) && !REISERFS_SB(s)->priv_root) { 1286 if (reiserfs_xattrs(s) && !REISERFS_SB(s)->priv_root) {
1288 struct dentry *dentry; 1287 struct dentry *dentry;
1289 dentry = lookup_one_len (PRIVROOT_NAME, s->s_root, 1288 dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
1290 strlen (PRIVROOT_NAME)); 1289 strlen(PRIVROOT_NAME));
1291 if (!IS_ERR (dentry)) { 1290 if (!IS_ERR(dentry)) {
1292 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { 1291 if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
1293 struct inode *inode = dentry->d_parent->d_inode; 1292 struct inode *inode = dentry->d_parent->d_inode;
1294 down (&inode->i_sem); 1293 down(&inode->i_sem);
1295 err = inode->i_op->mkdir (inode, dentry, 0700); 1294 err = inode->i_op->mkdir(inode, dentry, 0700);
1296 up (&inode->i_sem); 1295 up(&inode->i_sem);
1297 if (err) { 1296 if (err) {
1298 dput (dentry); 1297 dput(dentry);
1299 dentry = NULL; 1298 dentry = NULL;
1300 } 1299 }
1301 1300
1302 if (dentry && dentry->d_inode) 1301 if (dentry && dentry->d_inode)
1303 reiserfs_warning (s, "Created %s on %s - reserved for " 1302 reiserfs_warning(s,
1304 "xattr storage.", PRIVROOT_NAME, 1303 "Created %s on %s - reserved for "
1305 reiserfs_bdevname (inode->i_sb)); 1304 "xattr storage.",
1306 } else if (!dentry->d_inode) { 1305 PRIVROOT_NAME,
1307 dput (dentry); 1306 reiserfs_bdevname
1308 dentry = NULL; 1307 (inode->i_sb));
1309 } 1308 } else if (!dentry->d_inode) {
1310 } else 1309 dput(dentry);
1311 err = PTR_ERR (dentry); 1310 dentry = NULL;
1312 1311 }
1313 if (!err && dentry) { 1312 } else
1314 s->s_root->d_op = &xattr_lookup_poison_ops; 1313 err = PTR_ERR(dentry);
1315 reiserfs_mark_inode_private (dentry->d_inode); 1314
1316 REISERFS_SB(s)->priv_root = dentry; 1315 if (!err && dentry) {
1317 } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */ 1316 s->s_root->d_op = &xattr_lookup_poison_ops;
1318 /* If we're read-only it just means that the dir hasn't been 1317 reiserfs_mark_inode_private(dentry->d_inode);
1319 * created. Not an error -- just no xattrs on the fs. We'll 1318 REISERFS_SB(s)->priv_root = dentry;
1320 * check again if we go read-write */ 1319 } else if (!(mount_flags & MS_RDONLY)) { /* xattrs are unavailable */
1321 reiserfs_warning (s, "xattrs/ACLs enabled and couldn't " 1320 /* If we're read-only it just means that the dir hasn't been
1322 "find/create .reiserfs_priv. Failing mount."); 1321 * created. Not an error -- just no xattrs on the fs. We'll
1323 err = -EOPNOTSUPP; 1322 * check again if we go read-write */
1324 } 1323 reiserfs_warning(s, "xattrs/ACLs enabled and couldn't "
1325 } 1324 "find/create .reiserfs_priv. Failing mount.");
1326 1325 err = -EOPNOTSUPP;
1327error: 1326 }
1328 /* This is only nonzero if there was an error initializing the xattr 1327 }
1329 * directory or if there is a condition where we don't support them. */ 1328
1330 if (err) { 1329 error:
1331 clear_bit (REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt)); 1330 /* This is only nonzero if there was an error initializing the xattr
1332 clear_bit (REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt)); 1331 * directory or if there is a condition where we don't support them. */
1333 clear_bit (REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt)); 1332 if (err) {
1334 } 1333 clear_bit(REISERFS_XATTRS, &(REISERFS_SB(s)->s_mount_opt));
1335 1334 clear_bit(REISERFS_XATTRS_USER, &(REISERFS_SB(s)->s_mount_opt));
1336 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ 1335 clear_bit(REISERFS_POSIXACL, &(REISERFS_SB(s)->s_mount_opt));
1337 s->s_flags = s->s_flags & ~MS_POSIXACL; 1336 }
1338 if (reiserfs_posixacl (s)) 1337
1339 s->s_flags |= MS_POSIXACL; 1338 /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */
1340 1339 s->s_flags = s->s_flags & ~MS_POSIXACL;
1341 return err; 1340 if (reiserfs_posixacl(s))
1341 s->s_flags |= MS_POSIXACL;
1342
1343 return err;
1342} 1344}
1343 1345
1344static int 1346static int
1345__reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd, 1347__reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd,
1346 int need_lock) 1348 int need_lock)
1347{ 1349{
1348 umode_t mode = inode->i_mode; 1350 umode_t mode = inode->i_mode;
1349 1351
1350 if (mask & MAY_WRITE) { 1352 if (mask & MAY_WRITE) {
1351 /* 1353 /*
@@ -1363,50 +1365,50 @@ __reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd,
1363 } 1365 }
1364 1366
1365 /* We don't do permission checks on the internal objects. 1367 /* We don't do permission checks on the internal objects.
1366 * Permissions are determined by the "owning" object. */ 1368 * Permissions are determined by the "owning" object. */
1367 if (is_reiserfs_priv_object (inode)) 1369 if (is_reiserfs_priv_object(inode))
1368 return 0; 1370 return 0;
1369 1371
1370 if (current->fsuid == inode->i_uid) { 1372 if (current->fsuid == inode->i_uid) {
1371 mode >>= 6; 1373 mode >>= 6;
1372#ifdef CONFIG_REISERFS_FS_POSIX_ACL 1374#ifdef CONFIG_REISERFS_FS_POSIX_ACL
1373 } else if (reiserfs_posixacl(inode->i_sb) && 1375 } else if (reiserfs_posixacl(inode->i_sb) &&
1374 get_inode_sd_version (inode) != STAT_DATA_V1) { 1376 get_inode_sd_version(inode) != STAT_DATA_V1) {
1375 struct posix_acl *acl; 1377 struct posix_acl *acl;
1376 1378
1377 /* ACL can't contain additional permissions if 1379 /* ACL can't contain additional permissions if
1378 the ACL_MASK entry is 0 */ 1380 the ACL_MASK entry is 0 */
1379 if (!(mode & S_IRWXG)) 1381 if (!(mode & S_IRWXG))
1380 goto check_groups; 1382 goto check_groups;
1381 1383
1382 if (need_lock) { 1384 if (need_lock) {
1383 reiserfs_read_lock_xattr_i (inode); 1385 reiserfs_read_lock_xattr_i(inode);
1384 reiserfs_read_lock_xattrs (inode->i_sb); 1386 reiserfs_read_lock_xattrs(inode->i_sb);
1387 }
1388 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
1389 if (need_lock) {
1390 reiserfs_read_unlock_xattrs(inode->i_sb);
1391 reiserfs_read_unlock_xattr_i(inode);
1385 } 1392 }
1386 acl = reiserfs_get_acl (inode, ACL_TYPE_ACCESS); 1393 if (IS_ERR(acl)) {
1387 if (need_lock) { 1394 if (PTR_ERR(acl) == -ENODATA)
1388 reiserfs_read_unlock_xattrs (inode->i_sb); 1395 goto check_groups;
1389 reiserfs_read_unlock_xattr_i (inode); 1396 return PTR_ERR(acl);
1390 } 1397 }
1391 if (IS_ERR (acl)) { 1398
1392 if (PTR_ERR (acl) == -ENODATA) 1399 if (acl) {
1393 goto check_groups; 1400 int err = posix_acl_permission(inode, acl, mask);
1394 return PTR_ERR (acl); 1401 posix_acl_release(acl);
1395 } 1402 if (err == -EACCES) {
1396 1403 goto check_capabilities;
1397 if (acl) { 1404 }
1398 int err = posix_acl_permission (inode, acl, mask); 1405 return err;
1399 posix_acl_release (acl);
1400 if (err == -EACCES) {
1401 goto check_capabilities;
1402 }
1403 return err;
1404 } else { 1406 } else {
1405 goto check_groups; 1407 goto check_groups;
1406 } 1408 }
1407#endif 1409#endif
1408 } else { 1410 } else {
1409check_groups: 1411 check_groups:
1410 if (in_group_p(inode->i_gid)) 1412 if (in_group_p(inode->i_gid))
1411 mode >>= 3; 1413 mode >>= 3;
1412 } 1414 }
@@ -1414,10 +1416,10 @@ check_groups:
1414 /* 1416 /*
1415 * If the DACs are ok we don't need any capability check. 1417 * If the DACs are ok we don't need any capability check.
1416 */ 1418 */
1417 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 1419 if (((mode & mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == mask))
1418 return 0; 1420 return 0;
1419 1421
1420check_capabilities: 1422 check_capabilities:
1421 /* 1423 /*
1422 * Read/write DACs are always overridable. 1424 * Read/write DACs are always overridable.
1423 * Executable DACs are overridable if at least one exec bit is set. 1425 * Executable DACs are overridable if at least one exec bit is set.
@@ -1437,14 +1439,13 @@ check_capabilities:
1437 return -EACCES; 1439 return -EACCES;
1438} 1440}
1439 1441
1440int 1442int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1441reiserfs_permission (struct inode *inode, int mask, struct nameidata *nd)
1442{ 1443{
1443 return __reiserfs_permission (inode, mask, nd, 1); 1444 return __reiserfs_permission(inode, mask, nd, 1);
1444} 1445}
1445 1446
1446int 1447int
1447reiserfs_permission_locked (struct inode *inode, int mask, struct nameidata *nd) 1448reiserfs_permission_locked(struct inode *inode, int mask, struct nameidata *nd)
1448{ 1449{
1449 return __reiserfs_permission (inode, mask, nd, 0); 1450 return __reiserfs_permission(inode, mask, nd, 0);
1450} 1451}
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index c312881c5f5..6703efa3c43 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -9,7 +9,8 @@
9#include <linux/reiserfs_acl.h> 9#include <linux/reiserfs_acl.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11 11
12static int reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl); 12static int reiserfs_set_acl(struct inode *inode, int type,
13 struct posix_acl *acl);
13 14
14static int 15static int
15xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) 16xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
@@ -34,14 +35,13 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
34 } else 35 } else
35 acl = NULL; 36 acl = NULL;
36 37
37 error = reiserfs_set_acl (inode, type, acl); 38 error = reiserfs_set_acl(inode, type, acl);
38 39
39release_and_out: 40 release_and_out:
40 posix_acl_release(acl); 41 posix_acl_release(acl);
41 return error; 42 return error;
42} 43}
43 44
44
45static int 45static int
46xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) 46xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
47{ 47{
@@ -51,7 +51,7 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
51 if (!reiserfs_posixacl(inode->i_sb)) 51 if (!reiserfs_posixacl(inode->i_sb))
52 return -EOPNOTSUPP; 52 return -EOPNOTSUPP;
53 53
54 acl = reiserfs_get_acl (inode, type); 54 acl = reiserfs_get_acl(inode, type);
55 if (IS_ERR(acl)) 55 if (IS_ERR(acl))
56 return PTR_ERR(acl); 56 return PTR_ERR(acl);
57 if (acl == NULL) 57 if (acl == NULL)
@@ -62,12 +62,10 @@ xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
62 return error; 62 return error;
63} 63}
64 64
65
66/* 65/*
67 * Convert from filesystem to in-memory representation. 66 * Convert from filesystem to in-memory representation.
68 */ 67 */
69static struct posix_acl * 68static struct posix_acl *posix_acl_from_disk(const void *value, size_t size)
70posix_acl_from_disk(const void *value, size_t size)
71{ 69{
72 const char *end = (char *)value + size; 70 const char *end = (char *)value + size;
73 int n, count; 71 int n, count;
@@ -76,8 +74,8 @@ posix_acl_from_disk(const void *value, size_t size)
76 if (!value) 74 if (!value)
77 return NULL; 75 return NULL;
78 if (size < sizeof(reiserfs_acl_header)) 76 if (size < sizeof(reiserfs_acl_header))
79 return ERR_PTR(-EINVAL); 77 return ERR_PTR(-EINVAL);
80 if (((reiserfs_acl_header *)value)->a_version != 78 if (((reiserfs_acl_header *) value)->a_version !=
81 cpu_to_le32(REISERFS_ACL_VERSION)) 79 cpu_to_le32(REISERFS_ACL_VERSION))
82 return ERR_PTR(-EINVAL); 80 return ERR_PTR(-EINVAL);
83 value = (char *)value + sizeof(reiserfs_acl_header); 81 value = (char *)value + sizeof(reiserfs_acl_header);
@@ -89,41 +87,39 @@ posix_acl_from_disk(const void *value, size_t size)
89 acl = posix_acl_alloc(count, GFP_NOFS); 87 acl = posix_acl_alloc(count, GFP_NOFS);
90 if (!acl) 88 if (!acl)
91 return ERR_PTR(-ENOMEM); 89 return ERR_PTR(-ENOMEM);
92 for (n=0; n < count; n++) { 90 for (n = 0; n < count; n++) {
93 reiserfs_acl_entry *entry = 91 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) value;
94 (reiserfs_acl_entry *)value;
95 if ((char *)value + sizeof(reiserfs_acl_entry_short) > end) 92 if ((char *)value + sizeof(reiserfs_acl_entry_short) > end)
96 goto fail; 93 goto fail;
97 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 94 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
98 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 95 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
99 switch(acl->a_entries[n].e_tag) { 96 switch (acl->a_entries[n].e_tag) {
100 case ACL_USER_OBJ: 97 case ACL_USER_OBJ:
101 case ACL_GROUP_OBJ: 98 case ACL_GROUP_OBJ:
102 case ACL_MASK: 99 case ACL_MASK:
103 case ACL_OTHER: 100 case ACL_OTHER:
104 value = (char *)value + 101 value = (char *)value +
105 sizeof(reiserfs_acl_entry_short); 102 sizeof(reiserfs_acl_entry_short);
106 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 103 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
107 break; 104 break;
108 105
109 case ACL_USER: 106 case ACL_USER:
110 case ACL_GROUP: 107 case ACL_GROUP:
111 value = (char *)value + sizeof(reiserfs_acl_entry); 108 value = (char *)value + sizeof(reiserfs_acl_entry);
112 if ((char *)value > end) 109 if ((char *)value > end)
113 goto fail;
114 acl->a_entries[n].e_id =
115 le32_to_cpu(entry->e_id);
116 break;
117
118 default:
119 goto fail; 110 goto fail;
111 acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
112 break;
113
114 default:
115 goto fail;
120 } 116 }
121 } 117 }
122 if (value != end) 118 if (value != end)
123 goto fail; 119 goto fail;
124 return acl; 120 return acl;
125 121
126fail: 122 fail:
127 posix_acl_release(acl); 123 posix_acl_release(acl);
128 return ERR_PTR(-EINVAL); 124 return ERR_PTR(-EINVAL);
129} 125}
@@ -131,46 +127,46 @@ fail:
131/* 127/*
132 * Convert from in-memory to filesystem representation. 128 * Convert from in-memory to filesystem representation.
133 */ 129 */
134static void * 130static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size)
135posix_acl_to_disk(const struct posix_acl *acl, size_t *size)
136{ 131{
137 reiserfs_acl_header *ext_acl; 132 reiserfs_acl_header *ext_acl;
138 char *e; 133 char *e;
139 int n; 134 int n;
140 135
141 *size = reiserfs_acl_size(acl->a_count); 136 *size = reiserfs_acl_size(acl->a_count);
142 ext_acl = (reiserfs_acl_header *)kmalloc(sizeof(reiserfs_acl_header) + 137 ext_acl = (reiserfs_acl_header *) kmalloc(sizeof(reiserfs_acl_header) +
143 acl->a_count * sizeof(reiserfs_acl_entry), GFP_NOFS); 138 acl->a_count *
139 sizeof(reiserfs_acl_entry),
140 GFP_NOFS);
144 if (!ext_acl) 141 if (!ext_acl)
145 return ERR_PTR(-ENOMEM); 142 return ERR_PTR(-ENOMEM);
146 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION); 143 ext_acl->a_version = cpu_to_le32(REISERFS_ACL_VERSION);
147 e = (char *)ext_acl + sizeof(reiserfs_acl_header); 144 e = (char *)ext_acl + sizeof(reiserfs_acl_header);
148 for (n=0; n < acl->a_count; n++) { 145 for (n = 0; n < acl->a_count; n++) {
149 reiserfs_acl_entry *entry = (reiserfs_acl_entry *)e; 146 reiserfs_acl_entry *entry = (reiserfs_acl_entry *) e;
150 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 147 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
151 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 148 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
152 switch(acl->a_entries[n].e_tag) { 149 switch (acl->a_entries[n].e_tag) {
153 case ACL_USER: 150 case ACL_USER:
154 case ACL_GROUP: 151 case ACL_GROUP:
155 entry->e_id = 152 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
156 cpu_to_le32(acl->a_entries[n].e_id); 153 e += sizeof(reiserfs_acl_entry);
157 e += sizeof(reiserfs_acl_entry); 154 break;
158 break; 155
159 156 case ACL_USER_OBJ:
160 case ACL_USER_OBJ: 157 case ACL_GROUP_OBJ:
161 case ACL_GROUP_OBJ: 158 case ACL_MASK:
162 case ACL_MASK: 159 case ACL_OTHER:
163 case ACL_OTHER: 160 e += sizeof(reiserfs_acl_entry_short);
164 e += sizeof(reiserfs_acl_entry_short); 161 break;
165 break; 162
166 163 default:
167 default: 164 goto fail;
168 goto fail;
169 } 165 }
170 } 166 }
171 return (char *)ext_acl; 167 return (char *)ext_acl;
172 168
173fail: 169 fail:
174 kfree(ext_acl); 170 kfree(ext_acl);
175 return ERR_PTR(-EINVAL); 171 return ERR_PTR(-EINVAL);
176} 172}
@@ -181,59 +177,58 @@ fail:
181 * inode->i_sem: down 177 * inode->i_sem: down
182 * BKL held [before 2.5.x] 178 * BKL held [before 2.5.x]
183 */ 179 */
184struct posix_acl * 180struct posix_acl *reiserfs_get_acl(struct inode *inode, int type)
185reiserfs_get_acl(struct inode *inode, int type)
186{ 181{
187 char *name, *value; 182 char *name, *value;
188 struct posix_acl *acl, **p_acl; 183 struct posix_acl *acl, **p_acl;
189 size_t size; 184 size_t size;
190 int retval; 185 int retval;
191 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 186 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
192 187
193 switch (type) { 188 switch (type) {
194 case ACL_TYPE_ACCESS: 189 case ACL_TYPE_ACCESS:
195 name = POSIX_ACL_XATTR_ACCESS; 190 name = POSIX_ACL_XATTR_ACCESS;
196 p_acl = &reiserfs_i->i_acl_access; 191 p_acl = &reiserfs_i->i_acl_access;
197 break; 192 break;
198 case ACL_TYPE_DEFAULT: 193 case ACL_TYPE_DEFAULT:
199 name = POSIX_ACL_XATTR_DEFAULT; 194 name = POSIX_ACL_XATTR_DEFAULT;
200 p_acl = &reiserfs_i->i_acl_default; 195 p_acl = &reiserfs_i->i_acl_default;
201 break; 196 break;
202 default: 197 default:
203 return ERR_PTR (-EINVAL); 198 return ERR_PTR(-EINVAL);
204 } 199 }
205 200
206 if (IS_ERR (*p_acl)) { 201 if (IS_ERR(*p_acl)) {
207 if (PTR_ERR (*p_acl) == -ENODATA) 202 if (PTR_ERR(*p_acl) == -ENODATA)
208 return NULL; 203 return NULL;
209 } else if (*p_acl != NULL) 204 } else if (*p_acl != NULL)
210 return posix_acl_dup (*p_acl); 205 return posix_acl_dup(*p_acl);
211 206
212 size = reiserfs_xattr_get (inode, name, NULL, 0); 207 size = reiserfs_xattr_get(inode, name, NULL, 0);
213 if ((int)size < 0) { 208 if ((int)size < 0) {
214 if (size == -ENODATA || size == -ENOSYS) { 209 if (size == -ENODATA || size == -ENOSYS) {
215 *p_acl = ERR_PTR (-ENODATA); 210 *p_acl = ERR_PTR(-ENODATA);
216 return NULL; 211 return NULL;
217 } 212 }
218 return ERR_PTR (size); 213 return ERR_PTR(size);
219 } 214 }
220 215
221 value = kmalloc (size, GFP_NOFS); 216 value = kmalloc(size, GFP_NOFS);
222 if (!value) 217 if (!value)
223 return ERR_PTR (-ENOMEM); 218 return ERR_PTR(-ENOMEM);
224 219
225 retval = reiserfs_xattr_get(inode, name, value, size); 220 retval = reiserfs_xattr_get(inode, name, value, size);
226 if (retval == -ENODATA || retval == -ENOSYS) { 221 if (retval == -ENODATA || retval == -ENOSYS) {
227 /* This shouldn't actually happen as it should have 222 /* This shouldn't actually happen as it should have
228 been caught above.. but just in case */ 223 been caught above.. but just in case */
229 acl = NULL; 224 acl = NULL;
230 *p_acl = ERR_PTR (-ENODATA); 225 *p_acl = ERR_PTR(-ENODATA);
231 } else if (retval < 0) { 226 } else if (retval < 0) {
232 acl = ERR_PTR(retval); 227 acl = ERR_PTR(retval);
233 } else { 228 } else {
234 acl = posix_acl_from_disk(value, retval); 229 acl = posix_acl_from_disk(value, retval);
235 *p_acl = posix_acl_dup (acl); 230 *p_acl = posix_acl_dup(acl);
236 } 231 }
237 232
238 kfree(value); 233 kfree(value);
239 return acl; 234 return acl;
@@ -248,72 +243,72 @@ reiserfs_get_acl(struct inode *inode, int type)
248static int 243static int
249reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) 244reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
250{ 245{
251 char *name; 246 char *name;
252 void *value = NULL; 247 void *value = NULL;
253 struct posix_acl **p_acl; 248 struct posix_acl **p_acl;
254 size_t size; 249 size_t size;
255 int error; 250 int error;
256 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 251 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
257 252
258 if (S_ISLNK(inode->i_mode)) 253 if (S_ISLNK(inode->i_mode))
259 return -EOPNOTSUPP; 254 return -EOPNOTSUPP;
260 255
261 switch (type) { 256 switch (type) {
262 case ACL_TYPE_ACCESS: 257 case ACL_TYPE_ACCESS:
263 name = POSIX_ACL_XATTR_ACCESS; 258 name = POSIX_ACL_XATTR_ACCESS;
264 p_acl = &reiserfs_i->i_acl_access; 259 p_acl = &reiserfs_i->i_acl_access;
265 if (acl) { 260 if (acl) {
266 mode_t mode = inode->i_mode; 261 mode_t mode = inode->i_mode;
267 error = posix_acl_equiv_mode (acl, &mode); 262 error = posix_acl_equiv_mode(acl, &mode);
268 if (error < 0) 263 if (error < 0)
269 return error; 264 return error;
270 else { 265 else {
271 inode->i_mode = mode; 266 inode->i_mode = mode;
272 if (error == 0) 267 if (error == 0)
273 acl = NULL; 268 acl = NULL;
274 } 269 }
275 } 270 }
276 break; 271 break;
277 case ACL_TYPE_DEFAULT: 272 case ACL_TYPE_DEFAULT:
278 name = POSIX_ACL_XATTR_DEFAULT; 273 name = POSIX_ACL_XATTR_DEFAULT;
279 p_acl = &reiserfs_i->i_acl_default; 274 p_acl = &reiserfs_i->i_acl_default;
280 if (!S_ISDIR (inode->i_mode)) 275 if (!S_ISDIR(inode->i_mode))
281 return acl ? -EACCES : 0; 276 return acl ? -EACCES : 0;
282 break; 277 break;
283 default: 278 default:
284 return -EINVAL; 279 return -EINVAL;
285 } 280 }
286 281
287 if (acl) { 282 if (acl) {
288 value = posix_acl_to_disk(acl, &size); 283 value = posix_acl_to_disk(acl, &size);
289 if (IS_ERR(value)) 284 if (IS_ERR(value))
290 return (int)PTR_ERR(value); 285 return (int)PTR_ERR(value);
291 error = reiserfs_xattr_set(inode, name, value, size, 0); 286 error = reiserfs_xattr_set(inode, name, value, size, 0);
292 } else { 287 } else {
293 error = reiserfs_xattr_del (inode, name); 288 error = reiserfs_xattr_del(inode, name);
294 if (error == -ENODATA) { 289 if (error == -ENODATA) {
295 /* This may seem odd here, but it means that the ACL was set 290 /* This may seem odd here, but it means that the ACL was set
296 * with a value representable with mode bits. If there was 291 * with a value representable with mode bits. If there was
297 * an ACL before, reiserfs_xattr_del already dirtied the inode. 292 * an ACL before, reiserfs_xattr_del already dirtied the inode.
298 */ 293 */
299 mark_inode_dirty (inode); 294 mark_inode_dirty(inode);
300 error = 0; 295 error = 0;
301 } 296 }
302 } 297 }
303 298
304 if (value) 299 if (value)
305 kfree(value); 300 kfree(value);
306 301
307 if (!error) { 302 if (!error) {
308 /* Release the old one */ 303 /* Release the old one */
309 if (!IS_ERR (*p_acl) && *p_acl) 304 if (!IS_ERR(*p_acl) && *p_acl)
310 posix_acl_release (*p_acl); 305 posix_acl_release(*p_acl);
311 306
312 if (acl == NULL) 307 if (acl == NULL)
313 *p_acl = ERR_PTR (-ENODATA); 308 *p_acl = ERR_PTR(-ENODATA);
314 else 309 else
315 *p_acl = posix_acl_dup (acl); 310 *p_acl = posix_acl_dup(acl);
316 } 311 }
317 312
318 return error; 313 return error;
319} 314}
@@ -321,192 +316,190 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
321/* dir->i_sem: down, 316/* dir->i_sem: down,
322 * inode is new and not released into the wild yet */ 317 * inode is new and not released into the wild yet */
323int 318int
324reiserfs_inherit_default_acl (struct inode *dir, struct dentry *dentry, struct inode *inode) 319reiserfs_inherit_default_acl(struct inode *dir, struct dentry *dentry,
320 struct inode *inode)
325{ 321{
326 struct posix_acl *acl; 322 struct posix_acl *acl;
327 int err = 0; 323 int err = 0;
328 324
329 /* ACLs only get applied to files and directories */ 325 /* ACLs only get applied to files and directories */
330 if (S_ISLNK (inode->i_mode)) 326 if (S_ISLNK(inode->i_mode))
331 return 0; 327 return 0;
332 328
333 /* ACLs can only be used on "new" objects, so if it's an old object 329 /* ACLs can only be used on "new" objects, so if it's an old object
334 * there is nothing to inherit from */ 330 * there is nothing to inherit from */
335 if (get_inode_sd_version (dir) == STAT_DATA_V1) 331 if (get_inode_sd_version(dir) == STAT_DATA_V1)
336 goto apply_umask; 332 goto apply_umask;
337 333
338 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This 334 /* Don't apply ACLs to objects in the .reiserfs_priv tree.. This
339 * would be useless since permissions are ignored, and a pain because 335 * would be useless since permissions are ignored, and a pain because
340 * it introduces locking cycles */ 336 * it introduces locking cycles */
341 if (is_reiserfs_priv_object (dir)) { 337 if (is_reiserfs_priv_object(dir)) {
342 reiserfs_mark_inode_private (inode); 338 reiserfs_mark_inode_private(inode);
343 goto apply_umask; 339 goto apply_umask;
344 } 340 }
345 341
346 acl = reiserfs_get_acl (dir, ACL_TYPE_DEFAULT); 342 acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT);
347 if (IS_ERR (acl)) { 343 if (IS_ERR(acl)) {
348 if (PTR_ERR (acl) == -ENODATA) 344 if (PTR_ERR(acl) == -ENODATA)
349 goto apply_umask; 345 goto apply_umask;
350 return PTR_ERR (acl); 346 return PTR_ERR(acl);
351 } 347 }
352 348
353 if (acl) { 349 if (acl) {
354 struct posix_acl *acl_copy; 350 struct posix_acl *acl_copy;
355 mode_t mode = inode->i_mode; 351 mode_t mode = inode->i_mode;
356 int need_acl; 352 int need_acl;
357 353
358 /* Copy the default ACL to the default ACL of a new directory */ 354 /* Copy the default ACL to the default ACL of a new directory */
359 if (S_ISDIR (inode->i_mode)) { 355 if (S_ISDIR(inode->i_mode)) {
360 err = reiserfs_set_acl (inode, ACL_TYPE_DEFAULT, acl); 356 err = reiserfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
361 if (err) 357 if (err)
362 goto cleanup; 358 goto cleanup;
363 } 359 }
364 360
365 /* Now we reconcile the new ACL and the mode, 361 /* Now we reconcile the new ACL and the mode,
366 potentially modifying both */ 362 potentially modifying both */
367 acl_copy = posix_acl_clone (acl, GFP_NOFS); 363 acl_copy = posix_acl_clone(acl, GFP_NOFS);
368 if (!acl_copy) { 364 if (!acl_copy) {
369 err = -ENOMEM; 365 err = -ENOMEM;
370 goto cleanup; 366 goto cleanup;
371 } 367 }
372 368
373 369 need_acl = posix_acl_create_masq(acl_copy, &mode);
374 need_acl = posix_acl_create_masq (acl_copy, &mode); 370 if (need_acl >= 0) {
375 if (need_acl >= 0) { 371 if (mode != inode->i_mode) {
376 if (mode != inode->i_mode) { 372 inode->i_mode = mode;
377 inode->i_mode = mode; 373 }
378 } 374
379 375 /* If we need an ACL.. */
380 /* If we need an ACL.. */ 376 if (need_acl > 0) {
381 if (need_acl > 0) { 377 err =
382 err = reiserfs_set_acl (inode, ACL_TYPE_ACCESS, acl_copy); 378 reiserfs_set_acl(inode, ACL_TYPE_ACCESS,
383 if (err) 379 acl_copy);
384 goto cleanup_copy; 380 if (err)
385 } 381 goto cleanup_copy;
386 } 382 }
387cleanup_copy: 383 }
388 posix_acl_release (acl_copy); 384 cleanup_copy:
389cleanup: 385 posix_acl_release(acl_copy);
390 posix_acl_release (acl); 386 cleanup:
391 } else { 387 posix_acl_release(acl);
392apply_umask: 388 } else {
393 /* no ACL, apply umask */ 389 apply_umask:
394 inode->i_mode &= ~current->fs->umask; 390 /* no ACL, apply umask */
395 } 391 inode->i_mode &= ~current->fs->umask;
396 392 }
397 return err; 393
394 return err;
398} 395}
399 396
400/* Looks up and caches the result of the default ACL. 397/* Looks up and caches the result of the default ACL.
401 * We do this so that we don't need to carry the xattr_sem into 398 * We do this so that we don't need to carry the xattr_sem into
402 * reiserfs_new_inode if we don't need to */ 399 * reiserfs_new_inode if we don't need to */
403int 400int reiserfs_cache_default_acl(struct inode *inode)
404reiserfs_cache_default_acl (struct inode *inode)
405{ 401{
406 int ret = 0; 402 int ret = 0;
407 if (reiserfs_posixacl (inode->i_sb) && 403 if (reiserfs_posixacl(inode->i_sb) && !is_reiserfs_priv_object(inode)) {
408 !is_reiserfs_priv_object (inode)) { 404 struct posix_acl *acl;
409 struct posix_acl *acl; 405 reiserfs_read_lock_xattr_i(inode);
410 reiserfs_read_lock_xattr_i (inode); 406 reiserfs_read_lock_xattrs(inode->i_sb);
411 reiserfs_read_lock_xattrs (inode->i_sb); 407 acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT);
412 acl = reiserfs_get_acl (inode, ACL_TYPE_DEFAULT); 408 reiserfs_read_unlock_xattrs(inode->i_sb);
413 reiserfs_read_unlock_xattrs (inode->i_sb); 409 reiserfs_read_unlock_xattr_i(inode);
414 reiserfs_read_unlock_xattr_i (inode); 410 ret = acl ? 1 : 0;
415 ret = acl ? 1 : 0; 411 posix_acl_release(acl);
416 posix_acl_release (acl); 412 }
417 } 413
418 414 return ret;
419 return ret;
420} 415}
421 416
422int 417int reiserfs_acl_chmod(struct inode *inode)
423reiserfs_acl_chmod (struct inode *inode)
424{ 418{
425 struct posix_acl *acl, *clone; 419 struct posix_acl *acl, *clone;
426 int error; 420 int error;
427 421
428 if (S_ISLNK(inode->i_mode)) 422 if (S_ISLNK(inode->i_mode))
429 return -EOPNOTSUPP; 423 return -EOPNOTSUPP;
430 424
431 if (get_inode_sd_version (inode) == STAT_DATA_V1 || 425 if (get_inode_sd_version(inode) == STAT_DATA_V1 ||
432 !reiserfs_posixacl(inode->i_sb)) 426 !reiserfs_posixacl(inode->i_sb)) {
433 { 427 return 0;
434 return 0;
435 } 428 }
436 429
437 reiserfs_read_lock_xattrs (inode->i_sb); 430 reiserfs_read_lock_xattrs(inode->i_sb);
438 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 431 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
439 reiserfs_read_unlock_xattrs (inode->i_sb); 432 reiserfs_read_unlock_xattrs(inode->i_sb);
440 if (!acl) 433 if (!acl)
441 return 0; 434 return 0;
442 if (IS_ERR(acl)) 435 if (IS_ERR(acl))
443 return PTR_ERR(acl); 436 return PTR_ERR(acl);
444 clone = posix_acl_clone(acl, GFP_NOFS); 437 clone = posix_acl_clone(acl, GFP_NOFS);
445 posix_acl_release(acl); 438 posix_acl_release(acl);
446 if (!clone) 439 if (!clone)
447 return -ENOMEM; 440 return -ENOMEM;
448 error = posix_acl_chmod_masq(clone, inode->i_mode); 441 error = posix_acl_chmod_masq(clone, inode->i_mode);
449 if (!error) { 442 if (!error) {
450 int lock = !has_xattr_dir (inode); 443 int lock = !has_xattr_dir(inode);
451 reiserfs_write_lock_xattr_i (inode); 444 reiserfs_write_lock_xattr_i(inode);
452 if (lock) 445 if (lock)
453 reiserfs_write_lock_xattrs (inode->i_sb); 446 reiserfs_write_lock_xattrs(inode->i_sb);
454 else 447 else
455 reiserfs_read_lock_xattrs (inode->i_sb); 448 reiserfs_read_lock_xattrs(inode->i_sb);
456 error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone); 449 error = reiserfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
457 if (lock) 450 if (lock)
458 reiserfs_write_unlock_xattrs (inode->i_sb); 451 reiserfs_write_unlock_xattrs(inode->i_sb);
459 else 452 else
460 reiserfs_read_unlock_xattrs (inode->i_sb); 453 reiserfs_read_unlock_xattrs(inode->i_sb);
461 reiserfs_write_unlock_xattr_i (inode); 454 reiserfs_write_unlock_xattr_i(inode);
462 } 455 }
463 posix_acl_release(clone); 456 posix_acl_release(clone);
464 return error; 457 return error;
465} 458}
466 459
467static int 460static int
468posix_acl_access_get(struct inode *inode, const char *name, 461posix_acl_access_get(struct inode *inode, const char *name,
469 void *buffer, size_t size) 462 void *buffer, size_t size)
470{ 463{
471 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) 464 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
472 return -EINVAL; 465 return -EINVAL;
473 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); 466 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
474} 467}
475 468
476static int 469static int
477posix_acl_access_set(struct inode *inode, const char *name, 470posix_acl_access_set(struct inode *inode, const char *name,
478 const void *value, size_t size, int flags) 471 const void *value, size_t size, int flags)
479{ 472{
480 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) 473 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
481 return -EINVAL; 474 return -EINVAL;
482 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); 475 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
483} 476}
484 477
485static int 478static int posix_acl_access_del(struct inode *inode, const char *name)
486posix_acl_access_del (struct inode *inode, const char *name)
487{ 479{
488 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 480 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
489 struct posix_acl **acl = &reiserfs_i->i_acl_access; 481 struct posix_acl **acl = &reiserfs_i->i_acl_access;
490 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) 482 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
491 return -EINVAL; 483 return -EINVAL;
492 if (!IS_ERR (*acl) && *acl) { 484 if (!IS_ERR(*acl) && *acl) {
493 posix_acl_release (*acl); 485 posix_acl_release(*acl);
494 *acl = ERR_PTR (-ENODATA); 486 *acl = ERR_PTR(-ENODATA);
495 } 487 }
496 488
497 return 0; 489 return 0;
498} 490}
499 491
500static int 492static int
501posix_acl_access_list (struct inode *inode, const char *name, int namelen, char *out) 493posix_acl_access_list(struct inode *inode, const char *name, int namelen,
494 char *out)
502{ 495{
503 int len = namelen; 496 int len = namelen;
504 if (!reiserfs_posixacl (inode->i_sb)) 497 if (!reiserfs_posixacl(inode->i_sb))
505 return 0; 498 return 0;
506 if (out) 499 if (out)
507 memcpy (out, name, len); 500 memcpy(out, name, len);
508 501
509 return len; 502 return len;
510} 503}
511 504
512struct reiserfs_xattr_handler posix_acl_access_handler = { 505struct reiserfs_xattr_handler posix_acl_access_handler = {
@@ -518,48 +511,48 @@ struct reiserfs_xattr_handler posix_acl_access_handler = {
518}; 511};
519 512
520static int 513static int
521posix_acl_default_get (struct inode *inode, const char *name, 514posix_acl_default_get(struct inode *inode, const char *name,
522 void *buffer, size_t size) 515 void *buffer, size_t size)
523{ 516{
524 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) 517 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
525 return -EINVAL; 518 return -EINVAL;
526 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); 519 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
527} 520}
528 521
529static int 522static int
530posix_acl_default_set(struct inode *inode, const char *name, 523posix_acl_default_set(struct inode *inode, const char *name,
531 const void *value, size_t size, int flags) 524 const void *value, size_t size, int flags)
532{ 525{
533 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) 526 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
534 return -EINVAL; 527 return -EINVAL;
535 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); 528 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
536} 529}
537 530
538static int 531static int posix_acl_default_del(struct inode *inode, const char *name)
539posix_acl_default_del (struct inode *inode, const char *name)
540{ 532{
541 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 533 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
542 struct posix_acl **acl = &reiserfs_i->i_acl_default; 534 struct posix_acl **acl = &reiserfs_i->i_acl_default;
543 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) 535 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
544 return -EINVAL; 536 return -EINVAL;
545 if (!IS_ERR (*acl) && *acl) { 537 if (!IS_ERR(*acl) && *acl) {
546 posix_acl_release (*acl); 538 posix_acl_release(*acl);
547 *acl = ERR_PTR (-ENODATA); 539 *acl = ERR_PTR(-ENODATA);
548 } 540 }
549 541
550 return 0; 542 return 0;
551} 543}
552 544
553static int 545static int
554posix_acl_default_list (struct inode *inode, const char *name, int namelen, char *out) 546posix_acl_default_list(struct inode *inode, const char *name, int namelen,
547 char *out)
555{ 548{
556 int len = namelen; 549 int len = namelen;
557 if (!reiserfs_posixacl (inode->i_sb)) 550 if (!reiserfs_posixacl(inode->i_sb))
558 return 0; 551 return 0;
559 if (out) 552 if (out)
560 memcpy (out, name, len); 553 memcpy(out, name, len);
561 554
562 return len; 555 return len;
563} 556}
564 557
565struct reiserfs_xattr_handler posix_acl_default_handler = { 558struct reiserfs_xattr_handler posix_acl_default_handler = {
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index e044d511711..5e90a95ad60 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -9,57 +9,55 @@
9#define XATTR_SECURITY_PREFIX "security." 9#define XATTR_SECURITY_PREFIX "security."
10 10
11static int 11static int
12security_get (struct inode *inode, const char *name, void *buffer, size_t size) 12security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 13{
14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
15 return -EINVAL; 15 return -EINVAL;
16 16
17 if (is_reiserfs_priv_object(inode)) 17 if (is_reiserfs_priv_object(inode))
18 return -EPERM; 18 return -EPERM;
19 19
20 return reiserfs_xattr_get (inode, name, buffer, size); 20 return reiserfs_xattr_get(inode, name, buffer, size);
21} 21}
22 22
23static int 23static int
24security_set (struct inode *inode, const char *name, const void *buffer, 24security_set(struct inode *inode, const char *name, const void *buffer,
25 size_t size, int flags) 25 size_t size, int flags)
26{ 26{
27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 27 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
28 return -EINVAL; 28 return -EINVAL;
29 29
30 if (is_reiserfs_priv_object(inode)) 30 if (is_reiserfs_priv_object(inode))
31 return -EPERM; 31 return -EPERM;
32 32
33 return reiserfs_xattr_set (inode, name, buffer, size, flags); 33 return reiserfs_xattr_set(inode, name, buffer, size, flags);
34} 34}
35 35
36static int 36static int security_del(struct inode *inode, const char *name)
37security_del (struct inode *inode, const char *name)
38{ 37{
39 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) 38 if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
40 return -EINVAL; 39 return -EINVAL;
41 40
42 if (is_reiserfs_priv_object(inode)) 41 if (is_reiserfs_priv_object(inode))
43 return -EPERM; 42 return -EPERM;
44 43
45 return 0; 44 return 0;
46} 45}
47 46
48static int 47static int
49security_list (struct inode *inode, const char *name, int namelen, char *out) 48security_list(struct inode *inode, const char *name, int namelen, char *out)
50{ 49{
51 int len = namelen; 50 int len = namelen;
52 51
53 if (is_reiserfs_priv_object(inode)) 52 if (is_reiserfs_priv_object(inode))
54 return 0; 53 return 0;
55 54
56 if (out) 55 if (out)
57 memcpy (out, name, len); 56 memcpy(out, name, len);
58 57
59 return len; 58 return len;
60} 59}
61 60
62
63struct reiserfs_xattr_handler security_handler = { 61struct reiserfs_xattr_handler security_handler = {
64 .prefix = XATTR_SECURITY_PREFIX, 62 .prefix = XATTR_SECURITY_PREFIX,
65 .get = security_get, 63 .get = security_get,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 43762197fb0..2501f7e66ab 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -9,69 +9,67 @@
9#define XATTR_TRUSTED_PREFIX "trusted." 9#define XATTR_TRUSTED_PREFIX "trusted."
10 10
11static int 11static int
12trusted_get (struct inode *inode, const char *name, void *buffer, size_t size) 12trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 13{
14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 14 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
15 return -EINVAL; 15 return -EINVAL;
16 16
17 if (!reiserfs_xattrs (inode->i_sb)) 17 if (!reiserfs_xattrs(inode->i_sb))
18 return -EOPNOTSUPP; 18 return -EOPNOTSUPP;
19 19
20 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 20 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
21 return -EPERM; 21 return -EPERM;
22 22
23 return reiserfs_xattr_get (inode, name, buffer, size); 23 return reiserfs_xattr_get(inode, name, buffer, size);
24} 24}
25 25
26static int 26static int
27trusted_set (struct inode *inode, const char *name, const void *buffer, 27trusted_set(struct inode *inode, const char *name, const void *buffer,
28 size_t size, int flags) 28 size_t size, int flags)
29{ 29{
30 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 30 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
31 return -EINVAL; 31 return -EINVAL;
32 32
33 if (!reiserfs_xattrs (inode->i_sb)) 33 if (!reiserfs_xattrs(inode->i_sb))
34 return -EOPNOTSUPP; 34 return -EOPNOTSUPP;
35 35
36 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 36 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
37 return -EPERM; 37 return -EPERM;
38 38
39 return reiserfs_xattr_set (inode, name, buffer, size, flags); 39 return reiserfs_xattr_set(inode, name, buffer, size, flags);
40} 40}
41 41
42static int 42static int trusted_del(struct inode *inode, const char *name)
43trusted_del (struct inode *inode, const char *name)
44{ 43{
45 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) 44 if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
46 return -EINVAL; 45 return -EINVAL;
47 46
48 if (!reiserfs_xattrs (inode->i_sb)) 47 if (!reiserfs_xattrs(inode->i_sb))
49 return -EOPNOTSUPP; 48 return -EOPNOTSUPP;
50 49
51 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 50 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
52 return -EPERM; 51 return -EPERM;
53 52
54 return 0; 53 return 0;
55} 54}
56 55
57static int 56static int
58trusted_list (struct inode *inode, const char *name, int namelen, char *out) 57trusted_list(struct inode *inode, const char *name, int namelen, char *out)
59{ 58{
60 int len = namelen; 59 int len = namelen;
61 60
62 if (!reiserfs_xattrs (inode->i_sb)) 61 if (!reiserfs_xattrs(inode->i_sb))
63 return 0; 62 return 0;
64 63
65 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode))) 64 if (!(capable(CAP_SYS_ADMIN) || is_reiserfs_priv_object(inode)))
66 return 0; 65 return 0;
67 66
68 if (out) 67 if (out)
69 memcpy (out, name, len); 68 memcpy(out, name, len);
70 69
71 return len; 70 return len;
72} 71}
73 72
74
75struct reiserfs_xattr_handler trusted_handler = { 73struct reiserfs_xattr_handler trusted_handler = {
76 .prefix = XATTR_TRUSTED_PREFIX, 74 .prefix = XATTR_TRUSTED_PREFIX,
77 .get = trusted_get, 75 .get = trusted_get,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 0772806466a..51458048ca6 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -13,81 +13,80 @@
13#define XATTR_USER_PREFIX "user." 13#define XATTR_USER_PREFIX "user."
14 14
15static int 15static int
16user_get (struct inode *inode, const char *name, void *buffer, size_t size) 16user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 17{
18 18
19 int error; 19 int error;
20 20
21 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 21 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
22 return -EINVAL; 22 return -EINVAL;
23 23
24 if (!reiserfs_xattrs_user (inode->i_sb)) 24 if (!reiserfs_xattrs_user(inode->i_sb))
25 return -EOPNOTSUPP; 25 return -EOPNOTSUPP;
26 26
27 error = reiserfs_permission_locked (inode, MAY_READ, NULL); 27 error = reiserfs_permission_locked(inode, MAY_READ, NULL);
28 if (error) 28 if (error)
29 return error; 29 return error;
30 30
31 return reiserfs_xattr_get (inode, name, buffer, size); 31 return reiserfs_xattr_get(inode, name, buffer, size);
32} 32}
33 33
34static int 34static int
35user_set (struct inode *inode, const char *name, const void *buffer, 35user_set(struct inode *inode, const char *name, const void *buffer,
36 size_t size, int flags) 36 size_t size, int flags)
37{ 37{
38 38
39 int error; 39 int error;
40 40
41 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 41 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
42 return -EINVAL; 42 return -EINVAL;
43 43
44 if (!reiserfs_xattrs_user (inode->i_sb)) 44 if (!reiserfs_xattrs_user(inode->i_sb))
45 return -EOPNOTSUPP; 45 return -EOPNOTSUPP;
46 46
47 if (!S_ISREG (inode->i_mode) && 47 if (!S_ISREG(inode->i_mode) &&
48 (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) 48 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
49 return -EPERM; 49 return -EPERM;
50 50
51 error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); 51 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
52 if (error) 52 if (error)
53 return error; 53 return error;
54 54
55 return reiserfs_xattr_set (inode, name, buffer, size, flags); 55 return reiserfs_xattr_set(inode, name, buffer, size, flags);
56} 56}
57 57
58static int 58static int user_del(struct inode *inode, const char *name)
59user_del (struct inode *inode, const char *name)
60{ 59{
61 int error; 60 int error;
62 61
63 if (strlen(name) < sizeof(XATTR_USER_PREFIX)) 62 if (strlen(name) < sizeof(XATTR_USER_PREFIX))
64 return -EINVAL; 63 return -EINVAL;
65 64
66 if (!reiserfs_xattrs_user (inode->i_sb)) 65 if (!reiserfs_xattrs_user(inode->i_sb))
67 return -EOPNOTSUPP; 66 return -EOPNOTSUPP;
68 67
69 if (!S_ISREG (inode->i_mode) && 68 if (!S_ISREG(inode->i_mode) &&
70 (!S_ISDIR (inode->i_mode) || inode->i_mode & S_ISVTX)) 69 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
71 return -EPERM; 70 return -EPERM;
72 71
73 error = reiserfs_permission_locked (inode, MAY_WRITE, NULL); 72 error = reiserfs_permission_locked(inode, MAY_WRITE, NULL);
74 if (error) 73 if (error)
75 return error; 74 return error;
76 75
77 return 0; 76 return 0;
78} 77}
79 78
80static int 79static int
81user_list (struct inode *inode, const char *name, int namelen, char *out) 80user_list(struct inode *inode, const char *name, int namelen, char *out)
82{ 81{
83 int len = namelen; 82 int len = namelen;
84 if (!reiserfs_xattrs_user (inode->i_sb)) 83 if (!reiserfs_xattrs_user(inode->i_sb))
85 return 0; 84 return 0;
86 85
87 if (out) 86 if (out)
88 memcpy (out, name, len); 87 memcpy(out, name, len);
89 88
90 return len; 89 return len;
91} 90}
92 91
93struct reiserfs_xattr_handler user_handler = { 92struct reiserfs_xattr_handler user_handler = {