diff options
author | Vladimir Saveliev <vs@namesys.com> | 2007-10-16 04:25:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:56 -0400 |
commit | 797b4cffdf79b9ed66759b8d2d5252eba965fb18 (patch) | |
tree | 5704fe75e0e9ff45ccd078d0b34420ad71f242a8 /fs/reiserfs | |
parent | f87061842877cf822251c65b39cc624cc94046da (diff) |
reiserfs: use generic write
Make reiserfs write via the generic routines.
The original reiserfs write path, optimized for big writes, is deadlock-prone.
Signed-off-by: Vladimir Saveliev <vs@namesys.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/reiserfs')
-rw-r--r-- | fs/reiserfs/file.c | 1240 |
1 files changed, 1 insertions, 1239 deletions
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 2070aeee2a52..a804903d31d1 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -153,608 +153,6 @@ static int reiserfs_sync_file(struct file *p_s_filp, | |||
153 | return (n_err < 0) ? -EIO : 0; | 153 | return (n_err < 0) ? -EIO : 0; |
154 | } | 154 | } |
155 | 155 | ||
156 | /* I really do not want to play with memory shortage right now, so | ||
157 | to simplify the code, we are not going to write more than this much pages at | ||
158 | a time. This still should considerably improve performance compared to 4k | ||
159 | at a time case. This is 32 pages of 4k size. */ | ||
160 | #define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE | ||
161 | |||
162 | /* Allocates blocks for a file to fulfil write request. | ||
163 | Maps all unmapped but prepared pages from the list. | ||
164 | Updates metadata with newly allocated blocknumbers as needed */ | ||
165 | static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode we work with */ | ||
166 | loff_t pos, /* Writing position */ | ||
167 | int num_pages, /* number of pages write going | ||
168 | to touch */ | ||
169 | int write_bytes, /* amount of bytes to write */ | ||
170 | struct page **prepared_pages, /* array of | ||
171 | prepared pages | ||
172 | */ | ||
173 | int blocks_to_allocate /* Amount of blocks we | ||
174 | need to allocate to | ||
175 | fit the data into file | ||
176 | */ | ||
177 | ) | ||
178 | { | ||
179 | struct cpu_key key; // cpu key of item that we are going to deal with | ||
180 | struct item_head *ih; // pointer to item head that we are going to deal with | ||
181 | struct buffer_head *bh; // Buffer head that contains items that we are going to deal with | ||
182 | __le32 *item; // pointer to item we are going to deal with | ||
183 | INITIALIZE_PATH(path); // path to item, that we are going to deal with. | ||
184 | b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. | ||
185 | reiserfs_blocknr_hint_t hint; // hint structure for block allocator. | ||
186 | size_t res; // return value of various functions that we call. | ||
187 | int curr_block; // current block used to keep track of unmapped blocks. | ||
188 | int i; // loop counter | ||
189 | int itempos; // position in item | ||
190 | unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in | ||
191 | // first page | ||
192 | unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ | ||
193 | __u64 hole_size; // amount of blocks for a file hole, if it needed to be created. | ||
194 | int modifying_this_item = 0; // Flag for items traversal code to keep track | ||
195 | // of the fact that we already prepared | ||
196 | // current block for journal | ||
197 | int will_prealloc = 0; | ||
198 | RFALSE(!blocks_to_allocate, | ||
199 | "green-9004: tried to allocate zero blocks?"); | ||
200 | |||
201 | /* only preallocate if this is a small write */ | ||
202 | if (REISERFS_I(inode)->i_prealloc_count || | ||
203 | (!(write_bytes & (inode->i_sb->s_blocksize - 1)) && | ||
204 | blocks_to_allocate < | ||
205 | REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) | ||
206 | will_prealloc = | ||
207 | REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; | ||
208 | |||
209 | allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * | ||
210 | sizeof(b_blocknr_t), GFP_NOFS); | ||
211 | if (!allocated_blocks) | ||
212 | return -ENOMEM; | ||
213 | |||
214 | /* First we compose a key to point at the writing position, we want to do | ||
215 | that outside of any locking region. */ | ||
216 | make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ ); | ||
217 | |||
218 | /* If we came here, it means we absolutely need to open a transaction, | ||
219 | since we need to allocate some blocks */ | ||
220 | reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. | ||
221 | res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough | ||
222 | if (res) | ||
223 | goto error_exit; | ||
224 | reiserfs_update_inode_transaction(inode); | ||
225 | |||
226 | /* Look for the in-tree position of our write, need path for block allocator */ | ||
227 | res = search_for_position_by_key(inode->i_sb, &key, &path); | ||
228 | if (res == IO_ERROR) { | ||
229 | res = -EIO; | ||
230 | goto error_exit; | ||
231 | } | ||
232 | |||
233 | /* Allocate blocks */ | ||
234 | /* First fill in "hint" structure for block allocator */ | ||
235 | hint.th = th; // transaction handle. | ||
236 | hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine. | ||
237 | hint.inode = inode; // Inode is needed by block allocator too. | ||
238 | hint.search_start = 0; // We have no hint on where to search free blocks for block allocator. | ||
239 | hint.key = key.on_disk_key; // on disk key of file. | ||
240 | hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); // Number of disk blocks this file occupies already. | ||
241 | hint.formatted_node = 0; // We are allocating blocks for unformatted node. | ||
242 | hint.preallocate = will_prealloc; | ||
243 | |||
244 | /* Call block allocator to allocate blocks */ | ||
245 | res = | ||
246 | reiserfs_allocate_blocknrs(&hint, allocated_blocks, | ||
247 | blocks_to_allocate, blocks_to_allocate); | ||
248 | if (res != CARRY_ON) { | ||
249 | if (res == NO_DISK_SPACE) { | ||
250 | /* We flush the transaction in case of no space. This way some | ||
251 | blocks might become free */ | ||
252 | SB_JOURNAL(inode->i_sb)->j_must_wait = 1; | ||
253 | res = restart_transaction(th, inode, &path); | ||
254 | if (res) | ||
255 | goto error_exit; | ||
256 | |||
257 | /* We might have scheduled, so search again */ | ||
258 | res = | ||
259 | search_for_position_by_key(inode->i_sb, &key, | ||
260 | &path); | ||
261 | if (res == IO_ERROR) { | ||
262 | res = -EIO; | ||
263 | goto error_exit; | ||
264 | } | ||
265 | |||
266 | /* update changed info for hint structure. */ | ||
267 | res = | ||
268 | reiserfs_allocate_blocknrs(&hint, allocated_blocks, | ||
269 | blocks_to_allocate, | ||
270 | blocks_to_allocate); | ||
271 | if (res != CARRY_ON) { | ||
272 | res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC; | ||
273 | pathrelse(&path); | ||
274 | goto error_exit; | ||
275 | } | ||
276 | } else { | ||
277 | res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC; | ||
278 | pathrelse(&path); | ||
279 | goto error_exit; | ||
280 | } | ||
281 | } | ||
282 | #ifdef __BIG_ENDIAN | ||
283 | // Too bad, I have not found any way to convert a given region from | ||
284 | // cpu format to little endian format | ||
285 | { | ||
286 | int i; | ||
287 | for (i = 0; i < blocks_to_allocate; i++) | ||
288 | allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]); | ||
289 | } | ||
290 | #endif | ||
291 | |||
292 | /* Blocks allocating well might have scheduled and tree might have changed, | ||
293 | let's search the tree again */ | ||
294 | /* find where in the tree our write should go */ | ||
295 | res = search_for_position_by_key(inode->i_sb, &key, &path); | ||
296 | if (res == IO_ERROR) { | ||
297 | res = -EIO; | ||
298 | goto error_exit_free_blocks; | ||
299 | } | ||
300 | |||
301 | bh = get_last_bh(&path); // Get a bufferhead for last element in path. | ||
302 | ih = get_ih(&path); // Get a pointer to last item head in path. | ||
303 | item = get_item(&path); // Get a pointer to last item in path | ||
304 | |||
305 | /* Let's see what we have found */ | ||
306 | if (res != POSITION_FOUND) { /* position not found, this means that we | ||
307 | might need to append file with holes | ||
308 | first */ | ||
309 | // Since we are writing past the file's end, we need to find out if | ||
310 | // there is a hole that needs to be inserted before our writing | ||
311 | // position, and how many blocks it is going to cover (we need to | ||
312 | // populate pointers to file blocks representing the hole with zeros) | ||
313 | |||
314 | { | ||
315 | int item_offset = 1; | ||
316 | /* | ||
317 | * if ih is stat data, its offset is 0 and we don't want to | ||
318 | * add 1 to pos in the hole_size calculation | ||
319 | */ | ||
320 | if (is_statdata_le_ih(ih)) | ||
321 | item_offset = 0; | ||
322 | hole_size = (pos + item_offset - | ||
323 | (le_key_k_offset | ||
324 | (get_inode_item_key_version(inode), | ||
325 | &(ih->ih_key)) + op_bytes_number(ih, | ||
326 | inode-> | ||
327 | i_sb-> | ||
328 | s_blocksize))) | ||
329 | >> inode->i_sb->s_blocksize_bits; | ||
330 | } | ||
331 | |||
332 | if (hole_size > 0) { | ||
333 | int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE); // How much data to insert first time. | ||
334 | /* area filled with zeroes, to supply as list of zero blocknumbers | ||
335 | We allocate it outside of loop just in case loop would spin for | ||
336 | several iterations. */ | ||
337 | char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. | ||
338 | if (!zeros) { | ||
339 | res = -ENOMEM; | ||
340 | goto error_exit_free_blocks; | ||
341 | } | ||
342 | do { | ||
343 | to_paste = | ||
344 | min_t(__u64, hole_size, | ||
345 | MAX_ITEM_LEN(inode->i_sb-> | ||
346 | s_blocksize) / | ||
347 | UNFM_P_SIZE); | ||
348 | if (is_indirect_le_ih(ih)) { | ||
349 | /* Ok, there is existing indirect item already. Need to append it */ | ||
350 | /* Calculate position past inserted item */ | ||
351 | make_cpu_key(&key, inode, | ||
352 | le_key_k_offset | ||
353 | (get_inode_item_key_version | ||
354 | (inode), | ||
355 | &(ih->ih_key)) + | ||
356 | op_bytes_number(ih, | ||
357 | inode-> | ||
358 | i_sb-> | ||
359 | s_blocksize), | ||
360 | TYPE_INDIRECT, 3); | ||
361 | res = | ||
362 | reiserfs_paste_into_item(th, &path, | ||
363 | &key, | ||
364 | inode, | ||
365 | (char *) | ||
366 | zeros, | ||
367 | UNFM_P_SIZE | ||
368 | * | ||
369 | to_paste); | ||
370 | if (res) { | ||
371 | kfree(zeros); | ||
372 | goto error_exit_free_blocks; | ||
373 | } | ||
374 | } else if (is_statdata_le_ih(ih)) { | ||
375 | /* No existing item, create it */ | ||
376 | /* item head for new item */ | ||
377 | struct item_head ins_ih; | ||
378 | |||
379 | /* create a key for our new item */ | ||
380 | make_cpu_key(&key, inode, 1, | ||
381 | TYPE_INDIRECT, 3); | ||
382 | |||
383 | /* Create new item head for our new item */ | ||
384 | make_le_item_head(&ins_ih, &key, | ||
385 | key.version, 1, | ||
386 | TYPE_INDIRECT, | ||
387 | to_paste * | ||
388 | UNFM_P_SIZE, | ||
389 | 0 /* free space */ ); | ||
390 | |||
391 | /* Find where such item should live in the tree */ | ||
392 | res = | ||
393 | search_item(inode->i_sb, &key, | ||
394 | &path); | ||
395 | if (res != ITEM_NOT_FOUND) { | ||
396 | /* item should not exist, otherwise we have error */ | ||
397 | if (res != -ENOSPC) { | ||
398 | reiserfs_warning(inode-> | ||
399 | i_sb, | ||
400 | "green-9008: search_by_key (%K) returned %d", | ||
401 | &key, | ||
402 | res); | ||
403 | } | ||
404 | res = -EIO; | ||
405 | kfree(zeros); | ||
406 | goto error_exit_free_blocks; | ||
407 | } | ||
408 | res = | ||
409 | reiserfs_insert_item(th, &path, | ||
410 | &key, &ins_ih, | ||
411 | inode, | ||
412 | (char *)zeros); | ||
413 | } else { | ||
414 | reiserfs_panic(inode->i_sb, | ||
415 | "green-9011: Unexpected key type %K\n", | ||
416 | &key); | ||
417 | } | ||
418 | if (res) { | ||
419 | kfree(zeros); | ||
420 | goto error_exit_free_blocks; | ||
421 | } | ||
422 | /* Now we want to check if transaction is too full, and if it is | ||
423 | we restart it. This will also free the path. */ | ||
424 | if (journal_transaction_should_end | ||
425 | (th, th->t_blocks_allocated)) { | ||
426 | inode->i_size = cpu_key_k_offset(&key) + | ||
427 | (to_paste << inode->i_blkbits); | ||
428 | res = | ||
429 | restart_transaction(th, inode, | ||
430 | &path); | ||
431 | if (res) { | ||
432 | pathrelse(&path); | ||
433 | kfree(zeros); | ||
434 | goto error_exit; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | /* Well, need to recalculate path and stuff */ | ||
439 | set_cpu_key_k_offset(&key, | ||
440 | cpu_key_k_offset(&key) + | ||
441 | (to_paste << inode-> | ||
442 | i_blkbits)); | ||
443 | res = | ||
444 | search_for_position_by_key(inode->i_sb, | ||
445 | &key, &path); | ||
446 | if (res == IO_ERROR) { | ||
447 | res = -EIO; | ||
448 | kfree(zeros); | ||
449 | goto error_exit_free_blocks; | ||
450 | } | ||
451 | bh = get_last_bh(&path); | ||
452 | ih = get_ih(&path); | ||
453 | item = get_item(&path); | ||
454 | hole_size -= to_paste; | ||
455 | } while (hole_size); | ||
456 | kfree(zeros); | ||
457 | } | ||
458 | } | ||
459 | // Go through existing indirect items first | ||
460 | // replace all zeroes with blocknumbers from list | ||
461 | // Note that if no corresponding item was found, by previous search, | ||
462 | // it means there are no existing in-tree representation for file area | ||
463 | // we are going to overwrite, so there is nothing to scan through for holes. | ||
464 | for (curr_block = 0, itempos = path.pos_in_item; | ||
465 | curr_block < blocks_to_allocate && res == POSITION_FOUND;) { | ||
466 | retry: | ||
467 | |||
468 | if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) { | ||
469 | /* We run out of data in this indirect item, let's look for another | ||
470 | one. */ | ||
471 | /* First if we are already modifying current item, log it */ | ||
472 | if (modifying_this_item) { | ||
473 | journal_mark_dirty(th, inode->i_sb, bh); | ||
474 | modifying_this_item = 0; | ||
475 | } | ||
476 | /* Then set the key to look for a new indirect item (offset of old | ||
477 | item is added to old item length */ | ||
478 | set_cpu_key_k_offset(&key, | ||
479 | le_key_k_offset | ||
480 | (get_inode_item_key_version(inode), | ||
481 | &(ih->ih_key)) + | ||
482 | op_bytes_number(ih, | ||
483 | inode->i_sb-> | ||
484 | s_blocksize)); | ||
485 | /* Search ofor position of new key in the tree. */ | ||
486 | res = | ||
487 | search_for_position_by_key(inode->i_sb, &key, | ||
488 | &path); | ||
489 | if (res == IO_ERROR) { | ||
490 | res = -EIO; | ||
491 | goto error_exit_free_blocks; | ||
492 | } | ||
493 | bh = get_last_bh(&path); | ||
494 | ih = get_ih(&path); | ||
495 | item = get_item(&path); | ||
496 | itempos = path.pos_in_item; | ||
497 | continue; // loop to check all kinds of conditions and so on. | ||
498 | } | ||
499 | /* Ok, we have correct position in item now, so let's see if it is | ||
500 | representing file hole (blocknumber is zero) and fill it if needed */ | ||
501 | if (!item[itempos]) { | ||
502 | /* Ok, a hole. Now we need to check if we already prepared this | ||
503 | block to be journaled */ | ||
504 | while (!modifying_this_item) { // loop until succeed | ||
505 | /* Well, this item is not journaled yet, so we must prepare | ||
506 | it for journal first, before we can change it */ | ||
507 | struct item_head tmp_ih; // We copy item head of found item, | ||
508 | // here to detect if fs changed under | ||
509 | // us while we were preparing for | ||
510 | // journal. | ||
511 | int fs_gen; // We store fs generation here to find if someone | ||
512 | // changes fs under our feet | ||
513 | |||
514 | copy_item_head(&tmp_ih, ih); // Remember itemhead | ||
515 | fs_gen = get_generation(inode->i_sb); // remember fs generation | ||
516 | reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. | ||
517 | if (fs_changed(fs_gen, inode->i_sb) | ||
518 | && item_moved(&tmp_ih, &path)) { | ||
519 | // Sigh, fs was changed under us, we need to look for new | ||
520 | // location of item we are working with | ||
521 | |||
522 | /* unmark prepaerd area as journaled and search for it's | ||
523 | new position */ | ||
524 | reiserfs_restore_prepared_buffer(inode-> | ||
525 | i_sb, | ||
526 | bh); | ||
527 | res = | ||
528 | search_for_position_by_key(inode-> | ||
529 | i_sb, | ||
530 | &key, | ||
531 | &path); | ||
532 | if (res == IO_ERROR) { | ||
533 | res = -EIO; | ||
534 | goto error_exit_free_blocks; | ||
535 | } | ||
536 | bh = get_last_bh(&path); | ||
537 | ih = get_ih(&path); | ||
538 | item = get_item(&path); | ||
539 | itempos = path.pos_in_item; | ||
540 | goto retry; | ||
541 | } | ||
542 | modifying_this_item = 1; | ||
543 | } | ||
544 | item[itempos] = allocated_blocks[curr_block]; // Assign new block | ||
545 | curr_block++; | ||
546 | } | ||
547 | itempos++; | ||
548 | } | ||
549 | |||
550 | if (modifying_this_item) { // We need to log last-accessed block, if it | ||
551 | // was modified, but not logged yet. | ||
552 | journal_mark_dirty(th, inode->i_sb, bh); | ||
553 | } | ||
554 | |||
555 | if (curr_block < blocks_to_allocate) { | ||
556 | // Oh, well need to append to indirect item, or to create indirect item | ||
557 | // if there weren't any | ||
558 | if (is_indirect_le_ih(ih)) { | ||
559 | // Existing indirect item - append. First calculate key for append | ||
560 | // position. We do not need to recalculate path as it should | ||
561 | // already point to correct place. | ||
562 | make_cpu_key(&key, inode, | ||
563 | le_key_k_offset(get_inode_item_key_version | ||
564 | (inode), | ||
565 | &(ih->ih_key)) + | ||
566 | op_bytes_number(ih, | ||
567 | inode->i_sb->s_blocksize), | ||
568 | TYPE_INDIRECT, 3); | ||
569 | res = | ||
570 | reiserfs_paste_into_item(th, &path, &key, inode, | ||
571 | (char *)(allocated_blocks + | ||
572 | curr_block), | ||
573 | UNFM_P_SIZE * | ||
574 | (blocks_to_allocate - | ||
575 | curr_block)); | ||
576 | if (res) { | ||
577 | goto error_exit_free_blocks; | ||
578 | } | ||
579 | } else if (is_statdata_le_ih(ih)) { | ||
580 | // Last found item was statdata. That means we need to create indirect item. | ||
581 | struct item_head ins_ih; /* itemhead for new item */ | ||
582 | |||
583 | /* create a key for our new item */ | ||
584 | make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3); // Position one, | ||
585 | // because that's | ||
586 | // where first | ||
587 | // indirect item | ||
588 | // begins | ||
589 | /* Create new item head for our new item */ | ||
590 | make_le_item_head(&ins_ih, &key, key.version, 1, | ||
591 | TYPE_INDIRECT, | ||
592 | (blocks_to_allocate - | ||
593 | curr_block) * UNFM_P_SIZE, | ||
594 | 0 /* free space */ ); | ||
595 | /* Find where such item should live in the tree */ | ||
596 | res = search_item(inode->i_sb, &key, &path); | ||
597 | if (res != ITEM_NOT_FOUND) { | ||
598 | /* Well, if we have found such item already, or some error | ||
599 | occured, we need to warn user and return error */ | ||
600 | if (res != -ENOSPC) { | ||
601 | reiserfs_warning(inode->i_sb, | ||
602 | "green-9009: search_by_key (%K) " | ||
603 | "returned %d", &key, | ||
604 | res); | ||
605 | } | ||
606 | res = -EIO; | ||
607 | goto error_exit_free_blocks; | ||
608 | } | ||
609 | /* Insert item into the tree with the data as its body */ | ||
610 | res = | ||
611 | reiserfs_insert_item(th, &path, &key, &ins_ih, | ||
612 | inode, | ||
613 | (char *)(allocated_blocks + | ||
614 | curr_block)); | ||
615 | } else { | ||
616 | reiserfs_panic(inode->i_sb, | ||
617 | "green-9010: unexpected item type for key %K\n", | ||
618 | &key); | ||
619 | } | ||
620 | } | ||
621 | // the caller is responsible for closing the transaction | ||
622 | // unless we return an error, they are also responsible for logging | ||
623 | // the inode. | ||
624 | // | ||
625 | pathrelse(&path); | ||
626 | /* | ||
627 | * cleanup prellocation from previous writes | ||
628 | * if this is a partial block write | ||
629 | */ | ||
630 | if (write_bytes & (inode->i_sb->s_blocksize - 1)) | ||
631 | reiserfs_discard_prealloc(th, inode); | ||
632 | reiserfs_write_unlock(inode->i_sb); | ||
633 | |||
634 | // go through all the pages/buffers and map the buffers to newly allocated | ||
635 | // blocks (so that system knows where to write these pages later). | ||
636 | curr_block = 0; | ||
637 | for (i = 0; i < num_pages; i++) { | ||
638 | struct page *page = prepared_pages[i]; //current page | ||
639 | struct buffer_head *head = page_buffers(page); // first buffer for a page | ||
640 | int block_start, block_end; // in-page offsets for buffers. | ||
641 | |||
642 | if (!page_buffers(page)) | ||
643 | reiserfs_panic(inode->i_sb, | ||
644 | "green-9005: No buffers for prepared page???"); | ||
645 | |||
646 | /* For each buffer in page */ | ||
647 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
648 | block_start = block_end, bh = bh->b_this_page) { | ||
649 | if (!bh) | ||
650 | reiserfs_panic(inode->i_sb, | ||
651 | "green-9006: Allocated but absent buffer for a page?"); | ||
652 | block_end = block_start + inode->i_sb->s_blocksize; | ||
653 | if (i == 0 && block_end <= from) | ||
654 | /* if this buffer is before requested data to map, skip it */ | ||
655 | continue; | ||
656 | if (i == num_pages - 1 && block_start >= to) | ||
657 | /* If this buffer is after requested data to map, abort | ||
658 | processing of current page */ | ||
659 | break; | ||
660 | |||
661 | if (!buffer_mapped(bh)) { // Ok, unmapped buffer, need to map it | ||
662 | map_bh(bh, inode->i_sb, | ||
663 | le32_to_cpu(allocated_blocks | ||
664 | [curr_block])); | ||
665 | curr_block++; | ||
666 | set_buffer_new(bh); | ||
667 | } | ||
668 | } | ||
669 | } | ||
670 | |||
671 | RFALSE(curr_block > blocks_to_allocate, | ||
672 | "green-9007: Used too many blocks? weird"); | ||
673 | |||
674 | kfree(allocated_blocks); | ||
675 | return 0; | ||
676 | |||
677 | // Need to deal with transaction here. | ||
678 | error_exit_free_blocks: | ||
679 | pathrelse(&path); | ||
680 | // free blocks | ||
681 | for (i = 0; i < blocks_to_allocate; i++) | ||
682 | reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), | ||
683 | 1); | ||
684 | |||
685 | error_exit: | ||
686 | if (th->t_trans_id) { | ||
687 | int err; | ||
688 | // update any changes we made to blk count | ||
689 | mark_inode_dirty(inode); | ||
690 | err = | ||
691 | journal_end(th, inode->i_sb, | ||
692 | JOURNAL_PER_BALANCE_CNT * 3 + 1 + | ||
693 | 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); | ||
694 | if (err) | ||
695 | res = err; | ||
696 | } | ||
697 | reiserfs_write_unlock(inode->i_sb); | ||
698 | kfree(allocated_blocks); | ||
699 | |||
700 | return res; | ||
701 | } | ||
702 | |||
703 | /* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ | ||
704 | static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ | ||
705 | size_t num_pages /* amount of pages */ ) | ||
706 | { | ||
707 | int i; // loop counter | ||
708 | |||
709 | for (i = 0; i < num_pages; i++) { | ||
710 | struct page *page = prepared_pages[i]; | ||
711 | |||
712 | try_to_free_buffers(page); | ||
713 | unlock_page(page); | ||
714 | page_cache_release(page); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | /* This function will copy data from userspace to specified pages within | ||
719 | supplied byte range */ | ||
720 | static int reiserfs_copy_from_user_to_file_region(loff_t pos, /* In-file position */ | ||
721 | int num_pages, /* Number of pages affected */ | ||
722 | int write_bytes, /* Amount of bytes to write */ | ||
723 | struct page **prepared_pages, /* pointer to | ||
724 | array to | ||
725 | prepared pages | ||
726 | */ | ||
727 | const char __user * buf /* Pointer to user-supplied | ||
728 | data */ | ||
729 | ) | ||
730 | { | ||
731 | long page_fault = 0; // status of copy_from_user. | ||
732 | int i; // loop counter. | ||
733 | int offset; // offset in page | ||
734 | |||
735 | for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; | ||
736 | i++, offset = 0) { | ||
737 | size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page | ||
738 | struct page *page = prepared_pages[i]; // Current page we process. | ||
739 | |||
740 | fault_in_pages_readable(buf, count); | ||
741 | |||
742 | /* Copy data from userspace to the current page */ | ||
743 | kmap(page); | ||
744 | page_fault = __copy_from_user(page_address(page) + offset, buf, count); // Copy the data. | ||
745 | /* Flush processor's dcache for this page */ | ||
746 | flush_dcache_page(page); | ||
747 | kunmap(page); | ||
748 | buf += count; | ||
749 | write_bytes -= count; | ||
750 | |||
751 | if (page_fault) | ||
752 | break; // Was there a fault? abort. | ||
753 | } | ||
754 | |||
755 | return page_fault ? -EFAULT : 0; | ||
756 | } | ||
757 | |||
758 | /* taken fs/buffer.c:__block_commit_write */ | 156 | /* taken fs/buffer.c:__block_commit_write */ |
759 | int reiserfs_commit_page(struct inode *inode, struct page *page, | 157 | int reiserfs_commit_page(struct inode *inode, struct page *page, |
760 | unsigned from, unsigned to) | 158 | unsigned from, unsigned to) |
@@ -824,432 +222,6 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
824 | return ret; | 222 | return ret; |
825 | } | 223 | } |
826 | 224 | ||
827 | /* Submit pages for write. This was separated from actual file copying | ||
828 | because we might want to allocate block numbers in-between. | ||
829 | This function assumes that caller will adjust file size to correct value. */ | ||
830 | static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos, /* Writing position offset */ | ||
831 | size_t num_pages, /* Number of pages to write */ | ||
832 | size_t write_bytes, /* number of bytes to write */ | ||
833 | struct page **prepared_pages /* list of pages */ | ||
834 | ) | ||
835 | { | ||
836 | int status; // return status of block_commit_write. | ||
837 | int retval = 0; // Return value we are going to return. | ||
838 | int i; // loop counter | ||
839 | int offset; // Writing offset in page. | ||
840 | int orig_write_bytes = write_bytes; | ||
841 | int sd_update = 0; | ||
842 | |||
843 | for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages; | ||
844 | i++, offset = 0) { | ||
845 | int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes); // How much of bytes to write to this page | ||
846 | struct page *page = prepared_pages[i]; // Current page we process. | ||
847 | |||
848 | status = | ||
849 | reiserfs_commit_page(inode, page, offset, offset + count); | ||
850 | if (status) | ||
851 | retval = status; // To not overcomplicate matters We are going to | ||
852 | // submit all the pages even if there was error. | ||
853 | // we only remember error status to report it on | ||
854 | // exit. | ||
855 | write_bytes -= count; | ||
856 | } | ||
857 | /* now that we've gotten all the ordered buffers marked dirty, | ||
858 | * we can safely update i_size and close any running transaction | ||
859 | */ | ||
860 | if (pos + orig_write_bytes > inode->i_size) { | ||
861 | inode->i_size = pos + orig_write_bytes; // Set new size | ||
862 | /* If the file have grown so much that tail packing is no | ||
863 | * longer possible, reset "need to pack" flag */ | ||
864 | if ((have_large_tails(inode->i_sb) && | ||
865 | inode->i_size > i_block_size(inode) * 4) || | ||
866 | (have_small_tails(inode->i_sb) && | ||
867 | inode->i_size > i_block_size(inode))) | ||
868 | REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; | ||
869 | else if ((have_large_tails(inode->i_sb) && | ||
870 | inode->i_size < i_block_size(inode) * 4) || | ||
871 | (have_small_tails(inode->i_sb) && | ||
872 | inode->i_size < i_block_size(inode))) | ||
873 | REISERFS_I(inode)->i_flags |= i_pack_on_close_mask; | ||
874 | |||
875 | if (th->t_trans_id) { | ||
876 | reiserfs_write_lock(inode->i_sb); | ||
877 | // this sets the proper flags for O_SYNC to trigger a commit | ||
878 | mark_inode_dirty(inode); | ||
879 | reiserfs_write_unlock(inode->i_sb); | ||
880 | } else { | ||
881 | reiserfs_write_lock(inode->i_sb); | ||
882 | reiserfs_update_inode_transaction(inode); | ||
883 | mark_inode_dirty(inode); | ||
884 | reiserfs_write_unlock(inode->i_sb); | ||
885 | } | ||
886 | |||
887 | sd_update = 1; | ||
888 | } | ||
889 | if (th->t_trans_id) { | ||
890 | reiserfs_write_lock(inode->i_sb); | ||
891 | if (!sd_update) | ||
892 | mark_inode_dirty(inode); | ||
893 | status = journal_end(th, th->t_super, th->t_blocks_allocated); | ||
894 | if (status) | ||
895 | retval = status; | ||
896 | reiserfs_write_unlock(inode->i_sb); | ||
897 | } | ||
898 | th->t_trans_id = 0; | ||
899 | |||
900 | /* | ||
901 | * we have to unlock the pages after updating i_size, otherwise | ||
902 | * we race with writepage | ||
903 | */ | ||
904 | for (i = 0; i < num_pages; i++) { | ||
905 | struct page *page = prepared_pages[i]; | ||
906 | unlock_page(page); | ||
907 | mark_page_accessed(page); | ||
908 | page_cache_release(page); | ||
909 | } | ||
910 | return retval; | ||
911 | } | ||
912 | |||
913 | /* Check whether the region being written touches the file's tail | ||
914 | (if it is present). If it does, convert the tail to an unformatted node */ | ||
915 | static int reiserfs_check_for_tail_and_convert(struct inode *inode, /* inode to deal with */ | ||
916 | loff_t pos, /* Writing position */ | ||
917 | int write_bytes /* amount of bytes to write */ | ||
918 | ) | ||
919 | { | ||
920 | INITIALIZE_PATH(path); // needed for search_for_position | ||
921 | struct cpu_key key; // Key that would represent last touched writing byte. | ||
922 | struct item_head *ih; // item header of found block; | ||
923 | int res; // Return value of various functions we call. | ||
924 | int cont_expand_offset; // We will put offset for generic_cont_expand here | ||
925 | // This can be int just because tails are created | ||
926 | // only for small files. | ||
927 | |||
928 | /* this embodies a dependency on a particular tail policy */ | ||
929 | if (inode->i_size >= inode->i_sb->s_blocksize * 4) { | ||
930 | /* such big files do not have tails, so we won't bother ourselves | ||
931 | to look for tails, simply return */ | ||
932 | return 0; | ||
933 | } | ||
934 | |||
935 | reiserfs_write_lock(inode->i_sb); | ||
936 | /* find the item containing the last byte to be written, or if | ||
937 | * writing past the end of the file then the last item of the | ||
938 | * file (and then we check its type). */ | ||
939 | make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY, | ||
940 | 3 /*key length */ ); | ||
941 | res = search_for_position_by_key(inode->i_sb, &key, &path); | ||
942 | if (res == IO_ERROR) { | ||
943 | reiserfs_write_unlock(inode->i_sb); | ||
944 | return -EIO; | ||
945 | } | ||
946 | ih = get_ih(&path); | ||
947 | res = 0; | ||
948 | if (is_direct_le_ih(ih)) { | ||
949 | /* Ok, closest item is file tail (tails are stored in "direct" | ||
950 | * items), so we need to unpack it. */ | ||
951 | /* To not overcomplicate matters, we just call generic_cont_expand | ||
952 | which will in turn call other stuff and finally will boil down to | ||
953 | reiserfs_get_block() that would do necessary conversion. */ | ||
954 | cont_expand_offset = | ||
955 | le_key_k_offset(get_inode_item_key_version(inode), | ||
956 | &(ih->ih_key)); | ||
957 | pathrelse(&path); | ||
958 | res = generic_cont_expand(inode, cont_expand_offset); | ||
959 | } else | ||
960 | pathrelse(&path); | ||
961 | |||
962 | reiserfs_write_unlock(inode->i_sb); | ||
963 | return res; | ||
964 | } | ||
965 | |||
966 | /* This function locks pages starting from @pos for @inode. | ||
967 | @num_pages pages are locked and stored in | ||
968 | @prepared_pages array. Also buffers are allocated for these pages. | ||
969 | The first and last pages of the region are read if they are overwritten only | ||
970 | partially. If the last page did not exist before the write (file hole or file | ||
971 | append), it is zeroed instead. | ||
972 | Returns number of unallocated blocks that should be allocated to cover | ||
973 | new file data.*/ | ||
974 | static int reiserfs_prepare_file_region_for_write(struct inode *inode | ||
975 | /* Inode of the file */ , | ||
976 | loff_t pos, /* position in the file */ | ||
977 | size_t num_pages, /* number of pages to | ||
978 | prepare */ | ||
979 | size_t write_bytes, /* Amount of bytes to be | ||
980 | overwritten from | ||
981 | @pos */ | ||
982 | struct page **prepared_pages /* pointer to array | ||
983 | where to store | ||
984 | prepared pages */ | ||
985 | ) | ||
986 | { | ||
987 | int res = 0; // Return values of different functions we call. | ||
988 | unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages. | ||
989 | int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page | ||
990 | int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; | ||
991 | /* offset of last modified byte in last | ||
992 | page */ | ||
993 | struct address_space *mapping = inode->i_mapping; // Pages are mapped here. | ||
994 | int i; // Simple counter | ||
995 | int blocks = 0; /* Return value (blocks that should be allocated) */ | ||
996 | struct buffer_head *bh, *head; // Current bufferhead and first bufferhead | ||
997 | // of a page. | ||
998 | unsigned block_start, block_end; // Starting and ending offsets of current | ||
999 | // buffer in the page. | ||
1000 | struct buffer_head *wait[2], **wait_bh = wait; // Buffers for page, if | ||
1001 | // Page appeared to be not up | ||
1002 | // to date. Note how we have | ||
1003 | // at most 2 buffers, this is | ||
1004 | // because we at most may | ||
1005 | // partially overwrite two | ||
1006 | // buffers for one page. One at the beginning of write area | ||
1007 | // and one at the end. | ||
1008 | // Everything in the middle gets overwritten totally. | ||
1009 | |||
1010 | struct cpu_key key; // cpu key of item that we are going to deal with | ||
1011 | struct item_head *ih = NULL; // pointer to item head that we are going to deal with | ||
1012 | struct buffer_head *itembuf = NULL; // Buffer head that contains items that we are going to deal with | ||
1013 | INITIALIZE_PATH(path); // path to item, that we are going to deal with. | ||
1014 | __le32 *item = NULL; // pointer to item we are going to deal with | ||
1015 | int item_pos = -1; /* Position in indirect item */ | ||
1016 | |||
1017 | if (num_pages < 1) { | ||
1018 | reiserfs_warning(inode->i_sb, | ||
1019 | "green-9001: reiserfs_prepare_file_region_for_write " | ||
1020 | "called with zero number of pages to process"); | ||
1021 | return -EFAULT; | ||
1022 | } | ||
1023 | |||
1024 | /* We have 2 loops for pages. In first loop we grab and lock the pages, so | ||
1025 | that nobody would touch these until we release the pages. Then | ||
1026 | we'd start to deal with mapping buffers to blocks. */ | ||
1027 | for (i = 0; i < num_pages; i++) { | ||
1028 | prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page | ||
1029 | if (!prepared_pages[i]) { | ||
1030 | res = -ENOMEM; | ||
1031 | goto failed_page_grabbing; | ||
1032 | } | ||
1033 | if (!page_has_buffers(prepared_pages[i])) | ||
1034 | create_empty_buffers(prepared_pages[i], | ||
1035 | inode->i_sb->s_blocksize, 0); | ||
1036 | } | ||
1037 | |||
1038 | /* Let's count amount of blocks for a case where all the blocks | ||
1039 | overwritten are new (we will subtract already allocated blocks later) */ | ||
1040 | if (num_pages > 2) | ||
1041 | /* These are fully overwritten pages, so all the blocks in | ||
1042 | these pages are counted as needing to be allocated */ | ||
1043 | blocks = | ||
1044 | (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1045 | |||
1046 | /* count blocks needed for first page (possibly partially written) */ | ||
1047 | blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1)); /* roundup */ | ||
1048 | |||
1049 | /* Now we account for last page. If last page == first page (we | ||
1050 | overwrite only one page), we subtract all the blocks past the | ||
1051 | last writing position in a page out of already calculated number | ||
1052 | of blocks */ | ||
1053 | blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - | ||
1054 | ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits); | ||
1055 | /* Note how we do not roundup here since partial blocks still | ||
1056 | should be allocated */ | ||
1057 | |||
1058 | /* Now if all the write area lies past the file end, no point in | ||
1059 | mapping blocks, since there are none, so we just zero out remaining | ||
1060 | parts of first and last pages in write area (if needed) */ | ||
1061 | if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) { | ||
1062 | if (from != 0) /* First page needs to be partially zeroed */ | ||
1063 | zero_user_page(prepared_pages[0], 0, from, KM_USER0); | ||
1064 | |||
1065 | if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */ | ||
1066 | zero_user_page(prepared_pages[num_pages-1], to, | ||
1067 | PAGE_CACHE_SIZE - to, KM_USER0); | ||
1068 | |||
1069 | /* Since all blocks are new - use already calculated value */ | ||
1070 | return blocks; | ||
1071 | } | ||
1072 | |||
1073 | /* Well, since we write somewhere into the middle of a file, there is | ||
1074 | possibility we are writing over some already allocated blocks, so | ||
1075 | let's map these blocks and subtract number of such blocks out of blocks | ||
1076 | we need to allocate (calculated above) */ | ||
1077 | /* Mask write position to start on blocksize, we do it out of the | ||
1078 | loop for performance reasons */ | ||
1079 | pos &= ~((loff_t) inode->i_sb->s_blocksize - 1); | ||
1080 | /* Set cpu key to the starting position in a file (on left block boundary) */ | ||
1081 | make_cpu_key(&key, inode, | ||
1082 | 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), | ||
1083 | TYPE_ANY, 3 /*key length */ ); | ||
1084 | |||
1085 | reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key() | ||
1086 | for (i = 0; i < num_pages; i++) { | ||
1087 | |||
1088 | head = page_buffers(prepared_pages[i]); | ||
1089 | /* For each buffer in the page */ | ||
1090 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
1091 | block_start = block_end, bh = bh->b_this_page) { | ||
1092 | if (!bh) | ||
1093 | reiserfs_panic(inode->i_sb, | ||
1094 | "green-9002: Allocated but absent buffer for a page?"); | ||
1095 | /* Find where this buffer ends */ | ||
1096 | block_end = block_start + inode->i_sb->s_blocksize; | ||
1097 | if (i == 0 && block_end <= from) | ||
1098 | /* if this buffer is before requested data to map, skip it */ | ||
1099 | continue; | ||
1100 | |||
1101 | if (i == num_pages - 1 && block_start >= to) { | ||
1102 | /* If this buffer is after requested data to map, abort | ||
1103 | processing of current page */ | ||
1104 | break; | ||
1105 | } | ||
1106 | |||
1107 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { | ||
1108 | /* This is optimisation for a case where buffer is mapped | ||
1109 | and have blocknumber assigned. In case significant amount | ||
1110 | of such buffers are present, we may avoid some amount | ||
1111 | of search_by_key calls. | ||
1112 | Probably it would be possible to move parts of this code | ||
1113 | out of BKL, but I am afraid that would overcomplicate code | ||
1114 | without any noticeable benefit. | ||
1115 | */ | ||
1116 | item_pos++; | ||
1117 | /* Update the key */ | ||
1118 | set_cpu_key_k_offset(&key, | ||
1119 | cpu_key_k_offset(&key) + | ||
1120 | inode->i_sb->s_blocksize); | ||
1121 | blocks--; // Decrease the amount of blocks that need to be | ||
1122 | // allocated | ||
1123 | continue; // Go to the next buffer | ||
1124 | } | ||
1125 | |||
1126 | if (!itembuf || /* if first iteration */ | ||
1127 | item_pos >= ih_item_len(ih) / UNFM_P_SIZE) { /* or if we progressed past the | ||
1128 | current unformatted_item */ | ||
1129 | /* Try to find next item */ | ||
1130 | res = | ||
1131 | search_for_position_by_key(inode->i_sb, | ||
1132 | &key, &path); | ||
1133 | /* Abort if no more items */ | ||
1134 | if (res != POSITION_FOUND) { | ||
1135 | /* make sure later loops don't use this item */ | ||
1136 | itembuf = NULL; | ||
1137 | item = NULL; | ||
1138 | break; | ||
1139 | } | ||
1140 | |||
1141 | /* Update information about current indirect item */ | ||
1142 | itembuf = get_last_bh(&path); | ||
1143 | ih = get_ih(&path); | ||
1144 | item = get_item(&path); | ||
1145 | item_pos = path.pos_in_item; | ||
1146 | |||
1147 | RFALSE(!is_indirect_le_ih(ih), | ||
1148 | "green-9003: indirect item expected"); | ||
1149 | } | ||
1150 | |||
1151 | /* See if there is some block associated with the file | ||
1152 | at that position, map the buffer to this block */ | ||
1153 | if (get_block_num(item, item_pos)) { | ||
1154 | map_bh(bh, inode->i_sb, | ||
1155 | get_block_num(item, item_pos)); | ||
1156 | blocks--; // Decrease the amount of blocks that need to be | ||
1157 | // allocated | ||
1158 | } | ||
1159 | item_pos++; | ||
1160 | /* Update the key */ | ||
1161 | set_cpu_key_k_offset(&key, | ||
1162 | cpu_key_k_offset(&key) + | ||
1163 | inode->i_sb->s_blocksize); | ||
1164 | } | ||
1165 | } | ||
1166 | pathrelse(&path); // Free the path | ||
1167 | reiserfs_write_unlock(inode->i_sb); | ||
1168 | |||
1169 | /* Now zero out unmapped buffers for the first and last pages of | ||
1170 | write area or issue read requests if page is mapped. */ | ||
1171 | /* First page, see if it is not uptodate */ | ||
1172 | if (!PageUptodate(prepared_pages[0])) { | ||
1173 | head = page_buffers(prepared_pages[0]); | ||
1174 | |||
1175 | /* For each buffer in page */ | ||
1176 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
1177 | block_start = block_end, bh = bh->b_this_page) { | ||
1178 | |||
1179 | if (!bh) | ||
1180 | reiserfs_panic(inode->i_sb, | ||
1181 | "green-9002: Allocated but absent buffer for a page?"); | ||
1182 | /* Find where this buffer ends */ | ||
1183 | block_end = block_start + inode->i_sb->s_blocksize; | ||
1184 | if (block_end <= from) | ||
1185 | /* if this buffer is before requested data to map, skip it */ | ||
1186 | continue; | ||
1187 | if (block_start < from) { /* Aha, our partial buffer */ | ||
1188 | if (buffer_mapped(bh)) { /* If it is mapped, we need to | ||
1189 | issue READ request for it to | ||
1190 | not lose data */ | ||
1191 | ll_rw_block(READ, 1, &bh); | ||
1192 | *wait_bh++ = bh; | ||
1193 | } else { /* Not mapped, zero it */ | ||
1194 | zero_user_page(prepared_pages[0], | ||
1195 | block_start, | ||
1196 | from - block_start, KM_USER0); | ||
1197 | set_buffer_uptodate(bh); | ||
1198 | } | ||
1199 | } | ||
1200 | } | ||
1201 | } | ||
1202 | |||
1203 | /* Last page, see if it is not uptodate, or if the last page is past the end of the file. */ | ||
1204 | if (!PageUptodate(prepared_pages[num_pages - 1]) || | ||
1205 | ((pos + write_bytes) >> PAGE_CACHE_SHIFT) > | ||
1206 | (inode->i_size >> PAGE_CACHE_SHIFT)) { | ||
1207 | head = page_buffers(prepared_pages[num_pages - 1]); | ||
1208 | |||
1209 | /* for each buffer in page */ | ||
1210 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
1211 | block_start = block_end, bh = bh->b_this_page) { | ||
1212 | |||
1213 | if (!bh) | ||
1214 | reiserfs_panic(inode->i_sb, | ||
1215 | "green-9002: Allocated but absent buffer for a page?"); | ||
1216 | /* Find where this buffer ends */ | ||
1217 | block_end = block_start + inode->i_sb->s_blocksize; | ||
1218 | if (block_start >= to) | ||
1219 | /* if this buffer is after requested data to map, skip it */ | ||
1220 | break; | ||
1221 | if (block_end > to) { /* Aha, our partial buffer */ | ||
1222 | if (buffer_mapped(bh)) { /* If it is mapped, we need to | ||
1223 | issue READ request for it to | ||
1224 | not lose data */ | ||
1225 | ll_rw_block(READ, 1, &bh); | ||
1226 | *wait_bh++ = bh; | ||
1227 | } else { /* Not mapped, zero it */ | ||
1228 | zero_user_page(prepared_pages[num_pages-1], | ||
1229 | to, block_end - to, KM_USER0); | ||
1230 | set_buffer_uptodate(bh); | ||
1231 | } | ||
1232 | } | ||
1233 | } | ||
1234 | } | ||
1235 | |||
1236 | /* Wait for read requests we made to happen, if necessary */ | ||
1237 | while (wait_bh > wait) { | ||
1238 | wait_on_buffer(*--wait_bh); | ||
1239 | if (!buffer_uptodate(*wait_bh)) { | ||
1240 | res = -EIO; | ||
1241 | goto failed_read; | ||
1242 | } | ||
1243 | } | ||
1244 | |||
1245 | return blocks; | ||
1246 | failed_page_grabbing: | ||
1247 | num_pages = i; | ||
1248 | failed_read: | ||
1249 | reiserfs_unprepare_pages(prepared_pages, num_pages); | ||
1250 | return res; | ||
1251 | } | ||
1252 | |||
1253 | /* Write @count bytes at position @ppos in a file indicated by @file | 225 | /* Write @count bytes at position @ppos in a file indicated by @file |
1254 | from the buffer @buf. | 226 | from the buffer @buf. |
1255 | 227 | ||
@@ -1284,14 +256,9 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t | |||
1284 | * new current position before returning. */ | 256 | * new current position before returning. */ |
1285 | ) | 257 | ) |
1286 | { | 258 | { |
1287 | size_t already_written = 0; // Number of bytes already written to the file. | ||
1288 | loff_t pos; // Current position in the file. | ||
1289 | ssize_t res; // return value of various functions that we call. | ||
1290 | int err = 0; | ||
1291 | struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. | 259 | struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. |
1292 | /* To simplify coding at this time, we store | 260 | /* To simplify coding at this time, we store |
1293 | locked pages in array for now */ | 261 | locked pages in array for now */ |
1294 | struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; | ||
1295 | struct reiserfs_transaction_handle th; | 262 | struct reiserfs_transaction_handle th; |
1296 | th.t_trans_id = 0; | 263 | th.t_trans_id = 0; |
1297 | 264 | ||
@@ -1311,212 +278,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t | |||
1311 | count = MAX_NON_LFS - (unsigned long)*ppos; | 278 | count = MAX_NON_LFS - (unsigned long)*ppos; |
1312 | } | 279 | } |
1313 | 280 | ||
1314 | if (file->f_flags & O_DIRECT) | 281 | return do_sync_write(file, buf, count, ppos); |
1315 | return do_sync_write(file, buf, count, ppos); | ||
1316 | |||
1317 | if (unlikely((ssize_t) count < 0)) | ||
1318 | return -EINVAL; | ||
1319 | |||
1320 | if (unlikely(!access_ok(VERIFY_READ, buf, count))) | ||
1321 | return -EFAULT; | ||
1322 | |||
1323 | mutex_lock(&inode->i_mutex); // locks the entire file for just us | ||
1324 | |||
1325 | pos = *ppos; | ||
1326 | |||
1327 | /* Check if we can write to specified region of file, file | ||
1328 | is not overly big and this kind of stuff. Adjust pos and | ||
1329 | count, if needed */ | ||
1330 | res = generic_write_checks(file, &pos, &count, 0); | ||
1331 | if (res) | ||
1332 | goto out; | ||
1333 | |||
1334 | if (count == 0) | ||
1335 | goto out; | ||
1336 | |||
1337 | res = remove_suid(file->f_path.dentry); | ||
1338 | if (res) | ||
1339 | goto out; | ||
1340 | |||
1341 | file_update_time(file); | ||
1342 | |||
1343 | // Ok, we are done with all the checks. | ||
1344 | |||
1345 | // Now we should start real work | ||
1346 | |||
1347 | /* If we are going to write past the file's packed tail or if we are going | ||
1348 | to overwrite part of the tail, we need that tail to be converted into | ||
1349 | unformatted node */ | ||
1350 | res = reiserfs_check_for_tail_and_convert(inode, pos, count); | ||
1351 | if (res) | ||
1352 | goto out; | ||
1353 | |||
1354 | while (count > 0) { | ||
1355 | /* This is the main loop, in which we keep running until some error occurs | ||
1356 | or until we write all of the data. */ | ||
1357 | size_t num_pages; /* amount of pages we are going to write this iteration */ | ||
1358 | size_t write_bytes; /* amount of bytes to write during this iteration */ | ||
1359 | size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ | ||
1360 | |||
1361 | /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */ | ||
1362 | num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial | ||
1363 | pages */ | ||
1364 | ((count + | ||
1365 | (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT); | ||
1366 | /* convert size to amount of | ||
1367 | pages */ | ||
1368 | reiserfs_write_lock(inode->i_sb); | ||
1369 | if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME | ||
1370 | || num_pages > reiserfs_can_fit_pages(inode->i_sb)) { | ||
1371 | /* If we were asked to write more data than we want to or if there | ||
1372 | is not that much space, then we shorten amount of data to write | ||
1373 | for this iteration. */ | ||
1374 | num_pages = | ||
1375 | min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, | ||
1376 | reiserfs_can_fit_pages(inode->i_sb)); | ||
1377 | /* Also we should not forget to set size in bytes accordingly */ | ||
1378 | write_bytes = (num_pages << PAGE_CACHE_SHIFT) - | ||
1379 | (pos & (PAGE_CACHE_SIZE - 1)); | ||
1380 | /* If position is not on the | ||
1381 | start of the page, we need | ||
1382 | to subtract the offset | ||
1383 | within page */ | ||
1384 | } else | ||
1385 | write_bytes = count; | ||
1386 | |||
1387 | /* reserve the blocks to be allocated later, so that later on | ||
1388 | we still have the space to write the blocks to */ | ||
1389 | reiserfs_claim_blocks_to_be_allocated(inode->i_sb, | ||
1390 | num_pages << | ||
1391 | (PAGE_CACHE_SHIFT - | ||
1392 | inode->i_blkbits)); | ||
1393 | reiserfs_write_unlock(inode->i_sb); | ||
1394 | |||
1395 | if (!num_pages) { /* If we do not have enough space even for a single page... */ | ||
1396 | if (pos > | ||
1397 | inode->i_size + inode->i_sb->s_blocksize - | ||
1398 | (pos & (inode->i_sb->s_blocksize - 1))) { | ||
1399 | res = -ENOSPC; | ||
1400 | break; // In case we are writing past the end of the last file block, break. | ||
1401 | } | ||
1402 | // Otherwise we are possibly overwriting the file, so | ||
1403 | // let's set write size to be equal or less than blocksize. | ||
1404 | // This way we get it correctly for file holes. | ||
1405 | // But overwriting files on absolutely full volumes would not | ||
1406 | // be very efficient. Well, people are not supposed to fill | ||
1407 | // 100% of disk space anyway. | ||
1408 | write_bytes = | ||
1409 | min_t(size_t, count, | ||
1410 | inode->i_sb->s_blocksize - | ||
1411 | (pos & (inode->i_sb->s_blocksize - 1))); | ||
1412 | num_pages = 1; | ||
1413 | // No blocks were claimed before, so do it now. | ||
1414 | reiserfs_claim_blocks_to_be_allocated(inode->i_sb, | ||
1415 | 1 << | ||
1416 | (PAGE_CACHE_SHIFT | ||
1417 | - | ||
1418 | inode-> | ||
1419 | i_blkbits)); | ||
1420 | } | ||
1421 | |||
1422 | /* Prepare for writing into the region, read in all the | ||
1423 | partially overwritten pages, if needed. And lock the pages, | ||
1424 | so that nobody else can access these until we are done. | ||
1425 | We get number of actual blocks needed as a result. */ | ||
1426 | res = reiserfs_prepare_file_region_for_write(inode, pos, | ||
1427 | num_pages, | ||
1428 | write_bytes, | ||
1429 | prepared_pages); | ||
1430 | if (res < 0) { | ||
1431 | reiserfs_release_claimed_blocks(inode->i_sb, | ||
1432 | num_pages << | ||
1433 | (PAGE_CACHE_SHIFT - | ||
1434 | inode->i_blkbits)); | ||
1435 | break; | ||
1436 | } | ||
1437 | |||
1438 | blocks_to_allocate = res; | ||
1439 | |||
1440 | /* First we correct our estimate of how many blocks we need */ | ||
1441 | reiserfs_release_claimed_blocks(inode->i_sb, | ||
1442 | (num_pages << | ||
1443 | (PAGE_CACHE_SHIFT - | ||
1444 | inode->i_sb-> | ||
1445 | s_blocksize_bits)) - | ||
1446 | blocks_to_allocate); | ||
1447 | |||
1448 | if (blocks_to_allocate > 0) { /*We only allocate blocks if we need to */ | ||
1449 | /* Fill in all the possible holes and append the file if needed */ | ||
1450 | res = | ||
1451 | reiserfs_allocate_blocks_for_region(&th, inode, pos, | ||
1452 | num_pages, | ||
1453 | write_bytes, | ||
1454 | prepared_pages, | ||
1455 | blocks_to_allocate); | ||
1456 | } | ||
1457 | |||
1458 | /* well, we have allocated the blocks, so it is time to free | ||
1459 | the reservation we made earlier. */ | ||
1460 | reiserfs_release_claimed_blocks(inode->i_sb, | ||
1461 | blocks_to_allocate); | ||
1462 | if (res) { | ||
1463 | reiserfs_unprepare_pages(prepared_pages, num_pages); | ||
1464 | break; | ||
1465 | } | ||
1466 | |||
1467 | /* NOTE that allocating blocks and filling blocks can be done in reverse order | ||
1468 | and probably we would do that just to get rid of garbage in files after a | ||
1469 | crash */ | ||
1470 | |||
1471 | /* Copy data from user-supplied buffer to file's pages */ | ||
1472 | res = | ||
1473 | reiserfs_copy_from_user_to_file_region(pos, num_pages, | ||
1474 | write_bytes, | ||
1475 | prepared_pages, buf); | ||
1476 | if (res) { | ||
1477 | reiserfs_unprepare_pages(prepared_pages, num_pages); | ||
1478 | break; | ||
1479 | } | ||
1480 | |||
1481 | /* Send the pages to disk and unlock them. */ | ||
1482 | res = | ||
1483 | reiserfs_submit_file_region_for_write(&th, inode, pos, | ||
1484 | num_pages, | ||
1485 | write_bytes, | ||
1486 | prepared_pages); | ||
1487 | if (res) | ||
1488 | break; | ||
1489 | |||
1490 | already_written += write_bytes; | ||
1491 | buf += write_bytes; | ||
1492 | *ppos = pos += write_bytes; | ||
1493 | count -= write_bytes; | ||
1494 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); | ||
1495 | } | ||
1496 | |||
1497 | /* this is only true on error */ | ||
1498 | if (th.t_trans_id) { | ||
1499 | reiserfs_write_lock(inode->i_sb); | ||
1500 | err = journal_end(&th, th.t_super, th.t_blocks_allocated); | ||
1501 | reiserfs_write_unlock(inode->i_sb); | ||
1502 | if (err) { | ||
1503 | res = err; | ||
1504 | goto out; | ||
1505 | } | ||
1506 | } | ||
1507 | |||
1508 | if (likely(res >= 0) && | ||
1509 | (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode)))) | ||
1510 | res = generic_osync_inode(inode, file->f_mapping, | ||
1511 | OSYNC_METADATA | OSYNC_DATA); | ||
1512 | |||
1513 | mutex_unlock(&inode->i_mutex); | ||
1514 | reiserfs_async_progress_wait(inode->i_sb); | ||
1515 | return (already_written != 0) ? already_written : res; | ||
1516 | |||
1517 | out: | ||
1518 | mutex_unlock(&inode->i_mutex); // unlock the file on exit. | ||
1519 | return res; | ||
1520 | } | 282 | } |
1521 | 283 | ||
1522 | const struct file_operations reiserfs_file_operations = { | 284 | const struct file_operations reiserfs_file_operations = { |