Diffstat (limited to 'fs/ubifs/io.c')
 -rw-r--r--  fs/ubifs/io.c  |  201
 1 file changed, 154 insertions(+), 47 deletions(-)
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index d82173182eeb..dfd168b7807e 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
  * buffer is full or when it is not used for some time (by timer). This is
  * similar to the mechanism used by JFFS2.
  *
+ * UBIFS distinguishes between minimum write size (@c->min_io_size) and
+ * maximum write size (@c->max_write_size). The latter is the maximum number
+ * of bytes the underlying flash is able to program at a time, and writing in
+ * @c->max_write_size units should presumably be faster. Obviously,
+ * @c->min_io_size <= @c->max_write_size. Write-buffers are of
+ * @c->max_write_size bytes in size for maximum performance. However, when a
+ * write-buffer is flushed, only the portion of it (aligned to a
+ * @c->min_io_size boundary) which contains data is written, not the whole
+ * write-buffer, because this is more space-efficient.
+ *
+ * This optimization adds a few complications to the code. Indeed, on the one
+ * hand, we want to write in optimal @c->max_write_size byte chunks, which
+ * also means aligning writes to @c->max_write_size byte offsets. On the
+ * other hand, we do not want to waste space when synchronizing the write
+ * buffer, so during synchronization we write in smaller chunks, which leaves
+ * the next write offset unaligned to @c->max_write_size bytes. So we have
+ * to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
+ * to @c->max_write_size bytes again. We do this by temporarily shrinking
+ * the write-buffer size (@wbuf->size).
+ *
  * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
  * mutexes defined inside these objects. Since sometimes upper-level code
  * has to lock the write-buffer (e.g. journal space reservation code), many
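The shrink-and-realign scheme this comment describes is easiest to follow with concrete numbers. A worked instance, under an assumed geometry of min_io_size = 512 and max_write_size = 2048 (illustrative values, not taken from the commit):

        /*
         * offs = 0, size = 2048, used = 700
         *
         * flush:  write ALIGN(700, 512) = 1024 bytes  ->  offs becomes 1024
         * 1024 is not 2048-aligned, so shrink temporarily:
         *         size = ALIGN(1024, 2048) - 1024 = 1024
         * the next full-buffer flush ends at 1024 + 1024 = 2048, i.e. back
         * on a max_write_size boundary, and size grows back to 2048.
         */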
@@ -46,8 +66,8 @@
  * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
  * uses padding nodes or padding bytes, if the padding node does not fit.
  *
- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
- * every time they are read from the flash media.
+ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
+ * they are read from the flash media.
  */
 
 #include <linux/crc32.h>
@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
  * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
  * true, which is controlled by the corresponding UBIFS mount option. However,
  * if @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
- * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
- * ignored and CRC is checked.
+ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
+ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and
+ * CRC is checked. This is because during mounting or re-mounting from R/O
+ * mode to R/W mode we may read journal nodes (when replaying the journal or
+ * doing recovery) and the journal nodes may potentially be corrupted, so
+ * checking is required.
  *
  * This function returns zero in case of success and %-EUCLEAN in case of bad
  * CRC or magic.
@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
             node_len > c->ranges[type].max_len)
                 goto out_len;
 
-        if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
-            c->no_chk_data_crc)
+        if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
+            !c->remounting_rw && c->no_chk_data_crc)
                 return 0;
 
         crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
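Read as a single predicate, the new skip condition is: a data node's CRC may be skipped only when the caller does not insist on checking, the no_chk_data_crc mount option is set, and the filesystem is neither mounting nor re-mounting R/W. A hypothetical helper showing this (the name is invented here; the commit keeps the condition open-coded):

        static inline int may_skip_data_crc(const struct ubifs_info *c,
                                            int type, int must_chk_crc)
        {
                return !must_chk_crc && type == UBIFS_DATA_NODE &&
                       !c->mounting && !c->remounting_rw &&
                       c->no_chk_data_crc;
        }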
@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
  *
  * This function synchronizes write-buffer @wbuf and returns zero in case of
  * success or a negative error code in case of failure.
+ *
+ * Note, although write-buffers are of @c->max_write_size, this function does
+ * not necessarily write all @c->max_write_size bytes to the flash. Instead,
+ * if the write-buffer is only partially filled with data, only the used part
+ * of the write-buffer (aligned on a @c->min_io_size boundary) is
+ * synchronized. This way we waste less space.
  */
 int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 {
         struct ubifs_info *c = wbuf->c;
-        int err, dirt;
+        int err, dirt, sync_len;
 
         cancel_wbuf_timer_nolock(wbuf);
         if (!wbuf->used || wbuf->lnum == -1)
@@ -357,27 +387,53 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
         dbg_io("LEB %d:%d, %d bytes, jhead %s",
                wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
         ubifs_assert(!(wbuf->avail & 7));
-        ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
+        ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
+        ubifs_assert(wbuf->size >= c->min_io_size);
+        ubifs_assert(wbuf->size <= c->max_write_size);
+        ubifs_assert(wbuf->size % c->min_io_size == 0);
         ubifs_assert(!c->ro_media && !c->ro_mount);
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
 
         if (c->ro_error)
                 return -EROFS;
 
-        ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
+        /*
+         * Do not write the whole write-buffer but write only the minimum
+         * necessary amount of min. I/O units.
+         */
+        sync_len = ALIGN(wbuf->used, c->min_io_size);
+        dirt = sync_len - wbuf->used;
+        if (dirt)
+                ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
         err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
-                            c->min_io_size, wbuf->dtype);
+                            sync_len, wbuf->dtype);
         if (err) {
                 ubifs_err("cannot write %d bytes to LEB %d:%d",
-                          c->min_io_size, wbuf->lnum, wbuf->offs);
+                          sync_len, wbuf->lnum, wbuf->offs);
                 dbg_dump_stack();
                 return err;
         }
 
-        dirt = wbuf->avail;
-
         spin_lock(&wbuf->lock);
-        wbuf->offs += c->min_io_size;
-        wbuf->avail = c->min_io_size;
+        wbuf->offs += sync_len;
+        /*
+         * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
+         * But our goal is to optimize writes and make sure we write in
+         * @c->max_write_size chunks and to @c->max_write_size-aligned offsets.
+         * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
+         * sure that @wbuf->offs + @wbuf->size is aligned to
+         * @c->max_write_size. This way we make sure that after the next
+         * write-buffer flush we are again at the optimal offset (aligned to
+         * @c->max_write_size).
+         */
+        if (c->leb_size - wbuf->offs < c->max_write_size)
+                wbuf->size = c->leb_size - wbuf->offs;
+        else if (wbuf->offs & (c->max_write_size - 1))
+                wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+        else
+                wbuf->size = c->max_write_size;
+        wbuf->avail = wbuf->size;
         wbuf->used = 0;
         wbuf->next_ino = 0;
         spin_unlock(&wbuf->lock);
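The partial-flush arithmetic reduces to two lines; restated with assumed numbers (used = 700, min_io_size = 512) to show how little padding is actually written:

        sync_len = ALIGN(wbuf->used, c->min_io_size);   /* 700 -> 1024       */
        dirt = sync_len - wbuf->used;                   /* 324 padding bytes */
        /*
         * Only sync_len bytes reach the flash; the untouched tail of the
         * write-buffer (wbuf->size - sync_len bytes) is not written at all,
         * which is the space saving this change is after.
         */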
@@ -420,7 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
         spin_lock(&wbuf->lock);
         wbuf->lnum = lnum;
         wbuf->offs = offs;
-        wbuf->avail = c->min_io_size;
+        if (c->leb_size - wbuf->offs < c->max_write_size)
+                wbuf->size = c->leb_size - wbuf->offs;
+        else if (wbuf->offs & (c->max_write_size - 1))
+                wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+        else
+                wbuf->size = c->max_write_size;
+        wbuf->avail = wbuf->size;
         wbuf->used = 0;
         spin_unlock(&wbuf->lock);
         wbuf->dtype = dtype;
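The same three-way size computation now appears in both ubifs_wbuf_sync_nolock() and ubifs_wbuf_seek_nolock(). Purely as a sketch, it could be factored like this (the helper name is invented; the commit keeps the logic open-coded in both places):

        static int wbuf_size_for_offs(const struct ubifs_info *c, int offs)
        {
                if (c->leb_size - offs < c->max_write_size)
                        /* LEB tail: a full max. write unit no longer fits */
                        return c->leb_size - offs;
                if (offs & (c->max_write_size - 1))
                        /* unaligned: shrink so offs + size is aligned */
                        return ALIGN(offs, c->max_write_size) - offs;
                return c->max_write_size;
        }

Either form maintains the invariant asserted above: whenever a full max. write unit still fits in the LEB, wbuf->offs + wbuf->size is max_write_size-aligned.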
@@ -500,8 +562,9 @@ out_timers:
  *
  * This function writes data to flash via write-buffer @wbuf. This means that
  * the last piece of the node won't reach the flash media immediately if it
- * does not take whole minimal I/O unit. Instead, the node will sit in RAM
- * until the write-buffer is synchronized (e.g., by timer).
+ * does not take a whole max. write unit (@c->max_write_size). Instead, the
+ * node will sit in RAM until the write-buffer is synchronized (e.g., by a
+ * timer, or because more data are appended to the write-buffer).
  *
  * This function returns zero in case of success and a negative error code in
  * case of failure. If the node cannot be written because there is no more
@@ -518,9 +581,14 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
         ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
         ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
         ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
-        ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
+        ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
+        ubifs_assert(wbuf->size >= c->min_io_size);
+        ubifs_assert(wbuf->size <= c->max_write_size);
+        ubifs_assert(wbuf->size % c->min_io_size == 0);
         ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
         ubifs_assert(!c->ro_media && !c->ro_mount);
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
 
         if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
                 err = -ENOSPC;
@@ -543,14 +611,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                 dbg_io("flush jhead %s wbuf to LEB %d:%d",
                        dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
                 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
-                                    wbuf->offs, c->min_io_size,
+                                    wbuf->offs, wbuf->size,
                                     wbuf->dtype);
                 if (err)
                         goto out;
 
                 spin_lock(&wbuf->lock);
-                wbuf->offs += c->min_io_size;
-                wbuf->avail = c->min_io_size;
+                wbuf->offs += wbuf->size;
+                if (c->leb_size - wbuf->offs >= c->max_write_size)
+                        wbuf->size = c->max_write_size;
+                else
+                        wbuf->size = c->leb_size - wbuf->offs;
+                wbuf->avail = wbuf->size;
                 wbuf->used = 0;
                 wbuf->next_ino = 0;
                 spin_unlock(&wbuf->lock);
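Note why this path, unlike the seek path, needs no unaligned branch: the invariant asserted earlier guarantees that wbuf->offs + wbuf->size was max_write_size-aligned before the flush, so after

        wbuf->offs += wbuf->size;       /* now max_write_size-aligned */

the offset is aligned again, and only the LEB-tail case can force a smaller buffer.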
@@ -564,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                 goto exit;
         }
 
-        /*
-         * The node is large enough and does not fit entirely within current
-         * minimal I/O unit. We have to fill and flush write-buffer and switch
-         * to the next min. I/O unit.
-         */
-        dbg_io("flush jhead %s wbuf to LEB %d:%d",
-               dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
-        memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
-        err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
-                            c->min_io_size, wbuf->dtype);
-        if (err)
-                goto out;
+        offs = wbuf->offs;
+        written = 0;
 
-        offs = wbuf->offs + c->min_io_size;
-        len -= wbuf->avail;
-        aligned_len -= wbuf->avail;
-        written = wbuf->avail;
+        if (wbuf->used) {
+                /*
+                 * The node is large enough and does not fit entirely within
+                 * the currently available space. We have to fill and flush
+                 * the write-buffer and switch to the next max. write unit.
+                 */
+                dbg_io("flush jhead %s wbuf to LEB %d:%d",
+                       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
+                memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
+                err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+                                    wbuf->size, wbuf->dtype);
+                if (err)
+                        goto out;
+
+                offs += wbuf->size;
+                len -= wbuf->avail;
+                aligned_len -= wbuf->avail;
+                written += wbuf->avail;
+        } else if (wbuf->offs & (c->max_write_size - 1)) {
+                /*
+                 * The write-buffer offset is not aligned to
+                 * @c->max_write_size and @wbuf->size is less than
+                 * @c->max_write_size. Write @wbuf->size bytes to make sure
+                 * the following writes are done in optimal
+                 * @c->max_write_size chunks.
+                 */
+                dbg_io("write %d bytes to LEB %d:%d",
+                       wbuf->size, wbuf->lnum, wbuf->offs);
+                err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
+                                    wbuf->size, wbuf->dtype);
+                if (err)
+                        goto out;
+
+                offs += wbuf->size;
+                len -= wbuf->size;
+                aligned_len -= wbuf->size;
+                written += wbuf->size;
+        }
 
         /*
-         * The remaining data may take more whole min. I/O units, so write the
-         * remains multiple to min. I/O unit size directly to the flash media.
+         * The remaining data may take more whole max. write units, so write
+         * the remainder, in max. write unit multiples, directly to the flash.
          * We align node length to 8-byte boundary because we anyway flush wbuf
         * if the remaining space is less than 8 bytes.
         */
-        n = aligned_len >> c->min_io_shift;
+        n = aligned_len >> c->max_write_shift;
         if (n) {
-                n <<= c->min_io_shift;
+                n <<= c->max_write_shift;
                 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
                 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
                                     wbuf->dtype);
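The shift pair is just integer division rounding down to whole max. write units. With assumed values aligned_len = 5000 and max_write_size = 2048 (so max_write_shift = 11):

        n = aligned_len >> c->max_write_shift;  /* 5000 >> 11 = 2 units */
        n <<= c->max_write_shift;               /* 2 << 11 = 4096 bytes */
        /* 4096 bytes go straight to the flash; the remaining
         * 5000 - 4096 = 904 bytes land in the write-buffer below. */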
@@ -606,14 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
         if (aligned_len)
                 /*
                  * And now we have what's left and what does not take a whole
-                 * min. I/O unit, so write it to the write-buffer and we are
+                 * max. write unit, so write it to the write-buffer and we are
                  * done.
                  */
                 memcpy(wbuf->buf, buf + written, len);
 
         wbuf->offs = offs;
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                wbuf->size = c->max_write_size;
+        else
+                wbuf->size = c->leb_size - wbuf->offs;
+        wbuf->avail = wbuf->size - aligned_len;
         wbuf->used = aligned_len;
-        wbuf->avail = c->min_io_size - aligned_len;
         wbuf->next_ino = 0;
         spin_unlock(&wbuf->lock);
 
@@ -837,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 {
         size_t size;
 
-        wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
+        wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
         if (!wbuf->buf)
                 return -ENOMEM;
 
-        size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
+        size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
         wbuf->inodes = kmalloc(size, GFP_KERNEL);
         if (!wbuf->inodes) {
                 kfree(wbuf->buf);
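The inodes array is sized for a worst case: every node carries at least the common header, so at most max_write_size / UBIFS_CH_SZ nodes can start inside a full buffer, plus one entry of slack. A worked instance with assumed values (max_write_size = 2048, UBIFS_CH_SZ = 24, 8-byte ino_t):

        size = (2048 / 24 + 1) * sizeof(ino_t); /* (85 + 1) * 8 = 688 bytes */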
@@ -851,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 
         wbuf->used = 0;
         wbuf->lnum = wbuf->offs = -1;
-        wbuf->avail = c->min_io_size;
+        /*
+         * If the LEB starts at a max. write size aligned address, then the
+         * write-buffer size has to be set to @c->max_write_size. Otherwise,
+         * set it to something smaller so that it ends at the closest max.
+         * write size boundary.
+         */
+        size = c->max_write_size - (c->leb_start % c->max_write_size);
+        wbuf->avail = wbuf->size = size;
         wbuf->dtype = UBI_UNKNOWN;
         wbuf->sync_callback = NULL;
         mutex_init(&wbuf->io_mutex);
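The initial sizing applies the same alignment idea to the very first write of a LEB, this time relative to the physical erase block: @c->leb_start is the offset of the LEB data area inside the erase block. A worked instance with assumed values (leb_start = 2048, max_write_size = 4096; the real values depend on the flash):

        size = c->max_write_size - (c->leb_start % c->max_write_size);
        /* = 4096 - (2048 % 4096) = 2048, so the first flush ends at
         * leb_start + 2048 = 4096 inside the erase block, i.e. on a max.
         * write size boundary, and later buffers can use the full 4096. */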