aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2011-02-06 07:45:26 -0500
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2011-03-08 03:12:49 -0500
commit6c7f74f703cc4baf053270a6e78a32f832f03445 (patch)
tree3674246ea035096e5ab7536e7c6fdad8899c6d3a /fs
parent3c89f396dc78671cfbc1eb20ef1d5be6a9a02780 (diff)
UBIFS: use max_write_size for write-buffers
Switch write-buffers from 'c->min_io_size' to 'c->max_write_size' which presumably has to be more write speed-efficient. However, when write-buffer is synchronized, write only the min. I/O units which contain the data, do not write whole write-buffer. This is more space-efficient. Additionally, this patch takes into account that the LEB might not start from the max. write unit-aligned address. Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ubifs/io.c181
1 files changed, 137 insertions, 44 deletions
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 7c2a014b59f9..dfd168b7807e 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
31 * buffer is full or when it is not used for some time (by timer). This is 31 * buffer is full or when it is not used for some time (by timer). This is
32 * similar to the mechanism is used by JFFS2. 32 * similar to the mechanism is used by JFFS2.
33 * 33 *
34 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
35 * write size (@c->max_write_size). The latter is the maximum amount of bytes
36 * the underlying flash is able to program at a time, and writing in
37 * @c->max_write_size units should presumably be faster. Obviously,
38 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
39 * @c->max_write_size bytes in size for maximum performance. However, when a
40 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
41 * boundary) which contains data is written, not the whole write-buffer,
42 * because this is more space-efficient.
43 *
44 * This optimization adds few complications to the code. Indeed, on the one
45 * hand, we want to write in optimal @c->max_write_size bytes chunks, which
46 * also means aligning writes at the @c->max_write_size bytes offsets. On the
47 * other hand, we do not want to waste space when synchronizing the write
48 * buffer, so during synchronization we write in smaller chunks. And this makes
49 * the next write offset to be not aligned to @c->max_write_size bytes. So we
50 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
51 * to @c->max_write_size bytes again. We do this by temporarily shrinking
52 * write-buffer size (@wbuf->size).
53 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 54 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code 55 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many 56 * has to lock the write-buffer (e.g. journal space reservation code), many
@@ -46,8 +66,8 @@
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it 66 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit. 67 * uses padding nodes or padding bytes, if the padding node does not fit.
48 * 68 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes 69 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
50 * every time they are read from the flash media. 70 * they are read from the flash media.
51 */ 71 */
52 72
53#include <linux/crc32.h> 73#include <linux/crc32.h>
@@ -347,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
347 * 367 *
348 * This function synchronizes write-buffer @buf and returns zero in case of 368 * This function synchronizes write-buffer @buf and returns zero in case of
349 * success or a negative error code in case of failure. 369 * success or a negative error code in case of failure.
370 *
371 * Note, although write-buffers are of @c->max_write_size, this function does
372 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
373 * if the write-buffer is only partially filled with data, only the used part
374 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
375 * This way we waste less space.
350 */ 376 */
351int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) 377int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
352{ 378{
353 struct ubifs_info *c = wbuf->c; 379 struct ubifs_info *c = wbuf->c;
354 int err, dirt; 380 int err, dirt, sync_len;
355 381
356 cancel_wbuf_timer_nolock(wbuf); 382 cancel_wbuf_timer_nolock(wbuf);
357 if (!wbuf->used || wbuf->lnum == -1) 383 if (!wbuf->used || wbuf->lnum == -1)
@@ -366,26 +392,48 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
366 ubifs_assert(wbuf->size <= c->max_write_size); 392 ubifs_assert(wbuf->size <= c->max_write_size);
367 ubifs_assert(wbuf->size % c->min_io_size == 0); 393 ubifs_assert(wbuf->size % c->min_io_size == 0);
368 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
369 397
370 if (c->ro_error) 398 if (c->ro_error)
371 return -EROFS; 399 return -EROFS;
372 400
373 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); 401 /*
402 * Do not write whole write buffer but write only the minimum necessary
403 * amount of min. I/O units.
404 */
405 sync_len = ALIGN(wbuf->used, c->min_io_size);
406 dirt = sync_len - wbuf->used;
407 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
374 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
375 wbuf->size, wbuf->dtype); 410 sync_len, wbuf->dtype);
376 if (err) { 411 if (err) {
377 ubifs_err("cannot write %d bytes to LEB %d:%d", 412 ubifs_err("cannot write %d bytes to LEB %d:%d",
378 wbuf->size, wbuf->lnum, wbuf->offs); 413 sync_len, wbuf->lnum, wbuf->offs);
379 dbg_dump_stack(); 414 dbg_dump_stack();
380 return err; 415 return err;
381 } 416 }
382 417
383 dirt = wbuf->avail;
384
385 spin_lock(&wbuf->lock); 418 spin_lock(&wbuf->lock);
386 wbuf->offs += wbuf->size; 419 wbuf->offs += sync_len;
387 wbuf->avail = c->min_io_size; 420 /*
388 wbuf->size = c->min_io_size; 421 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
422 * But our goal is to optimize writes and make sure we write in
423 * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
424 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
425 * sure that @wbuf->offs + @wbuf->size is aligned to
426 * @c->max_write_size. This way we make sure that after next
427 * write-buffer flush we are again at the optimal offset (aligned to
428 * @c->max_write_size).
429 */
430 if (c->leb_size - wbuf->offs < c->max_write_size)
431 wbuf->size = c->leb_size - wbuf->offs;
432 else if (wbuf->offs & (c->max_write_size - 1))
433 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
434 else
435 wbuf->size = c->max_write_size;
436 wbuf->avail = wbuf->size;
389 wbuf->used = 0; 437 wbuf->used = 0;
390 wbuf->next_ino = 0; 438 wbuf->next_ino = 0;
391 spin_unlock(&wbuf->lock); 439 spin_unlock(&wbuf->lock);
@@ -428,8 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
428 spin_lock(&wbuf->lock); 476 spin_lock(&wbuf->lock);
429 wbuf->lnum = lnum; 477 wbuf->lnum = lnum;
430 wbuf->offs = offs; 478 wbuf->offs = offs;
431 wbuf->avail = c->min_io_size; 479 if (c->leb_size - wbuf->offs < c->max_write_size)
432 wbuf->size = c->min_io_size; 480 wbuf->size = c->leb_size - wbuf->offs;
481 else if (wbuf->offs & (c->max_write_size - 1))
482 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
483 else
484 wbuf->size = c->max_write_size;
485 wbuf->avail = wbuf->size;
433 wbuf->used = 0; 486 wbuf->used = 0;
434 spin_unlock(&wbuf->lock); 487 spin_unlock(&wbuf->lock);
435 wbuf->dtype = dtype; 488 wbuf->dtype = dtype;
@@ -509,8 +562,9 @@ out_timers:
509 * 562 *
510 * This function writes data to flash via write-buffer @wbuf. This means that 563 * This function writes data to flash via write-buffer @wbuf. This means that
511 * the last piece of the node won't reach the flash media immediately if it 564 * the last piece of the node won't reach the flash media immediately if it
512 * does not take whole minimal I/O unit. Instead, the node will sit in RAM 565 * does not take whole max. write unit (@c->max_write_size). Instead, the node
513 * until the write-buffer is synchronized (e.g., by timer). 566 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
567 * because more data are appended to the write-buffer).
514 * 568 *
515 * This function returns zero in case of success and a negative error code in 569 * This function returns zero in case of success and a negative error code in
516 * case of failure. If the node cannot be written because there is no more 570 * case of failure. If the node cannot be written because there is no more
@@ -533,6 +587,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
533 ubifs_assert(wbuf->size % c->min_io_size == 0); 587 ubifs_assert(wbuf->size % c->min_io_size == 0);
534 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
535 ubifs_assert(!c->ro_media && !c->ro_mount); 589 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
536 592
537 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
538 err = -ENOSPC; 594 err = -ENOSPC;
@@ -561,9 +617,12 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
561 goto out; 617 goto out;
562 618
563 spin_lock(&wbuf->lock); 619 spin_lock(&wbuf->lock);
564 wbuf->offs += c->min_io_size; 620 wbuf->offs += wbuf->size;
565 wbuf->avail = c->min_io_size; 621 if (c->leb_size - wbuf->offs >= c->max_write_size)
566 wbuf->size = c->min_io_size; 622 wbuf->size = c->max_write_size;
623 else
624 wbuf->size = c->leb_size - wbuf->offs;
625 wbuf->avail = wbuf->size;
567 wbuf->used = 0; 626 wbuf->used = 0;
568 wbuf->next_ino = 0; 627 wbuf->next_ino = 0;
569 spin_unlock(&wbuf->lock); 628 spin_unlock(&wbuf->lock);
@@ -577,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
577 goto exit; 636 goto exit;
578 } 637 }
579 638
580 /* 639 offs = wbuf->offs;
581 * The node is large enough and does not fit entirely within current 640 written = 0;
582 * minimal I/O unit. We have to fill and flush write-buffer and switch
583 * to the next min. I/O unit.
584 */
585 dbg_io("flush jhead %s wbuf to LEB %d:%d",
586 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
587 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
588 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
589 wbuf->size, wbuf->dtype);
590 if (err)
591 goto out;
592 641
593 offs = wbuf->offs + wbuf->size; 642 if (wbuf->used) {
594 len -= wbuf->avail; 643 /*
595 aligned_len -= wbuf->avail; 644 * The node is large enough and does not fit entirely within
596 written = wbuf->avail; 645 * current available space. We have to fill and flush
646 * write-buffer and switch to the next max. write unit.
647 */
648 dbg_io("flush jhead %s wbuf to LEB %d:%d",
649 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
650 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
651 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
652 wbuf->size, wbuf->dtype);
653 if (err)
654 goto out;
655
656 offs += wbuf->size;
657 len -= wbuf->avail;
658 aligned_len -= wbuf->avail;
659 written += wbuf->avail;
660 } else if (wbuf->offs & (c->max_write_size - 1)) {
661 /*
662 * The write-buffer offset is not aligned to
663 * @c->max_write_size and @wbuf->size is less than
664 * @c->max_write_size. Write @wbuf->size bytes to make sure the
665 * following writes are done in optimal @c->max_write_size
666 * chunks.
667 */
668 dbg_io("write %d bytes to LEB %d:%d",
669 wbuf->size, wbuf->lnum, wbuf->offs);
670 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
671 wbuf->size, wbuf->dtype);
672 if (err)
673 goto out;
674
675 offs += wbuf->size;
676 len -= wbuf->size;
677 aligned_len -= wbuf->size;
678 written += wbuf->size;
679 }
597 680
598 /* 681 /*
599 * The remaining data may take more whole min. I/O units, so write the 682 * The remaining data may take more whole max. write units, so write the
600 * remains multiple to min. I/O unit size directly to the flash media. 683 * remains multiple to max. write unit size directly to the flash media.
601 * We align node length to 8-byte boundary because we anyway flash wbuf 684 * We align node length to 8-byte boundary because we anyway flash wbuf
602 * if the remaining space is less than 8 bytes. 685 * if the remaining space is less than 8 bytes.
603 */ 686 */
604 n = aligned_len >> c->min_io_shift; 687 n = aligned_len >> c->max_write_shift;
605 if (n) { 688 if (n) {
606 n <<= c->min_io_shift; 689 n <<= c->max_write_shift;
607 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
608 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
609 wbuf->dtype); 692 wbuf->dtype);
@@ -619,15 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
619 if (aligned_len) 702 if (aligned_len)
620 /* 703 /*
621 * And now we have what's left and what does not take whole 704 * And now we have what's left and what does not take whole
622 * min. I/O unit, so write it to the write-buffer and we are 705 * max. write unit, so write it to the write-buffer and we are
623 * done. 706 * done.
624 */ 707 */
625 memcpy(wbuf->buf, buf + written, len); 708 memcpy(wbuf->buf, buf + written, len);
626 709
627 wbuf->offs = offs; 710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size;
713 else
714 wbuf->size = c->leb_size - wbuf->offs;
715 wbuf->avail = wbuf->size - aligned_len;
628 wbuf->used = aligned_len; 716 wbuf->used = aligned_len;
629 wbuf->avail = c->min_io_size - aligned_len;
630 wbuf->size = c->min_io_size;
631 wbuf->next_ino = 0; 717 wbuf->next_ino = 0;
632 spin_unlock(&wbuf->lock); 718 spin_unlock(&wbuf->lock);
633 719
@@ -851,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
851{ 937{
852 size_t size; 938 size_t size;
853 939
854 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 940 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
855 if (!wbuf->buf) 941 if (!wbuf->buf)
856 return -ENOMEM; 942 return -ENOMEM;
857 943
858 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 944 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
859 wbuf->inodes = kmalloc(size, GFP_KERNEL); 945 wbuf->inodes = kmalloc(size, GFP_KERNEL);
860 if (!wbuf->inodes) { 946 if (!wbuf->inodes) {
861 kfree(wbuf->buf); 947 kfree(wbuf->buf);
@@ -865,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
865 951
866 wbuf->used = 0; 952 wbuf->used = 0;
867 wbuf->lnum = wbuf->offs = -1; 953 wbuf->lnum = wbuf->offs = -1;
868 wbuf->avail = wbuf->size = c->min_io_size; 954 /*
955 * If the LEB starts at the max. write size aligned address, then
956 * write-buffer size has to be set to @c->max_write_size. Otherwise,
957 * set it to something smaller so that it ends at the closest max.
958 * write size boundary.
959 */
960 size = c->max_write_size - (c->leb_start % c->max_write_size);
961 wbuf->avail = wbuf->size = size;
869 wbuf->dtype = UBI_UNKNOWN; 962 wbuf->dtype = UBI_UNKNOWN;
870 wbuf->sync_callback = NULL; 963 wbuf->sync_callback = NULL;
871 mutex_init(&wbuf->io_mutex); 964 mutex_init(&wbuf->io_mutex);