author     Artem Bityutskiy <Artem.Bityutskiy@nokia.com>   2011-02-06 07:45:26 -0500
committer  Artem Bityutskiy <Artem.Bityutskiy@nokia.com>   2011-03-08 03:12:49 -0500
commit     6c7f74f703cc4baf053270a6e78a32f832f03445 (patch)
tree       3674246ea035096e5ab7536e7c6fdad8899c6d3a /fs/ubifs
parent     3c89f396dc78671cfbc1eb20ef1d5be6a9a02780 (diff)
UBIFS: use max_write_size for write-buffers
Switch write-buffers from 'c->min_io_size' to 'c->max_write_size', which
should presumably be more write speed-efficient. However, when a write-buffer
is synchronized, write out only the min. I/O units which actually contain
data, rather than the whole write-buffer. This is more space-efficient.
Additionally, this patch takes into account that the LEB might not start at
a max. write unit-aligned address.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
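
For orientation, the write-buffer sizing rule that this patch repeats in
ubifs_wbuf_sync_nolock(), ubifs_wbuf_seek_nolock() and
ubifs_wbuf_write_nolock() can be condensed into one helper. The following is
a minimal sketch, not part of the patch: next_wbuf_size() is a hypothetical
name, ALIGN() is the kernel macro, and the max. write size is assumed to be
a power of 2.

/*
 * Pick the next write-buffer size after @offs has moved: the buffer must
 * end either at the LEB end or at a @max_write_size boundary, so that
 * subsequent flushes happen at max. write unit-aligned offsets.
 */
static int next_wbuf_size(int leb_size, int max_write_size, int offs)
{
        if (leb_size - offs < max_write_size)
                /* Tail of the LEB - buffer only what is left */
                return leb_size - offs;
        if (offs & (max_write_size - 1))
                /* Unaligned - shrink so the buffer ends on a boundary */
                return ALIGN(offs, max_write_size) - offs;
        /* Aligned - use the full, fastest write unit */
        return max_write_size;
}

The sync and seek paths in the diff below use all three branches; the write
path skips the middle one because it only advances @wbuf->offs to
already-aligned positions.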
Diffstat (limited to 'fs/ubifs')
-rw-r--r--  fs/ubifs/io.c | 181
1 file changed, 137 insertions(+), 44 deletions(-)
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 7c2a014b59f9..dfd168b7807e 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
  * buffer is full or when it is not used for some time (by timer). This is
  * similar to the mechanism is used by JFFS2.
  *
+ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
+ * write size (@c->max_write_size). The latter is the maximum amount of bytes
+ * the underlying flash is able to program at a time, and writing in
+ * @c->max_write_size units should presumably be faster. Obviously,
+ * @c->min_io_size <= @c->max_write_size. Write-buffers are of
+ * @c->max_write_size bytes in size for maximum performance. However, when a
+ * write-buffer is flushed, only the portion of it (aligned to a @c->min_io_size
+ * boundary) which contains data is written, not the whole write-buffer,
+ * because this is more space-efficient.
+ *
+ * This optimization adds a few complications to the code. Indeed, on the one
+ * hand, we want to write in optimal @c->max_write_size byte chunks, which
+ * also means aligning writes at @c->max_write_size byte offsets. On the
+ * other hand, we do not want to waste space when synchronizing the write
+ * buffer, so during synchronization we write in smaller chunks. And this makes
+ * the next write offset not aligned to @c->max_write_size bytes. So we
+ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
+ * to @c->max_write_size bytes again. We do this by temporarily shrinking the
+ * write-buffer size (@wbuf->size).
+ *
  * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
  * mutexes defined inside these objects. Since sometimes upper-level code
  * has to lock the write-buffer (e.g. journal space reservation code), many
@@ -46,8 +66,8 @@
  * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
  * uses padding nodes or padding bytes, if the padding node does not fit.
  *
- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
- * every time they are read from the flash media.
+ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
+ * they are read from the flash media.
  */

 #include <linux/crc32.h>
@@ -347,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
  *
  * This function synchronizes write-buffer @buf and returns zero in case of
  * success or a negative error code in case of failure.
+ *
+ * Note, although write-buffers are of @c->max_write_size, this function does
+ * not necessarily write all @c->max_write_size bytes to the flash. Instead,
+ * if the write-buffer is only partially filled with data, only the used part
+ * of the write-buffer (aligned on a @c->min_io_size boundary) is synchronized.
+ * This way we waste less space.
  */
 int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
 {
         struct ubifs_info *c = wbuf->c;
-        int err, dirt;
+        int err, dirt, sync_len;

         cancel_wbuf_timer_nolock(wbuf);
         if (!wbuf->used || wbuf->lnum == -1)
@@ -366,26 +392,48 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
         ubifs_assert(wbuf->size <= c->max_write_size);
         ubifs_assert(wbuf->size % c->min_io_size == 0);
         ubifs_assert(!c->ro_media && !c->ro_mount);
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

         if (c->ro_error)
                 return -EROFS;

-        ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
+        /*
+         * Do not write the whole write-buffer, but only as many min. I/O
+         * units as are needed to cover the data.
+         */
+        sync_len = ALIGN(wbuf->used, c->min_io_size);
+        dirt = sync_len - wbuf->used;
+        if (dirt)
+                ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
         err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
-                            wbuf->size, wbuf->dtype);
+                            sync_len, wbuf->dtype);
         if (err) {
                 ubifs_err("cannot write %d bytes to LEB %d:%d",
-                          wbuf->size, wbuf->lnum, wbuf->offs);
+                          sync_len, wbuf->lnum, wbuf->offs);
                 dbg_dump_stack();
                 return err;
         }

-        dirt = wbuf->avail;
-
         spin_lock(&wbuf->lock);
-        wbuf->offs += wbuf->size;
-        wbuf->avail = c->min_io_size;
-        wbuf->size = c->min_io_size;
+        wbuf->offs += sync_len;
+        /*
+         * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
+         * But our goal is to optimize writes and make sure we write in
+         * @c->max_write_size chunks and at @c->max_write_size-aligned offsets.
+         * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
+         * sure that @wbuf->offs + @wbuf->size is aligned to
+         * @c->max_write_size. This way we make sure that after the next
+         * write-buffer flush we are again at the optimal offset (aligned to
+         * @c->max_write_size).
+         */
+        if (c->leb_size - wbuf->offs < c->max_write_size)
+                wbuf->size = c->leb_size - wbuf->offs;
+        else if (wbuf->offs & (c->max_write_size - 1))
+                wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+        else
+                wbuf->size = c->max_write_size;
+        wbuf->avail = wbuf->size;
         wbuf->used = 0;
         wbuf->next_ino = 0;
         spin_unlock(&wbuf->lock);
@@ -428,8 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
         spin_lock(&wbuf->lock);
         wbuf->lnum = lnum;
         wbuf->offs = offs;
-        wbuf->avail = c->min_io_size;
-        wbuf->size = c->min_io_size;
+        if (c->leb_size - wbuf->offs < c->max_write_size)
+                wbuf->size = c->leb_size - wbuf->offs;
+        else if (wbuf->offs & (c->max_write_size - 1))
+                wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
+        else
+                wbuf->size = c->max_write_size;
+        wbuf->avail = wbuf->size;
         wbuf->used = 0;
         spin_unlock(&wbuf->lock);
         wbuf->dtype = dtype;
@@ -509,8 +562,9 @@ out_timers:
  *
  * This function writes data to flash via write-buffer @wbuf. This means that
  * the last piece of the node won't reach the flash media immediately if it
- * does not take whole minimal I/O unit. Instead, the node will sit in RAM
- * until the write-buffer is synchronized (e.g., by timer).
+ * does not take a whole max. write unit (@c->max_write_size). Instead, the
+ * node will sit in RAM until the write-buffer is synchronized (e.g., by timer,
+ * or because more data are appended to the write-buffer).
  *
  * This function returns zero in case of success and a negative error code in
  * case of failure. If the node cannot be written because there is no more
@@ -533,6 +587,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
         ubifs_assert(wbuf->size % c->min_io_size == 0);
         ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
         ubifs_assert(!c->ro_media && !c->ro_mount);
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

         if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
                 err = -ENOSPC;
@@ -561,9 +617,12 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                                 goto out;

                         spin_lock(&wbuf->lock);
-                        wbuf->offs += c->min_io_size;
-                        wbuf->avail = c->min_io_size;
-                        wbuf->size = c->min_io_size;
+                        wbuf->offs += wbuf->size;
+                        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                                wbuf->size = c->max_write_size;
+                        else
+                                wbuf->size = c->leb_size - wbuf->offs;
+                        wbuf->avail = wbuf->size;
                         wbuf->used = 0;
                         wbuf->next_ino = 0;
                         spin_unlock(&wbuf->lock);
@@ -577,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
                 goto exit;
         }

-        /*
-         * The node is large enough and does not fit entirely within current
-         * minimal I/O unit. We have to fill and flush write-buffer and switch
-         * to the next min. I/O unit.
-         */
-        dbg_io("flush jhead %s wbuf to LEB %d:%d",
-               dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
-        memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
-        err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
-                            wbuf->size, wbuf->dtype);
-        if (err)
-                goto out;
+        offs = wbuf->offs;
+        written = 0;

-        offs = wbuf->offs + wbuf->size;
-        len -= wbuf->avail;
-        aligned_len -= wbuf->avail;
-        written = wbuf->avail;
+        if (wbuf->used) {
+                /*
+                 * The node is large enough and does not fit entirely within
+                 * the currently available space. We have to fill and flush
+                 * the write-buffer and switch to the next max. write unit.
+                 */
+                dbg_io("flush jhead %s wbuf to LEB %d:%d",
+                       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
+                memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
+                err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
+                                    wbuf->size, wbuf->dtype);
+                if (err)
+                        goto out;
+
+                offs += wbuf->size;
+                len -= wbuf->avail;
+                aligned_len -= wbuf->avail;
+                written += wbuf->avail;
+        } else if (wbuf->offs & (c->max_write_size - 1)) {
+                /*
+                 * The write-buffer offset is not aligned to
+                 * @c->max_write_size and @wbuf->size is less than
+                 * @c->max_write_size. Write @wbuf->size bytes to make sure the
+                 * following writes are done in optimal @c->max_write_size
+                 * chunks.
+                 */
+                dbg_io("write %d bytes to LEB %d:%d",
+                       wbuf->size, wbuf->lnum, wbuf->offs);
+                err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
+                                    wbuf->size, wbuf->dtype);
+                if (err)
+                        goto out;
+
+                offs += wbuf->size;
+                len -= wbuf->size;
+                aligned_len -= wbuf->size;
+                written += wbuf->size;
+        }

         /*
-         * The remaining data may take more whole min. I/O units, so write the
-         * remains multiple to min. I/O unit size directly to the flash media.
+         * The remaining data may take more whole max. write units, so write the
+         * multiple-of-max.-write-unit-size part directly to the flash media.
          * We align node length to 8-byte boundary because we anyway flash wbuf
          * if the remaining space is less than 8 bytes.
          */
-        n = aligned_len >> c->min_io_shift;
+        n = aligned_len >> c->max_write_shift;
         if (n) {
-                n <<= c->min_io_shift;
+                n <<= c->max_write_shift;
                 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
                 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
                                     wbuf->dtype);
@@ -619,15 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
         if (aligned_len)
                 /*
                  * And now we have what's left and what does not take whole
-                 * min. I/O unit, so write it to the write-buffer and we are
+                 * max. write unit, so write it to the write-buffer and we are
                  * done.
                  */
                 memcpy(wbuf->buf, buf + written, len);

         wbuf->offs = offs;
+        if (c->leb_size - wbuf->offs >= c->max_write_size)
+                wbuf->size = c->max_write_size;
+        else
+                wbuf->size = c->leb_size - wbuf->offs;
+        wbuf->avail = wbuf->size - aligned_len;
         wbuf->used = aligned_len;
-        wbuf->avail = c->min_io_size - aligned_len;
-        wbuf->size = c->min_io_size;
         wbuf->next_ino = 0;
         spin_unlock(&wbuf->lock);

@@ -851,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
 {
         size_t size;

-        wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
+        wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
         if (!wbuf->buf)
                 return -ENOMEM;

-        size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
+        size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
         wbuf->inodes = kmalloc(size, GFP_KERNEL);
         if (!wbuf->inodes) {
                 kfree(wbuf->buf);
@@ -865,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)

         wbuf->used = 0;
         wbuf->lnum = wbuf->offs = -1;
-        wbuf->avail = wbuf->size = c->min_io_size;
+        /*
+         * If the LEB starts at a max. write size aligned address, then the
+         * write-buffer size has to be set to @c->max_write_size. Otherwise,
+         * set it to something smaller so that it ends at the closest max.
+         * write size boundary.
+         */
+        size = c->max_write_size - (c->leb_start % c->max_write_size);
+        wbuf->avail = wbuf->size = size;
         wbuf->dtype = UBI_UNKNOWN;
         wbuf->sync_callback = NULL;
         mutex_init(&wbuf->io_mutex);
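
As a quick sanity check of the shrink-and-realign behaviour described in the
comments above, here is a small self-contained walk-through under a
hypothetical flash geometry (min_io_size = 512, max_write_size = 2048);
ALIGN() is redefined locally because this runs outside the kernel:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        int min_io = 512, max_write = 2048;     /* hypothetical geometry */
        int offs = 2048, used = 600;            /* buffer synced with 600 bytes */

        /* Sync writes only the min. I/O units that contain data */
        int sync_len = ALIGN(used, min_io);     /* 1024, not the full 2048 */
        offs += sync_len;                       /* 3072: not 2048-aligned */

        /* Shrink the buffer so it ends at the next max. write boundary */
        int size = ALIGN(offs, max_write) - offs;       /* 1024 */

        printf("sync_len=%d next offs=%d next size=%d\n",
               sync_len, offs, size);
        /* The next flush ends at 4096, so writes are then aligned again */
        return 0;
}

Once the shrunken 1024-byte buffer at offset 3072 fills and is flushed, the
write offset lands on 4096, a max. write size boundary, and full
@c->max_write_size writes resume.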