diff options
| -rw-r--r-- | fs/ubifs/io.c | 181 |
1 files changed, 137 insertions, 44 deletions
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 7c2a014b59f9..dfd168b7807e 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
| @@ -31,6 +31,26 @@ | |||
| 31 | * buffer is full or when it is not used for some time (by timer). This is | 31 | * buffer is full or when it is not used for some time (by timer). This is |
| 32 | * similar to the mechanism is used by JFFS2. | 32 | * similar to the mechanism is used by JFFS2. |
| 33 | * | 33 | * |
| 34 | * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum | ||
| 35 | * write size (@c->max_write_size). The latter is the maximum amount of bytes | ||
| 36 | * the underlying flash is able to program at a time, and writing in | ||
| 37 | * @c->max_write_size units should presumably be faster. Obviously, | ||
| 38 | * @c->min_io_size <= @c->max_write_size. Write-buffers are of | ||
| 39 | * @c->max_write_size bytes in size for maximum performance. However, when a | ||
| 40 | * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size | ||
| 41 | * boundary) which contains data is written, not the whole write-buffer, | ||
| 42 | * because this is more space-efficient. | ||
| 43 | * | ||
| 44 | * This optimization adds a few complications to the code. Indeed, on the one | ||
| 45 | * hand, we want to write in optimal @c->max_write_size bytes chunks, which | ||
| 46 | * also means aligning writes at the @c->max_write_size bytes offsets. On the | ||
| 47 | * other hand, we do not want to waste space when synchronizing the write | ||
| 48 | * buffer, so during synchronization we write in smaller chunks. And this makes | ||
| 49 | * the next write offset to be not aligned to @c->max_write_size bytes. So we | ||
| 50 | * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned | ||
| 51 | * to @c->max_write_size bytes again. We do this by temporarily shrinking | ||
| 52 | * write-buffer size (@wbuf->size). | ||
| 53 | * | ||
| 34 | * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by | 54 | * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by |
| 35 | * mutexes defined inside these objects. Since sometimes upper-level code | 55 | * mutexes defined inside these objects. Since sometimes upper-level code |
| 36 | * has to lock the write-buffer (e.g. journal space reservation code), many | 56 | * has to lock the write-buffer (e.g. journal space reservation code), many |
| @@ -46,8 +66,8 @@ | |||
| 46 | * UBIFS uses padding when it pads to the next min. I/O unit. In this case it | 66 | * UBIFS uses padding when it pads to the next min. I/O unit. In this case it |
| 47 | * uses padding nodes or padding bytes, if the padding node does not fit. | 67 | * uses padding nodes or padding bytes, if the padding node does not fit. |
| 48 | * | 68 | * |
| 49 | * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes | 69 | * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when |
| 50 | * every time they are read from the flash media. | 70 | * they are read from the flash media. |
| 51 | */ | 71 | */ |
| 52 | 72 | ||
| 53 | #include <linux/crc32.h> | 73 | #include <linux/crc32.h> |
| @@ -347,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) | |||
| 347 | * | 367 | * |
| 348 | * This function synchronizes write-buffer @buf and returns zero in case of | 368 | * This function synchronizes write-buffer @buf and returns zero in case of |
| 349 | * success or a negative error code in case of failure. | 369 | * success or a negative error code in case of failure. |
| 370 | * | ||
| 371 | * Note, although write-buffers are of @c->max_write_size, this function does | ||
| 372 | * not necessarily write all @c->max_write_size bytes to the flash. Instead, | ||
| 373 | * if the write-buffer is only partially filled with data, only the used part | ||
| 374 | * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. | ||
| 375 | * This way we waste less space. | ||
| 350 | */ | 376 | */ |
| 351 | int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | 377 | int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) |
| 352 | { | 378 | { |
| 353 | struct ubifs_info *c = wbuf->c; | 379 | struct ubifs_info *c = wbuf->c; |
| 354 | int err, dirt; | 380 | int err, dirt, sync_len; |
| 355 | 381 | ||
| 356 | cancel_wbuf_timer_nolock(wbuf); | 382 | cancel_wbuf_timer_nolock(wbuf); |
| 357 | if (!wbuf->used || wbuf->lnum == -1) | 383 | if (!wbuf->used || wbuf->lnum == -1) |
| @@ -366,26 +392,48 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
| 366 | ubifs_assert(wbuf->size <= c->max_write_size); | 392 | ubifs_assert(wbuf->size <= c->max_write_size); |
| 367 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 393 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
| 368 | ubifs_assert(!c->ro_media && !c->ro_mount); | 394 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 395 | if (c->leb_size - wbuf->offs >= c->max_write_size) | ||
| 396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | ||
| 369 | 397 | ||
| 370 | if (c->ro_error) | 398 | if (c->ro_error) |
| 371 | return -EROFS; | 399 | return -EROFS; |
| 372 | 400 | ||
| 373 | ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); | 401 | /* |
| 402 | * Do not write whole write buffer but write only the minimum necessary | ||
| 403 | * amount of min. I/O units. | ||
| 404 | */ | ||
| 405 | sync_len = ALIGN(wbuf->used, c->min_io_size); | ||
| 406 | dirt = sync_len - wbuf->used; | ||
| 407 | if (dirt) | ||
| 408 | ubifs_pad(c, wbuf->buf + wbuf->used, dirt); | ||
| 374 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, | 409 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, |
| 375 | wbuf->size, wbuf->dtype); | 410 | sync_len, wbuf->dtype); |
| 376 | if (err) { | 411 | if (err) { |
| 377 | ubifs_err("cannot write %d bytes to LEB %d:%d", | 412 | ubifs_err("cannot write %d bytes to LEB %d:%d", |
| 378 | wbuf->size, wbuf->lnum, wbuf->offs); | 413 | sync_len, wbuf->lnum, wbuf->offs); |
| 379 | dbg_dump_stack(); | 414 | dbg_dump_stack(); |
| 380 | return err; | 415 | return err; |
| 381 | } | 416 | } |
| 382 | 417 | ||
| 383 | dirt = wbuf->avail; | ||
| 384 | |||
| 385 | spin_lock(&wbuf->lock); | 418 | spin_lock(&wbuf->lock); |
| 386 | wbuf->offs += wbuf->size; | 419 | wbuf->offs += sync_len; |
| 387 | wbuf->avail = c->min_io_size; | 420 | /* |
| 388 | wbuf->size = c->min_io_size; | 421 | * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. |
| 422 | * But our goal is to optimize writes and make sure we write in | ||
| 423 | * @c->max_write_size chunks and to @c->max_write_size-aligned offset. | ||
| 424 | * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make | ||
| 425 | * sure that @wbuf->offs + @wbuf->size is aligned to | ||
| 426 | * @c->max_write_size. This way we make sure that after next | ||
| 427 | * write-buffer flush we are again at the optimal offset (aligned to | ||
| 428 | * @c->max_write_size). | ||
| 429 | */ | ||
| 430 | if (c->leb_size - wbuf->offs < c->max_write_size) | ||
| 431 | wbuf->size = c->leb_size - wbuf->offs; | ||
| 432 | else if (wbuf->offs & (c->max_write_size - 1)) | ||
| 433 | wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; | ||
| 434 | else | ||
| 435 | wbuf->size = c->max_write_size; | ||
| 436 | wbuf->avail = wbuf->size; | ||
| 389 | wbuf->used = 0; | 437 | wbuf->used = 0; |
| 390 | wbuf->next_ino = 0; | 438 | wbuf->next_ino = 0; |
| 391 | spin_unlock(&wbuf->lock); | 439 | spin_unlock(&wbuf->lock); |
| @@ -428,8 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | |||
| 428 | spin_lock(&wbuf->lock); | 476 | spin_lock(&wbuf->lock); |
| 429 | wbuf->lnum = lnum; | 477 | wbuf->lnum = lnum; |
| 430 | wbuf->offs = offs; | 478 | wbuf->offs = offs; |
| 431 | wbuf->avail = c->min_io_size; | 479 | if (c->leb_size - wbuf->offs < c->max_write_size) |
| 432 | wbuf->size = c->min_io_size; | 480 | wbuf->size = c->leb_size - wbuf->offs; |
| 481 | else if (wbuf->offs & (c->max_write_size - 1)) | ||
| 482 | wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; | ||
| 483 | else | ||
| 484 | wbuf->size = c->max_write_size; | ||
| 485 | wbuf->avail = wbuf->size; | ||
| 433 | wbuf->used = 0; | 486 | wbuf->used = 0; |
| 434 | spin_unlock(&wbuf->lock); | 487 | spin_unlock(&wbuf->lock); |
| 435 | wbuf->dtype = dtype; | 488 | wbuf->dtype = dtype; |
| @@ -509,8 +562,9 @@ out_timers: | |||
| 509 | * | 562 | * |
| 510 | * This function writes data to flash via write-buffer @wbuf. This means that | 563 | * This function writes data to flash via write-buffer @wbuf. This means that |
| 511 | * the last piece of the node won't reach the flash media immediately if it | 564 | * the last piece of the node won't reach the flash media immediately if it |
| 512 | * does not take whole minimal I/O unit. Instead, the node will sit in RAM | 565 | * does not take whole max. write unit (@c->max_write_size). Instead, the node |
| 513 | * until the write-buffer is synchronized (e.g., by timer). | 566 | * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or |
| 567 | * because more data are appended to the write-buffer). | ||
| 514 | * | 568 | * |
| 515 | * This function returns zero in case of success and a negative error code in | 569 | * This function returns zero in case of success and a negative error code in |
| 516 | * case of failure. If the node cannot be written because there is no more | 570 | * case of failure. If the node cannot be written because there is no more |
| @@ -533,6 +587,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 533 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 587 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
| 534 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 588 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
| 535 | ubifs_assert(!c->ro_media && !c->ro_mount); | 589 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 590 | if (c->leb_size - wbuf->offs >= c->max_write_size) | ||
| 591 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | ||
| 536 | 592 | ||
| 537 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { | 593 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { |
| 538 | err = -ENOSPC; | 594 | err = -ENOSPC; |
| @@ -561,9 +617,12 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 561 | goto out; | 617 | goto out; |
| 562 | 618 | ||
| 563 | spin_lock(&wbuf->lock); | 619 | spin_lock(&wbuf->lock); |
| 564 | wbuf->offs += c->min_io_size; | 620 | wbuf->offs += wbuf->size; |
| 565 | wbuf->avail = c->min_io_size; | 621 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 566 | wbuf->size = c->min_io_size; | 622 | wbuf->size = c->max_write_size; |
| 623 | else | ||
| 624 | wbuf->size = c->leb_size - wbuf->offs; | ||
| 625 | wbuf->avail = wbuf->size; | ||
| 567 | wbuf->used = 0; | 626 | wbuf->used = 0; |
| 568 | wbuf->next_ino = 0; | 627 | wbuf->next_ino = 0; |
| 569 | spin_unlock(&wbuf->lock); | 628 | spin_unlock(&wbuf->lock); |
| @@ -577,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 577 | goto exit; | 636 | goto exit; |
| 578 | } | 637 | } |
| 579 | 638 | ||
| 580 | /* | 639 | offs = wbuf->offs; |
| 581 | * The node is large enough and does not fit entirely within current | 640 | written = 0; |
| 582 | * minimal I/O unit. We have to fill and flush write-buffer and switch | ||
| 583 | * to the next min. I/O unit. | ||
| 584 | */ | ||
| 585 | dbg_io("flush jhead %s wbuf to LEB %d:%d", | ||
| 586 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); | ||
| 587 | memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); | ||
| 588 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, | ||
| 589 | wbuf->size, wbuf->dtype); | ||
| 590 | if (err) | ||
| 591 | goto out; | ||
| 592 | 641 | ||
| 593 | offs = wbuf->offs + wbuf->size; | 642 | if (wbuf->used) { |
| 594 | len -= wbuf->avail; | 643 | /* |
| 595 | aligned_len -= wbuf->avail; | 644 | * The node is large enough and does not fit entirely within |
| 596 | written = wbuf->avail; | 645 | * current available space. We have to fill and flush |
| 646 | * write-buffer and switch to the next max. write unit. | ||
| 647 | */ | ||
| 648 | dbg_io("flush jhead %s wbuf to LEB %d:%d", | ||
| 649 | dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); | ||
| 650 | memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); | ||
| 651 | err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, | ||
| 652 | wbuf->size, wbuf->dtype); | ||
| 653 | if (err) | ||
| 654 | goto out; | ||
| 655 | |||
| 656 | offs += wbuf->size; | ||
| 657 | len -= wbuf->avail; | ||
| 658 | aligned_len -= wbuf->avail; | ||
| 659 | written += wbuf->avail; | ||
| 660 | } else if (wbuf->offs & (c->max_write_size - 1)) { | ||
| 661 | /* | ||
| 662 | * The write-buffer offset is not aligned to | ||
| 663 | * @c->max_write_size and @wbuf->size is less than | ||
| 664 | * @c->max_write_size. Write @wbuf->size bytes to make sure the | ||
| 665 | * following writes are done in optimal @c->max_write_size | ||
| 666 | * chunks. | ||
| 667 | */ | ||
| 668 | dbg_io("write %d bytes to LEB %d:%d", | ||
| 669 | wbuf->size, wbuf->lnum, wbuf->offs); | ||
| 670 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, | ||
| 671 | wbuf->size, wbuf->dtype); | ||
| 672 | if (err) | ||
| 673 | goto out; | ||
| 674 | |||
| 675 | offs += wbuf->size; | ||
| 676 | len -= wbuf->size; | ||
| 677 | aligned_len -= wbuf->size; | ||
| 678 | written += wbuf->size; | ||
| 679 | } | ||
| 597 | 680 | ||
| 598 | /* | 681 | /* |
| 599 | * The remaining data may take more whole min. I/O units, so write the | 682 | * The remaining data may take more whole max. write units, so write the |
| 600 | * remains multiple to min. I/O unit size directly to the flash media. | 683 | * remains multiple to max. write unit size directly to the flash media. |
| 601 | * We align node length to 8-byte boundary because we anyway flash wbuf | 684 | * We align node length to 8-byte boundary because we anyway flash wbuf |
| 602 | * if the remaining space is less than 8 bytes. | 685 | * if the remaining space is less than 8 bytes. |
| 603 | */ | 686 | */ |
| 604 | n = aligned_len >> c->min_io_shift; | 687 | n = aligned_len >> c->max_write_shift; |
| 605 | if (n) { | 688 | if (n) { |
| 606 | n <<= c->min_io_shift; | 689 | n <<= c->max_write_shift; |
| 607 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); | 690 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); |
| 608 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, | 691 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, |
| 609 | wbuf->dtype); | 692 | wbuf->dtype); |
| @@ -619,15 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 619 | if (aligned_len) | 702 | if (aligned_len) |
| 620 | /* | 703 | /* |
| 621 | * And now we have what's left and what does not take whole | 704 | * And now we have what's left and what does not take whole |
| 622 | * min. I/O unit, so write it to the write-buffer and we are | 705 | * max. write unit, so write it to the write-buffer and we are |
| 623 | * done. | 706 | * done. |
| 624 | */ | 707 | */ |
| 625 | memcpy(wbuf->buf, buf + written, len); | 708 | memcpy(wbuf->buf, buf + written, len); |
| 626 | 709 | ||
| 627 | wbuf->offs = offs; | 710 | wbuf->offs = offs; |
| 711 | if (c->leb_size - wbuf->offs >= c->max_write_size) | ||
| 712 | wbuf->size = c->max_write_size; | ||
| 713 | else | ||
| 714 | wbuf->size = c->leb_size - wbuf->offs; | ||
| 715 | wbuf->avail = wbuf->size - aligned_len; | ||
| 628 | wbuf->used = aligned_len; | 716 | wbuf->used = aligned_len; |
| 629 | wbuf->avail = c->min_io_size - aligned_len; | ||
| 630 | wbuf->size = c->min_io_size; | ||
| 631 | wbuf->next_ino = 0; | 717 | wbuf->next_ino = 0; |
| 632 | spin_unlock(&wbuf->lock); | 718 | spin_unlock(&wbuf->lock); |
| 633 | 719 | ||
| @@ -851,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) | |||
| 851 | { | 937 | { |
| 852 | size_t size; | 938 | size_t size; |
| 853 | 939 | ||
| 854 | wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); | 940 | wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); |
| 855 | if (!wbuf->buf) | 941 | if (!wbuf->buf) |
| 856 | return -ENOMEM; | 942 | return -ENOMEM; |
| 857 | 943 | ||
| 858 | size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); | 944 | size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); |
| 859 | wbuf->inodes = kmalloc(size, GFP_KERNEL); | 945 | wbuf->inodes = kmalloc(size, GFP_KERNEL); |
| 860 | if (!wbuf->inodes) { | 946 | if (!wbuf->inodes) { |
| 861 | kfree(wbuf->buf); | 947 | kfree(wbuf->buf); |
| @@ -865,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) | |||
| 865 | 951 | ||
| 866 | wbuf->used = 0; | 952 | wbuf->used = 0; |
| 867 | wbuf->lnum = wbuf->offs = -1; | 953 | wbuf->lnum = wbuf->offs = -1; |
| 868 | wbuf->avail = wbuf->size = c->min_io_size; | 954 | /* |
| 955 | * If the LEB starts at the max. write size aligned address, then | ||
| 956 | * write-buffer size has to be set to @c->max_write_size. Otherwise, | ||
| 957 | * set it to something smaller so that it ends at the closest max. | ||
| 958 | * write size boundary. | ||
| 959 | */ | ||
| 960 | size = c->max_write_size - (c->leb_start % c->max_write_size); | ||
| 961 | wbuf->avail = wbuf->size = size; | ||
| 869 | wbuf->dtype = UBI_UNKNOWN; | 962 | wbuf->dtype = UBI_UNKNOWN; |
| 870 | wbuf->sync_callback = NULL; | 963 | wbuf->sync_callback = NULL; |
| 871 | mutex_init(&wbuf->io_mutex); | 964 | mutex_init(&wbuf->io_mutex); |
