aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Gouriou <egouriou@google.com>2011-10-27 11:52:18 -0400
committerTheodore Ts'o <tytso@mit.edu>2011-10-27 11:52:18 -0400
commit80e675f906db54eb1ce3a9555cee5f45b5b72ab2 (patch)
treeb9bd012caf69c479a946b467c051f505caa1f593
parent6f91bc5fda82d2c49b4f7fb29424cf6a3c7574bc (diff)
ext4: optimize memmove lengths in extent/index insertions
ext4_ext_insert_extent() (respectively ext4_ext_insert_index()) was using EXT_MAX_EXTENT() (resp. EXT_MAX_INDEX()) to determine how many entries needed to be moved beyond the insertion point. In practice this means that (320 - I) * 24 bytes were memmove()'d when I is the insertion point, rather than (#entries - I) * 24 bytes.

This patch uses EXT_LAST_EXTENT() (resp. EXT_LAST_INDEX()) instead to only move existing entries. The code flow is also simplified slightly to highlight similarities and reduce code duplication in the insertion logic.

This patch reduces system CPU consumption by over 25% on a 4kB synchronous append DIO write workload when used with the pre-2.6.39 x86_64 memmove() implementation. With the much faster 2.6.39 memmove() implementation we still see a decrease in system CPU usage between 2% and 7%.

Note that the ext_debug() output changes with this patch, splitting some log information between entries. Users of the ext_debug() output should note that the "move %d" units changed from reporting the number of bytes moved to reporting the number of entries moved.

Signed-off-by: Eric Gouriou <egouriou@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/extents.c85
1 files changed, 42 insertions, 43 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8b6a17b60970..29969622af8b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -754,31 +754,25 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
754 return -EIO; 754 return -EIO;
755 } 755 }
756 756
757 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
758 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 757 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
759 /* insert after */ 758 /* insert after */
760 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 759 ext_debug("insert new index %d after: %llu\n", logical, ptr);
761 len = (len - 1) * sizeof(struct ext4_extent_idx);
762 len = len < 0 ? 0 : len;
763 ext_debug("insert new index %d after: %llu. "
764 "move %d from 0x%p to 0x%p\n",
765 logical, ptr, len,
766 (curp->p_idx + 1), (curp->p_idx + 2));
767 memmove(curp->p_idx + 2, curp->p_idx + 1, len);
768 }
769 ix = curp->p_idx + 1; 760 ix = curp->p_idx + 1;
770 } else { 761 } else {
771 /* insert before */ 762 /* insert before */
772 len = len * sizeof(struct ext4_extent_idx); 763 ext_debug("insert new index %d before: %llu\n", logical, ptr);
773 len = len < 0 ? 0 : len;
774 ext_debug("insert new index %d before: %llu. "
775 "move %d from 0x%p to 0x%p\n",
776 logical, ptr, len,
777 curp->p_idx, (curp->p_idx + 1));
778 memmove(curp->p_idx + 1, curp->p_idx, len);
779 ix = curp->p_idx; 764 ix = curp->p_idx;
780 } 765 }
781 766
767 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
768 BUG_ON(len < 0);
769 if (len > 0) {
770 ext_debug("insert new index %d: "
771 "move %d indices from 0x%p to 0x%p\n",
772 logical, len, ix, ix + 1);
773 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
774 }
775
782 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) { 776 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
783 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!"); 777 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
784 return -EIO; 778 return -EIO;
@@ -1779,41 +1773,46 @@ has_space:
1779 ext4_ext_pblock(newext), 1773 ext4_ext_pblock(newext),
1780 ext4_ext_is_uninitialized(newext), 1774 ext4_ext_is_uninitialized(newext),
1781 ext4_ext_get_actual_len(newext)); 1775 ext4_ext_get_actual_len(newext));
1782 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1776 nearex = EXT_FIRST_EXTENT(eh);
1783 } else if (le32_to_cpu(newext->ee_block) 1777 } else {
1778 if (le32_to_cpu(newext->ee_block)
1784 > le32_to_cpu(nearex->ee_block)) { 1779 > le32_to_cpu(nearex->ee_block)) {
1785/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1780 /* Insert after */
1786 if (nearex != EXT_LAST_EXTENT(eh)) { 1781 ext_debug("insert %d:%llu:[%d]%d %s before: "
1787 len = EXT_MAX_EXTENT(eh) - nearex; 1782 "nearest 0x%p\n"
1788 len = (len - 1) * sizeof(struct ext4_extent); 1783 le32_to_cpu(newext->ee_block),
1789 len = len < 0 ? 0 : len; 1784 ext4_ext_pblock(newext),
1790 ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " 1785 ext4_ext_is_uninitialized(newext),
1791 "move %d from 0x%p to 0x%p\n", 1786 ext4_ext_get_actual_len(newext),
1787 nearex);
1788 nearex++;
1789 } else {
1790 /* Insert before */
1791 BUG_ON(newext->ee_block == nearex->ee_block);
1792 ext_debug("insert %d:%llu:[%d]%d %s after: "
1793 "nearest 0x%p\n"
1792 le32_to_cpu(newext->ee_block), 1794 le32_to_cpu(newext->ee_block),
1793 ext4_ext_pblock(newext), 1795 ext4_ext_pblock(newext),
1794 ext4_ext_is_uninitialized(newext), 1796 ext4_ext_is_uninitialized(newext),
1795 ext4_ext_get_actual_len(newext), 1797 ext4_ext_get_actual_len(newext),
1796 nearex, len, nearex + 1, nearex + 2); 1798 nearex);
1797 memmove(nearex + 2, nearex + 1, len); 1799 }
1800 len = EXT_LAST_EXTENT(eh) - nearex + 1;
1801 if (len > 0) {
1802 ext_debug("insert %d:%llu:[%d]%d: "
1803 "move %d extents from 0x%p to 0x%p\n",
1804 le32_to_cpu(newext->ee_block),
1805 ext4_ext_pblock(newext),
1806 ext4_ext_is_uninitialized(newext),
1807 ext4_ext_get_actual_len(newext),
1808 len, nearex, nearex + 1);
1809 memmove(nearex + 1, nearex,
1810 len * sizeof(struct ext4_extent));
1798 } 1811 }
1799 path[depth].p_ext = nearex + 1;
1800 } else {
1801 BUG_ON(newext->ee_block == nearex->ee_block);
1802 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1803 len = len < 0 ? 0 : len;
1804 ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
1805 "move %d from 0x%p to 0x%p\n",
1806 le32_to_cpu(newext->ee_block),
1807 ext4_ext_pblock(newext),
1808 ext4_ext_is_uninitialized(newext),
1809 ext4_ext_get_actual_len(newext),
1810 nearex, len, nearex, nearex + 1);
1811 memmove(nearex + 1, nearex, len);
1812 path[depth].p_ext = nearex;
1813 } 1812 }
1814 1813
1815 le16_add_cpu(&eh->eh_entries, 1); 1814 le16_add_cpu(&eh->eh_entries, 1);
1816 nearex = path[depth].p_ext; 1815 path[depth].p_ext = nearex;
1817 nearex->ee_block = newext->ee_block; 1816 nearex->ee_block = newext->ee_block;
1818 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext)); 1817 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
1819 nearex->ee_len = newext->ee_len; 1818 nearex->ee_len = newext->ee_len;