diff options
author | Dan Williams <dan.j.williams@intel.com> | 2007-01-02 15:52:30 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2007-07-13 11:06:16 -0400 |
commit | e33129d84130459dbb764a1a52a4bfceab3da978 (patch) | |
tree | c3f2742dac468a1c62e14ec1f2ec0cb5a37ee966 | |
parent | d84e0f10d38393f617227f0c831a99c69294651f (diff) |
md: handle_stripe5 - add request/completion logic for async write ops
After handle_stripe5 decides whether it wants to perform a
read-modify-write or a reconstruct write, it calls
handle_write_operations5. A read-modify-write operation will perform an
xor subtraction of the blocks marked with the R5_Wantprexor flag, copy the
new data into the stripe (biodrain) and perform a postxor operation across
all up-to-date blocks to generate the new parity. A reconstruct write is run
when all blocks are already up-to-date in the cache, so all that is needed
is a biodrain and postxor.
On the completion path STRIPE_OP_PREXOR will be set if the operation was a
read-modify-write. The STRIPE_OP_BIODRAIN flag is used in the completion
path to differentiate write-initiated postxor operations from
expansion-initiated postxor operations. Completion of a write triggers i/o
to the drives.
Changelog:
* make the 'rcw' parameter to handle_write_operations5 a simple flag, Neil Brown
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 161 |
1 files changed, 138 insertions, 23 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d89a25e7c17b..d9521aa69461 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -1822,7 +1822,79 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | |||
1822 | } | 1822 | } |
1823 | } | 1823 | } |
1824 | 1824 | ||
1825 | static int | ||
1826 | handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | ||
1827 | { | ||
1828 | int i, pd_idx = sh->pd_idx, disks = sh->disks; | ||
1829 | int locked = 0; | ||
1830 | |||
1831 | if (rcw) { | ||
1832 | /* if we are not expanding this is a proper write request, and | ||
1833 | * there will be bios with new data to be drained into the | ||
1834 | * stripe cache | ||
1835 | */ | ||
1836 | if (!expand) { | ||
1837 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | ||
1838 | sh->ops.count++; | ||
1839 | } | ||
1840 | |||
1841 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | ||
1842 | sh->ops.count++; | ||
1843 | |||
1844 | for (i = disks; i--; ) { | ||
1845 | struct r5dev *dev = &sh->dev[i]; | ||
1846 | |||
1847 | if (dev->towrite) { | ||
1848 | set_bit(R5_LOCKED, &dev->flags); | ||
1849 | if (!expand) | ||
1850 | clear_bit(R5_UPTODATE, &dev->flags); | ||
1851 | locked++; | ||
1852 | } | ||
1853 | } | ||
1854 | } else { | ||
1855 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | ||
1856 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | ||
1857 | |||
1858 | set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | ||
1859 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | ||
1860 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | ||
1861 | |||
1862 | sh->ops.count += 3; | ||
1863 | |||
1864 | for (i = disks; i--; ) { | ||
1865 | struct r5dev *dev = &sh->dev[i]; | ||
1866 | if (i == pd_idx) | ||
1867 | continue; | ||
1868 | |||
1869 | /* For a read-modify write there may be blocks that are | ||
1870 | * locked for reading while others are ready to be | ||
1871 | * written so we distinguish these blocks by the | ||
1872 | * R5_Wantprexor bit | ||
1873 | */ | ||
1874 | if (dev->towrite && | ||
1875 | (test_bit(R5_UPTODATE, &dev->flags) || | ||
1876 | test_bit(R5_Wantcompute, &dev->flags))) { | ||
1877 | set_bit(R5_Wantprexor, &dev->flags); | ||
1878 | set_bit(R5_LOCKED, &dev->flags); | ||
1879 | clear_bit(R5_UPTODATE, &dev->flags); | ||
1880 | locked++; | ||
1881 | } | ||
1882 | } | ||
1883 | } | ||
1884 | |||
1885 | /* keep the parity disk locked while asynchronous operations | ||
1886 | * are in flight | ||
1887 | */ | ||
1888 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | ||
1889 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
1890 | locked++; | ||
1825 | 1891 | ||
1892 | pr_debug("%s: stripe %llu locked: %d pending: %lx\n", | ||
1893 | __FUNCTION__, (unsigned long long)sh->sector, | ||
1894 | locked, sh->ops.pending); | ||
1895 | |||
1896 | return locked; | ||
1897 | } | ||
1826 | 1898 | ||
1827 | /* | 1899 | /* |
1828 | * Each stripe/dev can have one or more bion attached. | 1900 | * Each stripe/dev can have one or more bion attached. |
@@ -2217,27 +2289,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2217 | * we can start a write request | 2289 | * we can start a write request |
2218 | */ | 2290 | */ |
2219 | if (s->locked == 0 && (rcw == 0 || rmw == 0) && | 2291 | if (s->locked == 0 && (rcw == 0 || rmw == 0) && |
2220 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) { | 2292 | !test_bit(STRIPE_BIT_DELAY, &sh->state)) |
2221 | pr_debug("Computing parity...\n"); | 2293 | s->locked += handle_write_operations5(sh, rcw == 0, 0); |
2222 | compute_parity5(sh, rcw == 0 ? | ||
2223 | RECONSTRUCT_WRITE : READ_MODIFY_WRITE); | ||
2224 | /* now every locked buffer is ready to be written */ | ||
2225 | for (i = disks; i--; ) | ||
2226 | if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { | ||
2227 | pr_debug("Writing block %d\n", i); | ||
2228 | s->locked++; | ||
2229 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | ||
2230 | if (!test_bit(R5_Insync, &sh->dev[i].flags) | ||
2231 | || (i == sh->pd_idx && s->failed == 0)) | ||
2232 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2233 | } | ||
2234 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2235 | atomic_dec(&conf->preread_active_stripes); | ||
2236 | if (atomic_read(&conf->preread_active_stripes) < | ||
2237 | IO_THRESHOLD) | ||
2238 | md_wakeup_thread(conf->mddev->thread); | ||
2239 | } | ||
2240 | } | ||
2241 | } | 2294 | } |
2242 | 2295 | ||
2243 | static void handle_issuing_new_write_requests6(raid5_conf_t *conf, | 2296 | static void handle_issuing_new_write_requests6(raid5_conf_t *conf, |
@@ -2656,8 +2709,70 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2656 | (s.syncing && (s.uptodate < disks)) || s.expanding) | 2709 | (s.syncing && (s.uptodate < disks)) || s.expanding) |
2657 | handle_issuing_new_read_requests5(sh, &s, disks); | 2710 | handle_issuing_new_read_requests5(sh, &s, disks); |
2658 | 2711 | ||
2659 | /* now to consider writing and what else, if anything should be read */ | 2712 | /* Now we check to see if any write operations have recently |
2660 | if (s.to_write) | 2713 | * completed |
2714 | */ | ||
2715 | |||
2716 | /* leave prexor set until postxor is done, allows us to distinguish | ||
2717 | * a rmw from a rcw during biodrain | ||
2718 | */ | ||
2719 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | ||
2720 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | ||
2721 | |||
2722 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | ||
2723 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | ||
2724 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | ||
2725 | |||
2726 | for (i = disks; i--; ) | ||
2727 | clear_bit(R5_Wantprexor, &sh->dev[i].flags); | ||
2728 | } | ||
2729 | |||
2730 | /* if only POSTXOR is set then this is an 'expand' postxor */ | ||
2731 | if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) && | ||
2732 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | ||
2733 | |||
2734 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); | ||
2735 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack); | ||
2736 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | ||
2737 | |||
2738 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
2739 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); | ||
2740 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | ||
2741 | |||
2742 | /* All the 'written' buffers and the parity block are ready to | ||
2743 | * be written back to disk | ||
2744 | */ | ||
2745 | BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); | ||
2746 | for (i = disks; i--; ) { | ||
2747 | dev = &sh->dev[i]; | ||
2748 | if (test_bit(R5_LOCKED, &dev->flags) && | ||
2749 | (i == sh->pd_idx || dev->written)) { | ||
2750 | pr_debug("Writing block %d\n", i); | ||
2751 | set_bit(R5_Wantwrite, &dev->flags); | ||
2752 | if (!test_and_set_bit( | ||
2753 | STRIPE_OP_IO, &sh->ops.pending)) | ||
2754 | sh->ops.count++; | ||
2755 | if (!test_bit(R5_Insync, &dev->flags) || | ||
2756 | (i == sh->pd_idx && s.failed == 0)) | ||
2757 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2758 | } | ||
2759 | } | ||
2760 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
2761 | atomic_dec(&conf->preread_active_stripes); | ||
2762 | if (atomic_read(&conf->preread_active_stripes) < | ||
2763 | IO_THRESHOLD) | ||
2764 | md_wakeup_thread(conf->mddev->thread); | ||
2765 | } | ||
2766 | } | ||
2767 | |||
2768 | /* Now to consider new write requests and what else, if anything | ||
2769 | * should be read. We do not handle new writes when: | ||
2770 | * 1/ A 'write' operation (copy+xor) is already in flight. | ||
2771 | * 2/ A 'check' operation is in flight, as it may clobber the parity | ||
2772 | * block. | ||
2773 | */ | ||
2774 | if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && | ||
2775 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) | ||
2661 | handle_issuing_new_write_requests5(conf, sh, &s, disks); | 2776 | handle_issuing_new_write_requests5(conf, sh, &s, disks); |
2662 | 2777 | ||
2663 | /* maybe we need to check and possibly fix the parity for this stripe | 2778 | /* maybe we need to check and possibly fix the parity for this stripe |