aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2007-01-02 15:52:30 -0500
committerDan Williams <dan.j.williams@intel.com>2007-07-13 11:06:16 -0400
commite33129d84130459dbb764a1a52a4bfceab3da978 (patch)
treec3f2742dac468a1c62e14ec1f2ec0cb5a37ee966
parentd84e0f10d38393f617227f0c831a99c69294651f (diff)
md: handle_stripe5 - add request/completion logic for async write ops
After handle_stripe5 decides whether it wants to perform a read-modify-write or a reconstruct write, it calls handle_write_operations5. A read-modify-write operation will perform an xor subtraction of the blocks marked with the R5_Wantprexor flag, copy the new data into the stripe (biodrain), and perform a postxor operation across all up-to-date blocks to generate the new parity. A reconstruct write is run when all blocks are already up-to-date in the cache, so all that is needed is a biodrain and postxor. On the completion path STRIPE_OP_PREXOR will be set if the operation was a read-modify-write. The STRIPE_OP_BIODRAIN flag is used in the completion path to differentiate write-initiated postxor operations from expansion-initiated postxor operations. Completion of a write triggers i/o to the drives. Changelog: * make the 'rcw' parameter to handle_write_operations5 a simple flag, Neil Brown * remove test_and_set/test_and_clear BUG_ONs, Neil Brown Signed-off-by: Dan Williams <dan.j.williams@intel.com> Acked-By: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c161
1 files changed, 138 insertions, 23 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d89a25e7c17b..d9521aa69461 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1822,7 +1822,79 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1822 } 1822 }
1823} 1823}
1824 1824
1825static int
1826handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1827{
1828 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1829 int locked = 0;
1830
1831 if (rcw) {
1832 /* if we are not expanding this is a proper write request, and
1833 * there will be bios with new data to be drained into the
1834 * stripe cache
1835 */
1836 if (!expand) {
1837 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
1838 sh->ops.count++;
1839 }
1840
1841 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
1842 sh->ops.count++;
1843
1844 for (i = disks; i--; ) {
1845 struct r5dev *dev = &sh->dev[i];
1846
1847 if (dev->towrite) {
1848 set_bit(R5_LOCKED, &dev->flags);
1849 if (!expand)
1850 clear_bit(R5_UPTODATE, &dev->flags);
1851 locked++;
1852 }
1853 }
1854 } else {
1855 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1856 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1857
1858 set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
1859 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
1860 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
1861
1862 sh->ops.count += 3;
1863
1864 for (i = disks; i--; ) {
1865 struct r5dev *dev = &sh->dev[i];
1866 if (i == pd_idx)
1867 continue;
1868
1869 /* For a read-modify write there may be blocks that are
1870 * locked for reading while others are ready to be
1871 * written so we distinguish these blocks by the
1872 * R5_Wantprexor bit
1873 */
1874 if (dev->towrite &&
1875 (test_bit(R5_UPTODATE, &dev->flags) ||
1876 test_bit(R5_Wantcompute, &dev->flags))) {
1877 set_bit(R5_Wantprexor, &dev->flags);
1878 set_bit(R5_LOCKED, &dev->flags);
1879 clear_bit(R5_UPTODATE, &dev->flags);
1880 locked++;
1881 }
1882 }
1883 }
1884
1885 /* keep the parity disk locked while asynchronous operations
1886 * are in flight
1887 */
1888 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1889 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1890 locked++;
1825 1891
1892 pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
1893 __FUNCTION__, (unsigned long long)sh->sector,
1894 locked, sh->ops.pending);
1895
1896 return locked;
1897}
1826 1898
1827/* 1899/*
1828 * Each stripe/dev can have one or more bion attached. 1900 * Each stripe/dev can have one or more bion attached.
@@ -2217,27 +2289,8 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2217 * we can start a write request 2289 * we can start a write request
2218 */ 2290 */
2219 if (s->locked == 0 && (rcw == 0 || rmw == 0) && 2291 if (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2220 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2292 !test_bit(STRIPE_BIT_DELAY, &sh->state))
2221 pr_debug("Computing parity...\n"); 2293 s->locked += handle_write_operations5(sh, rcw == 0, 0);
2222 compute_parity5(sh, rcw == 0 ?
2223 RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
2224 /* now every locked buffer is ready to be written */
2225 for (i = disks; i--; )
2226 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2227 pr_debug("Writing block %d\n", i);
2228 s->locked++;
2229 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2230 if (!test_bit(R5_Insync, &sh->dev[i].flags)
2231 || (i == sh->pd_idx && s->failed == 0))
2232 set_bit(STRIPE_INSYNC, &sh->state);
2233 }
2234 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2235 atomic_dec(&conf->preread_active_stripes);
2236 if (atomic_read(&conf->preread_active_stripes) <
2237 IO_THRESHOLD)
2238 md_wakeup_thread(conf->mddev->thread);
2239 }
2240 }
2241} 2294}
2242 2295
2243static void handle_issuing_new_write_requests6(raid5_conf_t *conf, 2296static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
@@ -2656,8 +2709,70 @@ static void handle_stripe5(struct stripe_head *sh)
2656 (s.syncing && (s.uptodate < disks)) || s.expanding) 2709 (s.syncing && (s.uptodate < disks)) || s.expanding)
2657 handle_issuing_new_read_requests5(sh, &s, disks); 2710 handle_issuing_new_read_requests5(sh, &s, disks);
2658 2711
2659 /* now to consider writing and what else, if anything should be read */ 2712 /* Now we check to see if any write operations have recently
2660 if (s.to_write) 2713 * completed
2714 */
2715
2716 /* leave prexor set until postxor is done, allows us to distinguish
2717 * a rmw from a rcw during biodrain
2718 */
2719 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
2720 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2721
2722 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
2723 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
2724 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
2725
2726 for (i = disks; i--; )
2727 clear_bit(R5_Wantprexor, &sh->dev[i].flags);
2728 }
2729
2730 /* if only POSTXOR is set then this is an 'expand' postxor */
2731 if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
2732 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
2733
2734 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
2735 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
2736 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
2737
2738 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
2739 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
2740 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
2741
2742 /* All the 'written' buffers and the parity block are ready to
2743 * be written back to disk
2744 */
2745 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
2746 for (i = disks; i--; ) {
2747 dev = &sh->dev[i];
2748 if (test_bit(R5_LOCKED, &dev->flags) &&
2749 (i == sh->pd_idx || dev->written)) {
2750 pr_debug("Writing block %d\n", i);
2751 set_bit(R5_Wantwrite, &dev->flags);
2752 if (!test_and_set_bit(
2753 STRIPE_OP_IO, &sh->ops.pending))
2754 sh->ops.count++;
2755 if (!test_bit(R5_Insync, &dev->flags) ||
2756 (i == sh->pd_idx && s.failed == 0))
2757 set_bit(STRIPE_INSYNC, &sh->state);
2758 }
2759 }
2760 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2761 atomic_dec(&conf->preread_active_stripes);
2762 if (atomic_read(&conf->preread_active_stripes) <
2763 IO_THRESHOLD)
2764 md_wakeup_thread(conf->mddev->thread);
2765 }
2766 }
2767
2768 /* Now to consider new write requests and what else, if anything
2769 * should be read. We do not handle new writes when:
2770 * 1/ A 'write' operation (copy+xor) is already in flight.
2771 * 2/ A 'check' operation is in flight, as it may clobber the parity
2772 * block.
2773 */
2774 if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
2775 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
2661 handle_issuing_new_write_requests5(conf, sh, &s, disks); 2776 handle_issuing_new_write_requests5(conf, sh, &s, disks);
2662 2777
2663 /* maybe we need to check and possibly fix the parity for this stripe 2778 /* maybe we need to check and possibly fix the parity for this stripe