about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/block/as-iosched.c 310
1 files changed, 165 insertions, 145 deletions
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index c6744ff38294..a78e160b59a3 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -4,7 +4,7 @@
4 * Anticipatory & deadline i/o scheduler. 4 * Anticipatory & deadline i/o scheduler.
5 * 5 *
6 * Copyright (C) 2002 Jens Axboe <axboe@suse.de> 6 * Copyright (C) 2002 Jens Axboe <axboe@suse.de>
7 * Nick Piggin <piggin@cyberone.com.au> 7 * Nick Piggin <nickpiggin@yahoo.com.au>
8 * 8 *
9 */ 9 */
10#include <linux/kernel.h> 10#include <linux/kernel.h>
@@ -69,7 +69,7 @@
69 69
70/* Bits in as_io_context.state */ 70/* Bits in as_io_context.state */
71enum as_io_states { 71enum as_io_states {
72 AS_TASK_RUNNING=0, /* Process has not exitted */ 72 AS_TASK_RUNNING=0, /* Process has not exited */
73 AS_TASK_IOSTARTED, /* Process has started some IO */ 73 AS_TASK_IOSTARTED, /* Process has started some IO */
74 AS_TASK_IORUNNING, /* Process has completed some IO */ 74 AS_TASK_IORUNNING, /* Process has completed some IO */
75}; 75};
@@ -102,6 +102,9 @@ struct as_data {
102 102
103 unsigned long exit_prob; /* probability a task will exit while 103 unsigned long exit_prob; /* probability a task will exit while
104 being waited on */ 104 being waited on */
105 unsigned long exit_no_coop; /* probability an exited task will
106 not be part of a later cooperating
107 request */
105 unsigned long new_ttime_total; /* mean thinktime on new proc */ 108 unsigned long new_ttime_total; /* mean thinktime on new proc */
106 unsigned long new_ttime_mean; 109 unsigned long new_ttime_mean;
107 u64 new_seek_total; /* mean seek on new proc */ 110 u64 new_seek_total; /* mean seek on new proc */
@@ -636,37 +639,152 @@ static void as_antic_timeout(unsigned long data)
636 kblockd_schedule_work(&ad->antic_work); 639 kblockd_schedule_work(&ad->antic_work);
637 640
638 if (aic->ttime_samples == 0) { 641 if (aic->ttime_samples == 0) {
639 /* process anticipated on has exitted or timed out*/ 642 /* process anticipated on has exited or timed out*/
640 ad->exit_prob = (7*ad->exit_prob + 256)/8; 643 ad->exit_prob = (7*ad->exit_prob + 256)/8;
641 } 644 }
645 if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
646 /* process not "saved" by a cooperating request */
647 ad->exit_no_coop = (7*ad->exit_no_coop + 256)/8;
648 }
642 } 649 }
643 spin_unlock_irqrestore(q->queue_lock, flags); 650 spin_unlock_irqrestore(q->queue_lock, flags);
644} 651}
645 652
653static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic,
654 unsigned long ttime)
655{
656 /* fixed point: 1.0 == 1<<8 */
657 if (aic->ttime_samples == 0) {
658 ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8;
659 ad->new_ttime_mean = ad->new_ttime_total / 256;
660
661 ad->exit_prob = (7*ad->exit_prob)/8;
662 }
663 aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
664 aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
665 aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
666}
667
668static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic,
669 sector_t sdist)
670{
671 u64 total;
672
673 if (aic->seek_samples == 0) {
674 ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8;
675 ad->new_seek_mean = ad->new_seek_total / 256;
676 }
677
678 /*
679 * Don't allow the seek distance to get too large from the
680 * odd fragment, pagein, etc
681 */
682 if (aic->seek_samples <= 60) /* second&third seek */
683 sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024);
684 else
685 sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64);
686
687 aic->seek_samples = (7*aic->seek_samples + 256) / 8;
688 aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8;
689 total = aic->seek_total + (aic->seek_samples/2);
690 do_div(total, aic->seek_samples);
691 aic->seek_mean = (sector_t)total;
692}
693
694/*
695 * as_update_iohist keeps a decaying histogram of IO thinktimes, and
696 * updates @aic->ttime_mean based on that. It is called when a new
697 * request is queued.
698 */
699static void as_update_iohist(struct as_data *ad, struct as_io_context *aic,
700 struct request *rq)
701{
702 struct as_rq *arq = RQ_DATA(rq);
703 int data_dir = arq->is_sync;
704 unsigned long thinktime = 0;
705 sector_t seek_dist;
706
707 if (aic == NULL)
708 return;
709
710 if (data_dir == REQ_SYNC) {
711 unsigned long in_flight = atomic_read(&aic->nr_queued)
712 + atomic_read(&aic->nr_dispatched);
713 spin_lock(&aic->lock);
714 if (test_bit(AS_TASK_IORUNNING, &aic->state) ||
715 test_bit(AS_TASK_IOSTARTED, &aic->state)) {
716 /* Calculate read -> read thinktime */
717 if (test_bit(AS_TASK_IORUNNING, &aic->state)
718 && in_flight == 0) {
719 thinktime = jiffies - aic->last_end_request;
720 thinktime = min(thinktime, MAX_THINKTIME-1);
721 }
722 as_update_thinktime(ad, aic, thinktime);
723
724 /* Calculate read -> read seek distance */
725 if (aic->last_request_pos < rq->sector)
726 seek_dist = rq->sector - aic->last_request_pos;
727 else
728 seek_dist = aic->last_request_pos - rq->sector;
729 as_update_seekdist(ad, aic, seek_dist);
730 }
731 aic->last_request_pos = rq->sector + rq->nr_sectors;
732 set_bit(AS_TASK_IOSTARTED, &aic->state);
733 spin_unlock(&aic->lock);
734 }
735}
736
646/* 737/*
647 * as_close_req decides if one request is considered "close" to the 738 * as_close_req decides if one request is considered "close" to the
648 * previous one issued. 739 * previous one issued.
649 */ 740 */
650static int as_close_req(struct as_data *ad, struct as_rq *arq) 741static int as_close_req(struct as_data *ad, struct as_io_context *aic,
742 struct as_rq *arq)
651{ 743{
652 unsigned long delay; /* milliseconds */ 744 unsigned long delay; /* milliseconds */
653 sector_t last = ad->last_sector[ad->batch_data_dir]; 745 sector_t last = ad->last_sector[ad->batch_data_dir];
654 sector_t next = arq->request->sector; 746 sector_t next = arq->request->sector;
655 sector_t delta; /* acceptable close offset (in sectors) */ 747 sector_t delta; /* acceptable close offset (in sectors) */
748 sector_t s;
656 749
657 if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished) 750 if (ad->antic_status == ANTIC_OFF || !ad->ioc_finished)
658 delay = 0; 751 delay = 0;
659 else 752 else
660 delay = ((jiffies - ad->antic_start) * 1000) / HZ; 753 delay = ((jiffies - ad->antic_start) * 1000) / HZ;
661 754
662 if (delay <= 1) 755 if (delay == 0)
663 delta = 64; 756 delta = 8192;
664 else if (delay <= 20 && delay <= ad->antic_expire) 757 else if (delay <= 20 && delay <= ad->antic_expire)
665 delta = 64 << (delay-1); 758 delta = 8192 << delay;
666 else 759 else
667 return 1; 760 return 1;
668 761
669 return (last - (delta>>1) <= next) && (next <= last + delta); 762 if ((last <= next + (delta>>1)) && (next <= last + delta))
763 return 1;
764
765 if (last < next)
766 s = next - last;
767 else
768 s = last - next;
769
770 if (aic->seek_samples == 0) {
771 /*
772 * Process has just started IO. Use past statistics to
773 * gauge success possibility
774 */
775 if (ad->new_seek_mean > s) {
776 /* this request is better than what we're expecting */
777 return 1;
778 }
779
780 } else {
781 if (aic->seek_mean > s) {
782 /* this request is better than what we're expecting */
783 return 1;
784 }
785 }
786
787 return 0;
670} 788}
671 789
672/* 790/*
@@ -678,7 +796,7 @@ static int as_close_req(struct as_data *ad, struct as_rq *arq)
678 * dispatch it ASAP, because we know that application will not be submitting 796 * dispatch it ASAP, because we know that application will not be submitting
679 * any new reads. 797 * any new reads.
680 * 798 *
681 * If the task which has submitted the request has exitted, break anticipation. 799 * If the task which has submitted the request has exited, break anticipation.
682 * 800 *
683 * If this task has queued some other IO, do not enter anticipation. 801 * If this task has queued some other IO, do not enter anticipation.
684 */ 802 */
@@ -686,7 +804,6 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
686{ 804{
687 struct io_context *ioc; 805 struct io_context *ioc;
688 struct as_io_context *aic; 806 struct as_io_context *aic;
689 sector_t s;
690 807
691 ioc = ad->io_context; 808 ioc = ad->io_context;
692 BUG_ON(!ioc); 809 BUG_ON(!ioc);
@@ -708,13 +825,6 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
708 if (!aic) 825 if (!aic)
709 return 0; 826 return 0;
710 827
711 if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
712 /* process anticipated on has exitted */
713 if (aic->ttime_samples == 0)
714 ad->exit_prob = (7*ad->exit_prob + 256)/8;
715 return 1;
716 }
717
718 if (atomic_read(&aic->nr_queued) > 0) { 828 if (atomic_read(&aic->nr_queued) > 0) {
719 /* process has more requests queued */ 829 /* process has more requests queued */
720 return 1; 830 return 1;
@@ -725,57 +835,45 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq)
725 return 1; 835 return 1;
726 } 836 }
727 837
728 if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, arq)) { 838 if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) {
729 /* 839 /*
730 * Found a close request that is not one of ours. 840 * Found a close request that is not one of ours.
731 * 841 *
732 * This makes close requests from another process reset 842 * This makes close requests from another process update
733 * our thinktime delay. Is generally useful when there are 843 * our IO history. Is generally useful when there are
734 * two or more cooperating processes working in the same 844 * two or more cooperating processes working in the same
735 * area. 845 * area.
736 */ 846 */
737 spin_lock(&aic->lock); 847 if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
738 aic->last_end_request = jiffies; 848 if (aic->ttime_samples == 0)
739 spin_unlock(&aic->lock); 849 ad->exit_prob = (7*ad->exit_prob + 256)/8;
850
851 ad->exit_no_coop = (7*ad->exit_no_coop)/8;
852 }
853
854 as_update_iohist(ad, aic, arq->request);
740 return 1; 855 return 1;
741 } 856 }
742 857
858 if (!test_bit(AS_TASK_RUNNING, &aic->state)) {
859 /* process anticipated on has exited */
860 if (aic->ttime_samples == 0)
861 ad->exit_prob = (7*ad->exit_prob + 256)/8;
862
863 if (ad->exit_no_coop > 128)
864 return 1;
865 }
743 866
744 if (aic->ttime_samples == 0) { 867 if (aic->ttime_samples == 0) {
745 if (ad->new_ttime_mean > ad->antic_expire) 868 if (ad->new_ttime_mean > ad->antic_expire)
746 return 1; 869 return 1;
747 if (ad->exit_prob > 128) 870 if (ad->exit_prob * ad->exit_no_coop > 128*256)
748 return 1; 871 return 1;
749 } else if (aic->ttime_mean > ad->antic_expire) { 872 } else if (aic->ttime_mean > ad->antic_expire) {
750 /* the process thinks too much between requests */ 873 /* the process thinks too much between requests */
751 return 1; 874 return 1;
752 } 875 }
753 876
754 if (!arq)
755 return 0;
756
757 if (ad->last_sector[REQ_SYNC] < arq->request->sector)
758 s = arq->request->sector - ad->last_sector[REQ_SYNC];
759 else
760 s = ad->last_sector[REQ_SYNC] - arq->request->sector;
761
762 if (aic->seek_samples == 0) {
763 /*
764 * Process has just started IO. Use past statistics to
765 * guage success possibility
766 */
767 if (ad->new_seek_mean > s) {
768 /* this request is better than what we're expecting */
769 return 1;
770 }
771
772 } else {
773 if (aic->seek_mean > s) {
774 /* this request is better than what we're expecting */
775 return 1;
776 }
777 }
778
779 return 0; 877 return 0;
780} 878}
781 879
@@ -809,94 +907,11 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq)
809 * Status is either ANTIC_OFF so start waiting, 907 * Status is either ANTIC_OFF so start waiting,
810 * ANTIC_WAIT_REQ so continue waiting for request to finish 908 * ANTIC_WAIT_REQ so continue waiting for request to finish
811 * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request. 909 * or ANTIC_WAIT_NEXT so continue waiting for an acceptable request.
812 *
813 */ 910 */
814 911
815 return 1; 912 return 1;
816} 913}
817 914
818static void as_update_thinktime(struct as_data *ad, struct as_io_context *aic, unsigned long ttime)
819{
820 /* fixed point: 1.0 == 1<<8 */
821 if (aic->ttime_samples == 0) {
822 ad->new_ttime_total = (7*ad->new_ttime_total + 256*ttime) / 8;
823 ad->new_ttime_mean = ad->new_ttime_total / 256;
824
825 ad->exit_prob = (7*ad->exit_prob)/8;
826 }
827 aic->ttime_samples = (7*aic->ttime_samples + 256) / 8;
828 aic->ttime_total = (7*aic->ttime_total + 256*ttime) / 8;
829 aic->ttime_mean = (aic->ttime_total + 128) / aic->ttime_samples;
830}
831
832static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, sector_t sdist)
833{
834 u64 total;
835
836 if (aic->seek_samples == 0) {
837 ad->new_seek_total = (7*ad->new_seek_total + 256*(u64)sdist)/8;
838 ad->new_seek_mean = ad->new_seek_total / 256;
839 }
840
841 /*
842 * Don't allow the seek distance to get too large from the
843 * odd fragment, pagein, etc
844 */
845 if (aic->seek_samples <= 60) /* second&third seek */
846 sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*1024);
847 else
848 sdist = min(sdist, (aic->seek_mean * 4) + 2*1024*64);
849
850 aic->seek_samples = (7*aic->seek_samples + 256) / 8;
851 aic->seek_total = (7*aic->seek_total + (u64)256*sdist) / 8;
852 total = aic->seek_total + (aic->seek_samples/2);
853 do_div(total, aic->seek_samples);
854 aic->seek_mean = (sector_t)total;
855}
856
857/*
858 * as_update_iohist keeps a decaying histogram of IO thinktimes, and
859 * updates @aic->ttime_mean based on that. It is called when a new
860 * request is queued.
861 */
862static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq)
863{
864 struct as_rq *arq = RQ_DATA(rq);
865 int data_dir = arq->is_sync;
866 unsigned long thinktime;
867 sector_t seek_dist;
868
869 if (aic == NULL)
870 return;
871
872 if (data_dir == REQ_SYNC) {
873 unsigned long in_flight = atomic_read(&aic->nr_queued)
874 + atomic_read(&aic->nr_dispatched);
875 spin_lock(&aic->lock);
876 if (test_bit(AS_TASK_IORUNNING, &aic->state) ||
877 test_bit(AS_TASK_IOSTARTED, &aic->state)) {
878 /* Calculate read -> read thinktime */
879 if (test_bit(AS_TASK_IORUNNING, &aic->state)
880 && in_flight == 0) {
881 thinktime = jiffies - aic->last_end_request;
882 thinktime = min(thinktime, MAX_THINKTIME-1);
883 } else
884 thinktime = 0;
885 as_update_thinktime(ad, aic, thinktime);
886
887 /* Calculate read -> read seek distance */
888 if (aic->last_request_pos < rq->sector)
889 seek_dist = rq->sector - aic->last_request_pos;
890 else
891 seek_dist = aic->last_request_pos - rq->sector;
892 as_update_seekdist(ad, aic, seek_dist);
893 }
894 aic->last_request_pos = rq->sector + rq->nr_sectors;
895 set_bit(AS_TASK_IOSTARTED, &aic->state);
896 spin_unlock(&aic->lock);
897 }
898}
899
900/* 915/*
901 * as_update_arq must be called whenever a request (arq) is added to 916 * as_update_arq must be called whenever a request (arq) is added to
902 * the sort_list. This function keeps caches up to date, and checks if the 917 * the sort_list. This function keeps caches up to date, and checks if the
@@ -1201,7 +1216,7 @@ static int as_dispatch_request(request_queue_t *q, int force)
1201 || ad->changed_batch) 1216 || ad->changed_batch)
1202 return 0; 1217 return 0;
1203 1218
1204 if (!(reads && writes && as_batch_expired(ad)) ) { 1219 if (!(reads && writes && as_batch_expired(ad))) {
1205 /* 1220 /*
1206 * batch is still running or no reads or no writes 1221 * batch is still running or no reads or no writes
1207 */ 1222 */
@@ -1316,7 +1331,8 @@ fifo_expired:
1316 * Add arq to a list behind alias 1331 * Add arq to a list behind alias
1317 */ 1332 */
1318static inline void 1333static inline void
1319as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alias) 1334as_add_aliased_request(struct as_data *ad, struct as_rq *arq,
1335 struct as_rq *alias)
1320{ 1336{
1321 struct request *req = arq->request; 1337 struct request *req = arq->request;
1322 struct list_head *insert = alias->request->queuelist.prev; 1338 struct list_head *insert = alias->request->queuelist.prev;
@@ -1441,8 +1457,8 @@ static int as_queue_empty(request_queue_t *q)
1441 && list_empty(&ad->fifo_list[REQ_SYNC]); 1457 && list_empty(&ad->fifo_list[REQ_SYNC]);
1442} 1458}
1443 1459
1444static struct request * 1460static struct request *as_former_request(request_queue_t *q,
1445as_former_request(request_queue_t *q, struct request *rq) 1461 struct request *rq)
1446{ 1462{
1447 struct as_rq *arq = RQ_DATA(rq); 1463 struct as_rq *arq = RQ_DATA(rq);
1448 struct rb_node *rbprev = rb_prev(&arq->rb_node); 1464 struct rb_node *rbprev = rb_prev(&arq->rb_node);
@@ -1454,8 +1470,8 @@ as_former_request(request_queue_t *q, struct request *rq)
1454 return ret; 1470 return ret;
1455} 1471}
1456 1472
1457static struct request * 1473static struct request *as_latter_request(request_queue_t *q,
1458as_latter_request(request_queue_t *q, struct request *rq) 1474 struct request *rq)
1459{ 1475{
1460 struct as_rq *arq = RQ_DATA(rq); 1476 struct as_rq *arq = RQ_DATA(rq);
1461 struct rb_node *rbnext = rb_next(&arq->rb_node); 1477 struct rb_node *rbnext = rb_next(&arq->rb_node);
@@ -1537,7 +1553,7 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1537 * currently don't bother. Ditto the next function. 1553 * currently don't bother. Ditto the next function.
1538 */ 1554 */
1539 as_del_arq_rb(ad, arq); 1555 as_del_arq_rb(ad, arq);
1540 if ((alias = as_add_arq_rb(ad, arq)) ) { 1556 if ((alias = as_add_arq_rb(ad, arq))) {
1541 list_del_init(&arq->fifo); 1557 list_del_init(&arq->fifo);
1542 as_add_aliased_request(ad, arq, alias); 1558 as_add_aliased_request(ad, arq, alias);
1543 if (next_arq) 1559 if (next_arq)
@@ -1551,9 +1567,8 @@ static void as_merged_request(request_queue_t *q, struct request *req)
1551 } 1567 }
1552} 1568}
1553 1569
1554static void 1570static void as_merged_requests(request_queue_t *q, struct request *req,
1555as_merged_requests(request_queue_t *q, struct request *req, 1571 struct request *next)
1556 struct request *next)
1557{ 1572{
1558 struct as_data *ad = q->elevator->elevator_data; 1573 struct as_data *ad = q->elevator->elevator_data;
1559 struct as_rq *arq = RQ_DATA(req); 1574 struct as_rq *arq = RQ_DATA(req);
@@ -1576,7 +1591,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
1576 next_arq = as_find_next_arq(ad, arq); 1591 next_arq = as_find_next_arq(ad, arq);
1577 1592
1578 as_del_arq_rb(ad, arq); 1593 as_del_arq_rb(ad, arq);
1579 if ((alias = as_add_arq_rb(ad, arq)) ) { 1594 if ((alias = as_add_arq_rb(ad, arq))) {
1580 list_del_init(&arq->fifo); 1595 list_del_init(&arq->fifo);
1581 as_add_aliased_request(ad, arq, alias); 1596 as_add_aliased_request(ad, arq, alias);
1582 if (next_arq) 1597 if (next_arq)
@@ -1806,9 +1821,14 @@ static ssize_t as_est_show(struct as_data *ad, char *page)
1806{ 1821{
1807 int pos = 0; 1822 int pos = 0;
1808 1823
1809 pos += sprintf(page+pos, "%lu %% exit probability\n", 100*ad->exit_prob/256); 1824 pos += sprintf(page+pos, "%lu %% exit probability\n",
1825 100*ad->exit_prob/256);
1826 pos += sprintf(page+pos, "%lu %% probability of exiting without a "
1827 "cooperating process submitting IO\n",
1828 100*ad->exit_no_coop/256);
1810 pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean); 1829 pos += sprintf(page+pos, "%lu ms new thinktime\n", ad->new_ttime_mean);
1811 pos += sprintf(page+pos, "%llu sectors new seek distance\n", (unsigned long long)ad->new_seek_mean); 1830 pos += sprintf(page+pos, "%llu sectors new seek distance\n",
1831 (unsigned long long)ad->new_seek_mean);
1812 1832
1813 return pos; 1833 return pos;
1814} 1834}