diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2007-08-07 03:02:51 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2007-10-16 05:12:53 -0400 |
commit | a8474ce23a73185dd2bae4c884b1716474032d31 (patch) | |
tree | 63501846b8aaef02579a868f6d5118b6a07c4a5e | |
parent | 0cde8d9510e242c73b2d68f9949cd3c456c863b4 (diff) |
SCSI: support for allocating large scatterlists
This is what enables large commands. If we need to allocate an
sg table that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS-sized tables and chain them together.
SCSI defaults to large chained sg tables, if the arch supports it.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | drivers/scsi/scsi_lib.c | 209 | ||||
-rw-r--r-- | include/scsi/scsi.h | 7 | ||||
-rw-r--r-- | include/scsi/scsi_cmnd.h | 1 |
3 files changed, 163 insertions, 54 deletions
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7e1dcc57f115..c75cb6ad6d94 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c | |||
@@ -36,33 +36,19 @@ | |||
36 | 36 | ||
37 | struct scsi_host_sg_pool { | 37 | struct scsi_host_sg_pool { |
38 | size_t size; | 38 | size_t size; |
39 | char *name; | 39 | char *name; |
40 | struct kmem_cache *slab; | 40 | struct kmem_cache *slab; |
41 | mempool_t *pool; | 41 | mempool_t *pool; |
42 | }; | 42 | }; |
43 | 43 | ||
44 | #if (SCSI_MAX_PHYS_SEGMENTS < 32) | 44 | #define SP(x) { x, "sgpool-" #x } |
45 | #error SCSI_MAX_PHYS_SEGMENTS is too small | ||
46 | #endif | ||
47 | |||
48 | #define SP(x) { x, "sgpool-" #x } | ||
49 | static struct scsi_host_sg_pool scsi_sg_pools[] = { | 45 | static struct scsi_host_sg_pool scsi_sg_pools[] = { |
50 | SP(8), | 46 | SP(8), |
51 | SP(16), | 47 | SP(16), |
52 | SP(32), | 48 | SP(32), |
53 | #if (SCSI_MAX_PHYS_SEGMENTS > 32) | ||
54 | SP(64), | 49 | SP(64), |
55 | #if (SCSI_MAX_PHYS_SEGMENTS > 64) | ||
56 | SP(128), | 50 | SP(128), |
57 | #if (SCSI_MAX_PHYS_SEGMENTS > 128) | 51 | }; |
58 | SP(256), | ||
59 | #if (SCSI_MAX_PHYS_SEGMENTS > 256) | ||
60 | #error SCSI_MAX_PHYS_SEGMENTS is too large | ||
61 | #endif | ||
62 | #endif | ||
63 | #endif | ||
64 | #endif | ||
65 | }; | ||
66 | #undef SP | 52 | #undef SP |
67 | 53 | ||
68 | static void scsi_run_queue(struct request_queue *q); | 54 | static void scsi_run_queue(struct request_queue *q); |
@@ -698,45 +684,126 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, | |||
698 | return NULL; | 684 | return NULL; |
699 | } | 685 | } |
700 | 686 | ||
701 | struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) | 687 | /* |
702 | { | 688 | * The maximum number of SG segments that we will put inside a scatterlist |
703 | struct scsi_host_sg_pool *sgp; | 689 | * (unless chaining is used). Should ideally fit inside a single page, to |
704 | struct scatterlist *sgl; | 690 | * avoid a higher order allocation. |
691 | */ | ||
692 | #define SCSI_MAX_SG_SEGMENTS 128 | ||
705 | 693 | ||
706 | BUG_ON(!cmd->use_sg); | 694 | /* |
695 | * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit | ||
696 | * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. | ||
697 | */ | ||
698 | #define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 | ||
707 | 699 | ||
708 | switch (cmd->use_sg) { | 700 | static inline unsigned int scsi_sgtable_index(unsigned short nents) |
701 | { | ||
702 | unsigned int index; | ||
703 | |||
704 | switch (nents) { | ||
709 | case 1 ... 8: | 705 | case 1 ... 8: |
710 | cmd->sglist_len = 0; | 706 | index = 0; |
711 | break; | 707 | break; |
712 | case 9 ... 16: | 708 | case 9 ... 16: |
713 | cmd->sglist_len = 1; | 709 | index = 1; |
714 | break; | 710 | break; |
715 | case 17 ... 32: | 711 | case 17 ... 32: |
716 | cmd->sglist_len = 2; | 712 | index = 2; |
717 | break; | 713 | break; |
718 | #if (SCSI_MAX_PHYS_SEGMENTS > 32) | ||
719 | case 33 ... 64: | 714 | case 33 ... 64: |
720 | cmd->sglist_len = 3; | 715 | index = 3; |
721 | break; | 716 | break; |
722 | #if (SCSI_MAX_PHYS_SEGMENTS > 64) | 717 | case 65 ... SCSI_MAX_SG_SEGMENTS: |
723 | case 65 ... 128: | 718 | index = 4; |
724 | cmd->sglist_len = 4; | ||
725 | break; | 719 | break; |
726 | #if (SCSI_MAX_PHYS_SEGMENTS > 128) | ||
727 | case 129 ... 256: | ||
728 | cmd->sglist_len = 5; | ||
729 | break; | ||
730 | #endif | ||
731 | #endif | ||
732 | #endif | ||
733 | default: | 720 | default: |
734 | return NULL; | 721 | printk(KERN_ERR "scsi: bad segment count=%d\n", nents); |
722 | BUG(); | ||
735 | } | 723 | } |
736 | 724 | ||
737 | sgp = scsi_sg_pools + cmd->sglist_len; | 725 | return index; |
738 | sgl = mempool_alloc(sgp->pool, gfp_mask); | 726 | } |
739 | return sgl; | 727 | |
728 | struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) | ||
729 | { | ||
730 | struct scsi_host_sg_pool *sgp; | ||
731 | struct scatterlist *sgl, *prev, *ret; | ||
732 | unsigned int index; | ||
733 | int this, left; | ||
734 | |||
735 | BUG_ON(!cmd->use_sg); | ||
736 | |||
737 | left = cmd->use_sg; | ||
738 | ret = prev = NULL; | ||
739 | do { | ||
740 | this = left; | ||
741 | if (this > SCSI_MAX_SG_SEGMENTS) { | ||
742 | this = SCSI_MAX_SG_SEGMENTS - 1; | ||
743 | index = SG_MEMPOOL_NR - 1; | ||
744 | } else | ||
745 | index = scsi_sgtable_index(this); | ||
746 | |||
747 | left -= this; | ||
748 | |||
749 | sgp = scsi_sg_pools + index; | ||
750 | |||
751 | sgl = mempool_alloc(sgp->pool, gfp_mask); | ||
752 | if (unlikely(!sgl)) | ||
753 | goto enomem; | ||
754 | |||
755 | memset(sgl, 0, sizeof(*sgl) * sgp->size); | ||
756 | |||
757 | /* | ||
758 | * first loop through, set initial index and return value | ||
759 | */ | ||
760 | if (!ret) { | ||
761 | cmd->sglist_len = index; | ||
762 | ret = sgl; | ||
763 | } | ||
764 | |||
765 | /* | ||
766 | * chain previous sglist, if any. we know the previous | ||
767 | * sglist must be the biggest one, or we would not have | ||
768 | * ended up doing another loop. | ||
769 | */ | ||
770 | if (prev) | ||
771 | sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl); | ||
772 | |||
773 | /* | ||
774 | * don't allow subsequent mempool allocs to sleep, it would | ||
775 | * violate the mempool principle. | ||
776 | */ | ||
777 | gfp_mask &= ~__GFP_WAIT; | ||
778 | gfp_mask |= __GFP_HIGH; | ||
779 | prev = sgl; | ||
780 | } while (left); | ||
781 | |||
782 | /* | ||
783 | * ->use_sg may get modified after dma mapping has potentially | ||
784 | * shrunk the number of segments, so keep a copy of it for free. | ||
785 | */ | ||
786 | cmd->__use_sg = cmd->use_sg; | ||
787 | return ret; | ||
788 | enomem: | ||
789 | if (ret) { | ||
790 | /* | ||
791 | * Free entries chained off ret. Since we were trying to | ||
792 | * allocate another sglist, we know that all entries are of | ||
793 | * the max size. | ||
794 | */ | ||
795 | sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1; | ||
796 | prev = ret; | ||
797 | ret = &ret[SCSI_MAX_SG_SEGMENTS - 1]; | ||
798 | |||
799 | while ((sgl = sg_chain_ptr(ret)) != NULL) { | ||
800 | ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1]; | ||
801 | mempool_free(sgl, sgp->pool); | ||
802 | } | ||
803 | |||
804 | mempool_free(prev, sgp->pool); | ||
805 | } | ||
806 | return NULL; | ||
740 | } | 807 | } |
741 | 808 | ||
742 | EXPORT_SYMBOL(scsi_alloc_sgtable); | 809 | EXPORT_SYMBOL(scsi_alloc_sgtable); |
@@ -748,6 +815,42 @@ void scsi_free_sgtable(struct scsi_cmnd *cmd) | |||
748 | 815 | ||
749 | BUG_ON(cmd->sglist_len >= SG_MEMPOOL_NR); | 816 | BUG_ON(cmd->sglist_len >= SG_MEMPOOL_NR); |
750 | 817 | ||
818 | /* | ||
819 | * if this is the biggest size sglist, check if we have | ||
820 | * chained parts we need to free | ||
821 | */ | ||
822 | if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) { | ||
823 | unsigned short this, left; | ||
824 | struct scatterlist *next; | ||
825 | unsigned int index; | ||
826 | |||
827 | left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1); | ||
828 | next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]); | ||
829 | while (left && next) { | ||
830 | sgl = next; | ||
831 | this = left; | ||
832 | if (this > SCSI_MAX_SG_SEGMENTS) { | ||
833 | this = SCSI_MAX_SG_SEGMENTS - 1; | ||
834 | index = SG_MEMPOOL_NR - 1; | ||
835 | } else | ||
836 | index = scsi_sgtable_index(this); | ||
837 | |||
838 | left -= this; | ||
839 | |||
840 | sgp = scsi_sg_pools + index; | ||
841 | |||
842 | if (left) | ||
843 | next = sg_chain_ptr(&sgl[sgp->size - 1]); | ||
844 | |||
845 | mempool_free(sgl, sgp->pool); | ||
846 | } | ||
847 | |||
848 | /* | ||
849 | * Restore original, will be freed below | ||
850 | */ | ||
851 | sgl = cmd->request_buffer; | ||
852 | } | ||
853 | |||
751 | sgp = scsi_sg_pools + cmd->sglist_len; | 854 | sgp = scsi_sg_pools + cmd->sglist_len; |
752 | mempool_free(sgl, sgp->pool); | 855 | mempool_free(sgl, sgp->pool); |
753 | } | 856 | } |
@@ -988,7 +1091,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) | |||
988 | static int scsi_init_io(struct scsi_cmnd *cmd) | 1091 | static int scsi_init_io(struct scsi_cmnd *cmd) |
989 | { | 1092 | { |
990 | struct request *req = cmd->request; | 1093 | struct request *req = cmd->request; |
991 | struct scatterlist *sgpnt; | ||
992 | int count; | 1094 | int count; |
993 | 1095 | ||
994 | /* | 1096 | /* |
@@ -1001,14 +1103,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd) | |||
1001 | /* | 1103 | /* |
1002 | * If sg table allocation fails, requeue request later. | 1104 | * If sg table allocation fails, requeue request later. |
1003 | */ | 1105 | */ |
1004 | sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); | 1106 | cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC); |
1005 | if (unlikely(!sgpnt)) { | 1107 | if (unlikely(!cmd->request_buffer)) { |
1006 | scsi_unprep_request(req); | 1108 | scsi_unprep_request(req); |
1007 | return BLKPREP_DEFER; | 1109 | return BLKPREP_DEFER; |
1008 | } | 1110 | } |
1009 | 1111 | ||
1010 | req->buffer = NULL; | 1112 | req->buffer = NULL; |
1011 | cmd->request_buffer = (char *) sgpnt; | ||
1012 | if (blk_pc_request(req)) | 1113 | if (blk_pc_request(req)) |
1013 | cmd->request_bufflen = req->data_len; | 1114 | cmd->request_bufflen = req->data_len; |
1014 | else | 1115 | else |
@@ -1533,8 +1634,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, | |||
1533 | if (!q) | 1634 | if (!q) |
1534 | return NULL; | 1635 | return NULL; |
1535 | 1636 | ||
1637 | /* | ||
1638 | * this limit is imposed by hardware restrictions | ||
1639 | */ | ||
1536 | blk_queue_max_hw_segments(q, shost->sg_tablesize); | 1640 | blk_queue_max_hw_segments(q, shost->sg_tablesize); |
1537 | blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS); | 1641 | |
1642 | /* | ||
1643 | * In the future, sg chaining support will be mandatory and this | ||
1644 | * ifdef can then go away. Right now we don't have all archs | ||
1645 | * converted, so better keep it safe. | ||
1646 | */ | ||
1647 | #ifdef ARCH_HAS_SG_CHAIN | ||
1648 | blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); | ||
1649 | #else | ||
1650 | blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); | ||
1651 | #endif | ||
1652 | |||
1538 | blk_queue_max_sectors(q, shost->max_sectors); | 1653 | blk_queue_max_sectors(q, shost->max_sectors); |
1539 | blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); | 1654 | blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); |
1540 | blk_queue_segment_boundary(q, shost->dma_boundary); | 1655 | blk_queue_segment_boundary(q, shost->dma_boundary); |
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 9f8f80ab0c8b..702fcfeb37f1 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h | |||
@@ -11,13 +11,6 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | 12 | ||
13 | /* | 13 | /* |
14 | * The maximum sg list length SCSI can cope with | ||
15 | * (currently must be a power of 2 between 32 and 256) | ||
16 | */ | ||
17 | #define SCSI_MAX_PHYS_SEGMENTS MAX_PHYS_SEGMENTS | ||
18 | |||
19 | |||
20 | /* | ||
21 | * SCSI command lengths | 14 | * SCSI command lengths |
22 | */ | 15 | */ |
23 | 16 | ||
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index a5439a4b8f97..4a5fa2d1c44c 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h | |||
@@ -70,6 +70,7 @@ struct scsi_cmnd { | |||
70 | /* These elements define the operation we ultimately want to perform */ | 70 | /* These elements define the operation we ultimately want to perform */ |
71 | unsigned short use_sg; /* Number of pieces of scatter-gather */ | 71 | unsigned short use_sg; /* Number of pieces of scatter-gather */ |
72 | unsigned short sglist_len; /* size of malloc'd scatter-gather list */ | 72 | unsigned short sglist_len; /* size of malloc'd scatter-gather list */ |
73 | unsigned short __use_sg; | ||
73 | 74 | ||
74 | unsigned underflow; /* Return error if less than | 75 | unsigned underflow; /* Return error if less than |
75 | this amount is transferred */ | 76 | this amount is transferred */ |