Diffstat (limited to 'drivers/md/bcache/request.c')
-rw-r--r--  drivers/md/bcache/request.c | 197
1 file changed, 79 insertions(+), 118 deletions(-)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e52d5b..786a1a4f74d8 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/cgroup.h>
 #include <linux/module.h>
@@ -21,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD	95
 #define CUTOFF_CACHE_READA	90
-#define CUTOFF_WRITEBACK	50
-#define CUTOFF_WRITEBACK_SYNC	75
 
 struct kmem_cache *bch_search_cache;
 
@@ -489,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl)
 		bch_queue_gc(op->c);
 	}
 
+	/*
+	 * Journal writes are marked REQ_FLUSH; if the original write was a
+	 * flush, it'll wait on the journal write.
+	 */
+	bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+
 	do {
 		unsigned i;
 		struct bkey *k;
@@ -510,10 +515,6 @@ static void bch_insert_data_loop(struct closure *cl)
 			goto err;
 
 		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
-		if (!n) {
-			__bkey_put(op->c, k);
-			continue_at(cl, bch_insert_data_loop, bcache_wq);
-		}
 
 		n->bi_end_io = bch_insert_data_endio;
 		n->bi_private = cl;
@@ -530,10 +531,9 @@ static void bch_insert_data_loop(struct closure *cl)
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
 
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
 
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -716,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
 	s->task = current;
 	s->orig_bio = bio;
 	s->write = (bio->bi_rw & REQ_WRITE) != 0;
-	s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0;
+	s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 	s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0;
 	s->recoverable = 1;
 	s->start_time = jiffies;
@@ -784,11 +784,8 @@ static void request_read_error(struct closure *cl)
 	int i;
 
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +803,6 @@ static void request_read_error(struct closure *cl)
 
 		/* XXX: invalidate cache */
 
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
 
@@ -827,53 +823,13 @@ static void request_read_done(struct closure *cl)
 	 */
 
 	if (s->op.cache_bio) {
-		struct bio_vec *src, *dst;
-		unsigned src_offset, dst_offset, bytes;
-		void *dst_ptr;
-
 		bio_reset(s->op.cache_bio);
 		s->op.cache_bio->bi_sector = s->cache_miss->bi_sector;
 		s->op.cache_bio->bi_bdev = s->cache_miss->bi_bdev;
 		s->op.cache_bio->bi_size = s->cache_bio_sectors << 9;
 		bch_bio_map(s->op.cache_bio, NULL);
 
-		src = bio_iovec(s->op.cache_bio);
-		dst = bio_iovec(s->cache_miss);
-		src_offset = src->bv_offset;
-		dst_offset = dst->bv_offset;
-		dst_ptr = kmap(dst->bv_page);
-
-		while (1) {
-			if (dst_offset == dst->bv_offset + dst->bv_len) {
-				kunmap(dst->bv_page);
-				dst++;
-				if (dst == bio_iovec_idx(s->cache_miss,
-						s->cache_miss->bi_vcnt))
-					break;
-
-				dst_offset = dst->bv_offset;
-				dst_ptr = kmap(dst->bv_page);
-			}
-
-			if (src_offset == src->bv_offset + src->bv_len) {
-				src++;
-				if (src == bio_iovec_idx(s->op.cache_bio,
-						s->op.cache_bio->bi_vcnt))
-					BUG();
-
-				src_offset = src->bv_offset;
-			}
-
-			bytes = min(dst->bv_offset + dst->bv_len - dst_offset,
-				    src->bv_offset + src->bv_len - src_offset);
-
-			memcpy(dst_ptr + dst_offset,
-			       page_address(src->bv_page) + src_offset,
-			       bytes);
-
-			src_offset += bytes;
-			dst_offset += bytes;
-		}
+		bio_copy_data(s->cache_miss, s->op.cache_bio);
 
 		bio_put(s->cache_miss);
 		s->cache_miss = NULL;
@@ -899,6 +855,7 @@ static void request_read_done_bh(struct closure *cl)
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -917,9 +874,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	struct bio *miss;
 
 	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-	if (!miss)
-		return -EAGAIN;
-
 	if (miss == bio)
 		s->op.lookup_done = true;
 
@@ -938,8 +892,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		reada = min(dc->readahead >> 9,
 			    sectors - bio_sectors(miss));
 
-		if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev))
-			reada = bdev_sectors(miss->bi_bdev) - bio_end(miss);
+		if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev))
+			reada = bdev_sectors(miss->bi_bdev) -
+				bio_end_sector(miss);
 	}
 
 	s->cache_bio_sectors = bio_sectors(miss) + reada;
@@ -963,13 +918,12 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		goto out_put;
 
 	bch_bio_map(s->op.cache_bio, NULL);
-	if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
+	if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
 		goto out_put;
 
 	s->cache_miss = miss;
 	bio_get(s->op.cache_bio);
 
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
 	return ret;
@@ -1002,24 +956,13 @@ static void cached_dev_write_complete(struct closure *cl)
 	cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-	unsigned threshold = (bio->bi_rw & REQ_SYNC)
-		? CUTOFF_WRITEBACK_SYNC
-		: CUTOFF_WRITEBACK;
-
-	return !atomic_read(&dc->disk.detaching) &&
-		cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-		dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
 	struct bio *bio = &s->bio.bio;
 	struct bkey start, end;
 	start = KEY(dc->disk.id, bio->bi_sector, 0);
-	end = KEY(dc->disk.id, bio_end(bio), 0);
+	end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
 	bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end);
 
@@ -1034,22 +977,37 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (bio->bi_rw & REQ_DISCARD)
 		goto skip;
 
+	if (should_writeback(dc, s->orig_bio,
+			     cache_mode(dc, bio),
+			     s->op.skip)) {
+		s->op.skip = false;
+		s->writeback = true;
+	}
+
 	if (s->op.skip)
 		goto skip;
 
-	if (should_writeback(dc, s->orig_bio))
-		s->writeback = true;
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
 						   dc->disk.bio_split);
 
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
-		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
-		bch_writeback_add(dc, bio_sectors(bio));
+		bch_writeback_add(dc);
+
+		if (s->op.flush_journal) {
+			/* Also need to send a flush to the backing device */
+			s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+							   dc->disk.bio_split);
+
+			bio->bi_size = 0;
+			bio->bi_vcnt = 0;
+			closure_bio_submit(bio, cl, s->d);
+		} else {
+			s->op.cache_bio = bio;
+		}
 	}
 out:
 	closure_call(&s->op.cl, bch_insert_data, NULL, cl);
@@ -1058,7 +1016,6 @@ skip:
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1045,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1064,13 @@ int bch_get_congested(struct cache_set *c)
 
 	i += CONGESTED_MAX;
 
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+	return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1090,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1109,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		goto skip;
 	}
 
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
 
-		if (!cutoff)
-			goto rescale;
-
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1177,7 +1136,7 @@ found:
 		if (i->sequential + bio->bi_size > i->sequential)
 			i->sequential += bio->bi_size;
 
-		i->last = bio_end(bio);
+		i->last = bio_end_sector(bio);
 		i->jiffies = jiffies + msecs_to_jiffies(5000);
 		s->task->sequential_io = i->sequential;
 
@@ -1192,12 +1151,19 @@ found:
 		add_sequential(s->task);
 	}
 
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
 
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
 
 rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
@@ -1288,30 +1254,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
 				struct bio *bio, unsigned sectors)
 {
+	struct bio_vec *bv;
+	int i;
+
 	/* Zero fill bio */
 
-	while (bio->bi_idx != bio->bi_vcnt) {
-		struct bio_vec *bv = bio_iovec(bio);
+	bio_for_each_segment(bv, bio, i) {
 		unsigned j = min(bv->bv_len >> 9, sectors);
 
 		void *p = kmap(bv->bv_page);
 		memset(p + bv->bv_offset, 0, j << 9);
 		kunmap(bv->bv_page);
 
-		bv->bv_len -= j << 9;
-		bv->bv_offset += j << 9;
-
-		if (bv->bv_len)
-			return 0;
-
-		bio->bi_sector += j;
-		bio->bi_size -= j << 9;
-
-		bio->bi_idx++;
-		sectors -= j;
+		sectors -= j;
 	}
 
-	s->op.lookup_done = true;
+	bio_advance(bio, min(sectors << 9, bio->bi_size));
+
+	if (!bio->bi_size)
+		s->op.lookup_done = true;
 
 	return 0;
 }
@@ -1338,8 +1299,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
 		closure_call(&s->op.cl, btree_read_async, NULL, cl);
 	} else if (bio_has_data(bio) || s->op.skip) {
 		bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys,
 					&KEY(d->id, bio->bi_sector, 0),
-					&KEY(d->id, bio_end(bio), 0));
+					&KEY(d->id, bio_end_sector(bio), 0));
 
 		s->writeback = true;
 		s->op.cache_bio = bio;