author		Paolo Valente <paolo.valente@linaro.org>	2017-12-15 01:23:12 -0500
committer	Jens Axboe <axboe@kernel.dk>	2018-01-05 11:31:19 -0500
commit		a34b024448eb71b0e51ad011fa1862236e366034
tree		1b8dfea18fe25a61e711792afb1d877247a14df5 /block/bfq-iosched.c
parent		4403e4e467c365b4189e3e3d3ad35cf67b8c36ed
block, bfq: consider also past I/O in soft real-time detection
BFQ privileges the I/O of soft real-time applications, such as video players, to guarantee these applications a high bandwidth and a low latency. In this respect, it is not easy to correctly detect when an application is soft real-time. A particularly nasty false positive is that of an I/O-bound application that occasionally happens to meet all the requirements to be deemed soft real-time. After being detected as soft real-time, such an application monopolizes the device. Fortunately, BFQ soon realizes that the application is actually not soft real-time and suspends every privilege. Yet the application may again be wrongly detected as soft real-time, and so on.

As highlighted by our tests, this problem causes BFQ to occasionally fail to guarantee high responsiveness in the presence of heavy background I/O workloads. The reason is that the background workload happens to be detected as soft real-time, more or less frequently, during the execution of the interactive task under test. To give an idea, because of this problem LibreOffice Writer occasionally takes 8 seconds, instead of 3, to start up if there are sequential reads and writes in the background, on a Kingston SSDNow V300.

This commit addresses the issue by leveraging the following facts. The reason why some applications are detected as soft real-time despite all the BFQ checks against false positives is simply that, under high CPU or storage-device load, I/O-bound applications may happen to do I/O slowly enough to meet all the soft real-time requirements and pass all the extra BFQ checks. Yet this happens only for limited time periods: slow-speed time intervals are usually interspersed between other time intervals during which these applications do I/O at a very high speed.

To exploit these facts, this commit introduces a small change in the detection of soft real-time behavior, to systematically consider also the recent past: the higher the speed was in the recent past, the later the next I/O must arrive for the application to be considered soft real-time. At the beginning of a slow-speed interval, the minimum arrival time allowed for the next I/O usually happens to still be so high as to fall *after* the end of the slow-speed period itself. As a consequence, the application does not risk being deemed soft real-time during the slow-speed interval. Then, during the next high-speed interval, the application evidently cannot be deemed soft real-time (exactly because of its speed), and so on.

This extra filtering proved to be rather effective: in the above test, the frequency of false positives became so low that the start-up time was 3 seconds in all iterations (apart from occasional outliers caused by page-cache-management issues, which are out of the scope of this commit and cannot be solved by an I/O scheduler).

Tested-by: Lee Tibbert <lee.tibbert@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Angelo Ruocco <angeloruocco90@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
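As an illustration of the mechanism described above, here is a minimal userspace sketch (not the patch itself; the names softrt_state, softrt_next_start(), MAX_SOFTRT_RATE and SLICE_IDLE_JIFFIES, and all constants, are invented for this example). It models the rule the commit adds: the next time at which a queue may be deemed soft real-time is the maximum of the previously computed bound, a bandwidth-based estimate, and a short idle window, so a recent high-speed interval keeps the bound beyond the slow-speed interval that follows.

/*
 * Illustrative userspace sketch, not kernel code: all names and
 * constants below are assumptions made for this example only.
 */
#include <stdio.h>

#define HZ                 250   /* assumed tick rate                   */
#define MAX_SOFTRT_RATE    7000  /* assumed soft-rt rate cap, sectors/s */
#define SLICE_IDLE_JIFFIES 2     /* assumed idle-wait window, jiffies   */

struct softrt_state {
	unsigned long next_softrt_start;  /* earliest soft-rt eligibility    */
	unsigned long last_idle_bklogged; /* when the queue last became idle */
	unsigned long service;            /* sectors served since then       */
};

static unsigned long max3ul(unsigned long a, unsigned long b, unsigned long c)
{
	unsigned long m = a > b ? a : b;

	return m > c ? m : c;
}

/* The max-of-three rule introduced by the patch, in isolation. */
static unsigned long softrt_next_start(const struct softrt_state *s,
				       unsigned long now)
{
	return max3ul(s->next_softrt_start,
		      s->last_idle_bklogged + HZ * s->service / MAX_SOFTRT_RATE,
		      now + SLICE_IDLE_JIFFIES + 4);
}

int main(void)
{
	struct softrt_state s = {
		.next_softrt_start = 0,
		.last_idle_bklogged = 1000,
		.service = 0,
	};
	unsigned long now = 1000;

	/* High-speed interval: plenty of service pushes the bound ~10 s ahead. */
	s.service = 70000;
	s.next_softrt_start = softrt_next_start(&s, now);
	printf("after fast interval: next start = %lu\n", s.next_softrt_start);

	/*
	 * Slow-speed interval one second later: the remembered bound (first
	 * argument of max3ul) still dominates, so the queue is not wrongly
	 * promoted to soft real-time, as the commit message explains.
	 */
	now += HZ;
	s.last_idle_bklogged = now;
	s.service = 100;
	printf("during slow interval: next start = %lu, now = %lu\n",
	       softrt_next_start(&s, now), now);

	return 0;
}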
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--	block/bfq-iosched.c	115
1 file changed, 81 insertions(+), 34 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 9625550b2f85..e33c5c4c9856 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2940,45 +2940,87 @@ static bool bfq_bfqq_is_slow(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * whereas soft_rt_next_start is set to infinity for applications that do
  * not.
  *
- * Unfortunately, even a greedy application may happen to behave in an
- * isochronous way if the CPU load is high. In fact, the application may
- * stop issuing requests while the CPUs are busy serving other processes,
- * then restart, then stop again for a while, and so on. In addition, if
- * the disk achieves a low enough throughput with the request pattern
- * issued by the application (e.g., because the request pattern is random
- * and/or the device is slow), then the application may meet the above
- * bandwidth requirement too. To prevent such a greedy application to be
- * deemed as soft real-time, a further rule is used in the computation of
- * soft_rt_next_start: soft_rt_next_start must be higher than the current
- * time plus the maximum time for which the arrival of a request is waited
- * for when a sync queue becomes idle, namely bfqd->bfq_slice_idle.
- * This filters out greedy applications, as the latter issue instead their
- * next request as soon as possible after the last one has been completed
- * (in contrast, when a batch of requests is completed, a soft real-time
- * application spends some time processing data).
- *
- * Unfortunately, the last filter may easily generate false positives if
- * only bfqd->bfq_slice_idle is used as a reference time interval and one
- * or both the following cases occur:
- * 1) HZ is so low that the duration of a jiffy is comparable to or higher
- *    than bfqd->bfq_slice_idle. This happens, e.g., on slow devices with
- *    HZ=100.
+ * Unfortunately, even a greedy (i.e., I/O-bound) application may
+ * happen to meet, occasionally or systematically, both the above
+ * bandwidth and isochrony requirements. This may happen at least in
+ * the following circumstances. First, if the CPU load is high. The
+ * application may stop issuing requests while the CPUs are busy
+ * serving other processes, then restart, then stop again for a while,
+ * and so on. The other circumstances are related to the storage
+ * device: the storage device is highly loaded or reaches a low-enough
+ * throughput with the I/O of the application (e.g., because the I/O
+ * is random and/or the device is slow). In all these cases, the
+ * I/O of the application may be simply slowed down enough to meet
+ * the bandwidth and isochrony requirements. To reduce the probability
+ * that greedy applications are deemed as soft real-time in these
+ * corner cases, a further rule is used in the computation of
+ * soft_rt_next_start: the return value of this function is forced to
+ * be higher than the maximum between the following two quantities.
+ *
+ * (a) Current time plus: (1) the maximum time for which the arrival
+ *     of a request is waited for when a sync queue becomes idle,
+ *     namely bfqd->bfq_slice_idle, and (2) a few extra jiffies. We
+ *     postpone for a moment the reason for adding a few extra
+ *     jiffies; we get back to it after next item (b). Lower-bounding
+ *     the return value of this function with the current time plus
+ *     bfqd->bfq_slice_idle tends to filter out greedy applications,
+ *     because the latter issue their next request as soon as possible
+ *     after the last one has been completed. In contrast, a soft
+ *     real-time application spends some time processing data, after a
+ *     batch of its requests has been completed.
+ *
+ * (b) Current value of bfqq->soft_rt_next_start. As pointed out
+ *     above, greedy applications may happen to meet both the
+ *     bandwidth and isochrony requirements under heavy CPU or
+ *     storage-device load. In more detail, in these scenarios, these
+ *     applications happen, only for limited time periods, to do I/O
+ *     slowly enough to meet all the requirements described so far,
+ *     including the filtering in above item (a). These slow-speed
+ *     time intervals are usually interspersed between other time
+ *     intervals during which these applications do I/O at a very high
+ *     speed. Fortunately, exactly because of the high speed of the
+ *     I/O in the high-speed intervals, the values returned by this
+ *     function happen to be so high, near the end of any such
+ *     high-speed interval, to be likely to fall *after* the end of
+ *     the low-speed time interval that follows. These high values are
+ *     stored in bfqq->soft_rt_next_start after each invocation of
+ *     this function. As a consequence, if the last value of
+ *     bfqq->soft_rt_next_start is constantly used to lower-bound the
+ *     next value that this function may return, then, from the very
+ *     beginning of a low-speed interval, bfqq->soft_rt_next_start is
+ *     likely to be constantly kept so high that any I/O request
+ *     issued during the low-speed interval is considered as arriving
+ *     too soon for the application to be deemed as soft
+ *     real-time. Then, in the high-speed interval that follows, the
+ *     application will not be deemed as soft real-time, just because
+ *     it will do I/O at a high speed. And so on.
+ *
+ * Getting back to the filtering in item (a), in the following two
+ * cases this filtering might be easily passed by a greedy
+ * application, if the reference quantity was just
+ * bfqd->bfq_slice_idle:
+ * 1) HZ is so low that the duration of a jiffy is comparable to or
+ *    higher than bfqd->bfq_slice_idle. This happens, e.g., on slow
+ *    devices with HZ=100. The time granularity may be so coarse
+ *    that the approximation, in jiffies, of bfqd->bfq_slice_idle
+ *    is rather lower than the exact value.
  * 2) jiffies, instead of increasing at a constant rate, may stop increasing
  *    for a while, then suddenly 'jump' by several units to recover the lost
  *    increments. This seems to happen, e.g., inside virtual machines.
- * To address this issue, we do not use as a reference time interval just
- * bfqd->bfq_slice_idle, but bfqd->bfq_slice_idle plus a few jiffies. In
- * particular we add the minimum number of jiffies for which the filter
- * seems to be quite precise also in embedded systems and KVM/QEMU virtual
- * machines.
+ * To address this issue, in the filtering in (a) we do not use as a
+ * reference time interval just bfqd->bfq_slice_idle, but
+ * bfqd->bfq_slice_idle plus a few jiffies. In particular, we add the
+ * minimum number of jiffies for which the filter seems to be quite
+ * precise also in embedded systems and KVM/QEMU virtual machines.
  */
 static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
 						struct bfq_queue *bfqq)
 {
-	return max(bfqq->last_idle_bklogged +
-		   HZ * bfqq->service_from_backlogged /
-		   bfqd->bfq_wr_max_softrt_rate,
-		   jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
+	return max3(bfqq->soft_rt_next_start,
+		    bfqq->last_idle_bklogged +
+		    HZ * bfqq->service_from_backlogged /
+		    bfqd->bfq_wr_max_softrt_rate,
+		    jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
 }
 
 /**
@@ -4014,10 +4056,15 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	bfqq->split_time = bfq_smallest_from_now();
 
 	/*
-	 * Set to the value for which bfqq will not be deemed as
-	 * soft rt when it becomes backlogged.
+	 * To not forget the possibly high bandwidth consumed by a
+	 * process/queue in the recent past,
+	 * bfq_bfqq_softrt_next_start() returns a value at least equal
+	 * to the current value of bfqq->soft_rt_next_start (see
+	 * comments on bfq_bfqq_softrt_next_start). Set
+	 * soft_rt_next_start to now, to mean that bfqq has consumed
+	 * no bandwidth so far.
 	 */
-	bfqq->soft_rt_next_start = bfq_greatest_from_now();
+	bfqq->soft_rt_next_start = jiffies;
 
 	/* first request is almost certainly seeky */
 	bfqq->seek_history = 1;
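As a usage note on the initialization change in the second hunk, and reusing the illustrative softrt_next_start() helper from the sketch after the commit message (again an assumption, not the kernel code): starting the bound at "now" simply means that no past bandwidth has been recorded yet; the remaining two terms of the max-of-three rule still delay the first possible promotion by a short grace period.

	/*
	 * Rough analogue of bfqq->soft_rt_next_start = jiffies in
	 * bfq_init_bfqq(): a freshly created queue starts with its bound
	 * at "now" rather than far in the future.
	 */
	struct softrt_state fresh = {
		.next_softrt_start = 1000,   /* "now" */
		.last_idle_bklogged = 1000,
		.service = 0,
	};

	/*
	 * Evaluated at now = 1000 this yields 1006 (now + SLICE_IDLE_JIFFIES
	 * + 4): the new queue cannot be deemed soft real-time immediately,
	 * and any subsequent high-speed I/O only raises the bound further.
	 */
	printf("fresh queue: next start = %lu\n", softrt_next_start(&fresh, 1000));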