Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.c              |  50
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.h              |  18
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c  |  24
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_monitor.c      | 170
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c     |  33
5 files changed, 270 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 589b948e6e01..24bfa63e86cf 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,8 +33,8 @@
 #include <asm/intel_rdt_sched.h>
 #include "intel_rdt.h"
 
-#define MAX_MBA_BW	100u
 #define MBA_IS_LINEAR	0x4
+#define MBA_MAX_MBPS	U32_MAX
 
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
@@ -178,7 +178,7 @@ struct rdt_resource rdt_resources_all[] = {
 		.msr_update		= mba_wrmsr,
 		.cache_level		= 3,
 		.parse_ctrlval		= parse_bw,
-		.format_str		= "%d=%*d",
+		.format_str		= "%d=%*u",
 		.fflags			= RFTYPE_RES_MB,
 	},
 };
@@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void)
 	rdt_alloc_capable = true;
 }
 
+bool is_mba_sc(struct rdt_resource *r)
+{
+	if (!r)
+		return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+
+	return r->membw.mba_sc;
+}
+
 /*
  * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
  * exposed to user interface and the h/w understandable delay values.
@@ -341,7 +349,7 @@ static int get_cache_id(int cpu, int level)
  * that can be written to QOS_MSRs.
  * There are currently no SKUs which support non linear delay values.
  */
-static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
 {
 	if (r->membw.delay_linear)
 		return MAX_MBA_BW - bw;
@@ -431,25 +439,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
 	return NULL;
 }
 
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+{
+	int i;
+
+	/*
+	 * Initialize the Control MSRs to having no control.
+	 * For Cache Allocation: Set all bits in cbm
+	 * For Memory Allocation: Set b/w requested to 100%
+	 * and the bandwidth in MBps to U32_MAX
+	 */
+	for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+		*dc = r->default_ctrl;
+		*dm = MBA_MAX_MBPS;
+	}
+}
+
 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
 {
 	struct msr_param m;
-	u32 *dc;
-	int i;
+	u32 *dc, *dm;
 
 	dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
 	if (!dc)
 		return -ENOMEM;
 
-	d->ctrl_val = dc;
+	dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+	if (!dm) {
+		kfree(dc);
+		return -ENOMEM;
+	}
 
-	/*
-	 * Initialize the Control MSRs to having no control.
-	 * For Cache Allocation: Set all bits in cbm
-	 * For Memory Allocation: Set b/w requested to 100
-	 */
-	for (i = 0; i < r->num_closid; i++, dc++)
-		*dc = r->default_ctrl;
+	d->ctrl_val = dc;
+	d->mbps_val = dm;
+	setup_default_ctrlval(r, dc, dm);
 
 	m.low = 0;
 	m.high = r->num_closid;
@@ -588,6 +611,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 	}
 
 	kfree(d->ctrl_val);
+	kfree(d->mbps_val);
 	kfree(d->rmid_busy_llc);
 	kfree(d->mbm_total);
 	kfree(d->mbm_local);
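
An aside on the two MBA value spaces the intel_rdt.c changes above set up: setup_default_ctrlval() seeds every CLOSID with the hardware default percentage (default_ctrl) and an effectively unlimited MBps value (MBA_MAX_MBPS, i.e. U32_MAX), while delay_bw_map() (made non-static so the software controller can call it) turns a percentage into the throttle value written to IA32_MBA_THRTL_MSR. The stand-alone sketch below only illustrates the linear branch of that mapping; it is not part of the patch and the helper name is made up.

/* Illustrative sketch, not part of the patch: the linear branch of
 * delay_bw_map() programs MAX_MBA_BW - bw, i.e. the complement of the
 * requested bandwidth percentage. */
#include <stdio.h>

#define MAX_MBA_BW 100u

static unsigned int linear_delay(unsigned int bw_percent)	/* hypothetical name */
{
	return MAX_MBA_BW - bw_percent;	/* same arithmetic as delay_bw_map() */
}

int main(void)
{
	unsigned int bw;

	for (bw = 10; bw <= 100; bw += 30)
		printf("requested %3u%% -> throttle delay %u\n", bw, linear_delay(bw));
	return 0;
}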
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3fd7a70ee04a..39752825e376 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -28,6 +28,7 @@
 
 #define MBM_CNTR_WIDTH		24
 #define MBM_OVERFLOW_INTERVAL	1000
+#define MAX_MBA_BW		100u
 
 #define RMID_VAL_ERROR		BIT_ULL(63)
 #define RMID_VAL_UNAVAIL	BIT_ULL(62)
@@ -180,10 +181,20 @@ struct rftype {
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
  * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
+ * @chunks_bw	Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr	Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bw	The most recent bandwidth in MBps
+ * @delta_bw	Difference between the current and previous bandwidth
+ * @delta_comp	Indicates whether to compute the delta_bw
  */
 struct mbm_state {
 	u64	chunks;
 	u64	prev_msr;
+	u64	chunks_bw;
+	u64	prev_bw_msr;
+	u32	prev_bw;
+	u32	delta_bw;
+	bool	delta_comp;
 };
 
 /**
@@ -202,6 +213,7 @@ struct mbm_state {
  * @cqm_work_cpu:
  *		worker cpu for CQM h/w counters
  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
+ * @mbps_val:	When mba_sc is enabled, this holds the bandwidth in MBps
  * @new_ctrl:	new ctrl value to be loaded
  * @have_new_ctrl: did user provide new_ctrl for this domain
  */
@@ -217,6 +229,7 @@ struct rdt_domain {
 	int			mbm_work_cpu;
 	int			cqm_work_cpu;
 	u32			*ctrl_val;
+	u32			*mbps_val;
 	u32			new_ctrl;
 	bool			have_new_ctrl;
 };
@@ -259,6 +272,7 @@ struct rdt_cache {
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
+ * @mba_sc:		True if MBA software controller(mba_sc) is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
@@ -266,6 +280,7 @@ struct rdt_membw {
 	u32		min_bw;
 	u32		bw_gran;
 	u32		delay_linear;
+	bool		mba_sc;
 	u32		*mb_map;
 };
 
@@ -445,6 +460,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
 				unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
+bool is_mba_sc(struct rdt_resource *r);
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
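
The new mbm_state fields above carry the bandwidth bookkeeping: chunks_bw and prev_bw_msr accumulate local traffic, prev_bw holds the last computed MBps figure, and delta_bw/delta_comp record how much one throttle step moved it. They are all fed by deltas of the 24-bit (MBM_CNTR_WIDTH) IA32_QM_CTR counter, computed wraparound-safely by mbm_overflow_count() in intel_rdt_monitor.c below. A minimal sketch of that shift trick, not part of the patch and using a made-up function name:

/* Illustrative sketch, not part of the patch: shifting both readings left by
 * (64 - MBM_CNTR_WIDTH) before subtracting lets the unsigned wraparound of the
 * subtraction cancel the wraparound of the 24-bit hardware counter. */
#include <stdio.h>
#include <stdint.h>

#define MBM_CNTR_WIDTH 24

static uint64_t overflow_safe_delta(uint64_t prev, uint64_t cur)	/* hypothetical name */
{
	uint64_t shift = 64 - MBM_CNTR_WIDTH;

	return ((cur << shift) - (prev << shift)) >> shift;
}

int main(void)
{
	/* counter read 0xfffff0 chunks, then wrapped around to 0x000010 */
	uint64_t prev = 0xfffff0, cur = 0x000010;

	printf("delta = %llu chunks\n",
	       (unsigned long long)overflow_safe_delta(prev, cur));	/* prints 32 */
	return 0;
}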
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 23e1d5c249c6..116d57b248d3 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 		return false;
 	}
 
-	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+	if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
+	    !is_mba_sc(r)) {
 		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
 				    r->membw.min_bw, r->default_ctrl);
 		return false;
@@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid)
 	struct msr_param msr_param;
 	cpumask_var_t cpu_mask;
 	struct rdt_domain *d;
+	bool mba_sc;
+	u32 *dc;
 	int cpu;
 
 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid)
 	msr_param.high = msr_param.low + 1;
 	msr_param.res = r;
 
+	mba_sc = is_mba_sc(r);
 	list_for_each_entry(d, &r->domains, list) {
-		if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+		dc = !mba_sc ? d->ctrl_val : d->mbps_val;
+		if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
 			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-			d->ctrl_val[closid] = d->new_ctrl;
+			dc[closid] = d->new_ctrl;
 		}
 	}
-	if (cpumask_empty(cpu_mask))
+
+	/*
+	 * Avoid writing the control msr with control values when
+	 * MBA software controller is enabled
+	 */
+	if (cpumask_empty(cpu_mask) || mba_sc)
 		goto done;
 	cpu = get_cpu();
 	/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
 {
 	struct rdt_domain *dom;
 	bool sep = false;
+	u32 ctrl_val;
 
 	seq_printf(s, "%*s:", max_name_width, r->name);
 	list_for_each_entry(dom, &r->domains, list) {
 		if (sep)
 			seq_puts(s, ";");
+
+		ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
+			    dom->mbps_val[closid]);
 		seq_printf(s, r->format_str, dom->id, max_data_width,
-			   dom->ctrl_val[closid]);
+			   ctrl_val);
 		sep = true;
 	}
 	seq_puts(s, "\n");
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 681450eee428..b0f3aed76b75 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -225,10 +225,18 @@ void free_rmid(u32 rmid)
 	list_add_tail(&entry->list, &rmid_free_lru);
 }
 
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+{
+	u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+
+	chunks = (cur_msr << shift) - (prev_msr << shift);
+	return chunks >>= shift;
+}
+
 static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
-	u64 chunks, shift, tval;
 	struct mbm_state *m;
+	u64 chunks, tval;
 
 	tval = __rmid_read(rmid, rr->evtid);
 	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 	}
 
 	if (rr->first) {
-		m->prev_msr = tval;
-		m->chunks = 0;
+		memset(m, 0, sizeof(struct mbm_state));
+		m->prev_bw_msr = m->prev_msr = tval;
 		return 0;
 	}
 
-	shift = 64 - MBM_CNTR_WIDTH;
-	chunks = (tval << shift) - (m->prev_msr << shift);
-	chunks >>= shift;
+	chunks = mbm_overflow_count(m->prev_msr, tval);
 	m->chunks += chunks;
 	m->prev_msr = tval;
 
@@ -270,6 +276,32 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 }
 
 /*
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps.
+ */
+static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+	struct mbm_state *m = &rr->d->mbm_local[rmid];
+	u64 tval, cur_bw, chunks;
+
+	tval = __rmid_read(rmid, rr->evtid);
+	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+	m->chunks_bw += chunks;
+	m->chunks = m->chunks_bw;
+	cur_bw = (chunks * r->mon_scale) >> 20;
+
+	if (m->delta_comp)
+		m->delta_bw = abs(cur_bw - m->prev_bw);
+	m->delta_comp = false;
+	m->prev_bw = cur_bw;
+	m->prev_bw_msr = tval;
+}
+
+/*
  * This is called via IPI to read the CQM/MBM counters
  * on a domain.
  */
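
mbm_bw_count() above converts the chunk delta seen over one pass of the MBM overflow worker (roughly one second, per the feedback-loop comment in the next hunk) into MBps with (chunks * mon_scale) >> 20, mon_scale being the bytes-per-chunk factor enumerated via CPUID leaf 0xF. A worked version of that arithmetic follows; it is not part of the patch and the mon_scale and chunk numbers are examples only.

/* Illustrative sketch, not part of the patch: MBps over a ~1s window is just
 * bytes-moved >> 20. The mon_scale below is an example value only; the real
 * factor is read from CPUID leaf 0xF. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mon_scale = 61440;			/* example bytes per MBM chunk */
	uint64_t chunks = 17476;			/* chunk delta over ~1 second */
	uint64_t cur_bw = (chunks * mon_scale) >> 20;	/* same formula as mbm_bw_count() */

	printf("%llu chunks * %llu B/chunk ~= %llu MBps\n",
	       (unsigned long long)chunks, (unsigned long long)mon_scale,
	       (unsigned long long)cur_bw);
	return 0;
}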
@@ -297,6 +329,118 @@ void mon_event_count(void *info)
 	}
 }
 
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The frequency of the checks is 1s and we just tag along the MBM overflow
+ * timer. Having 1s interval makes the calculation of bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid unnecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth whereas MBM measures the
+ * L3 external bandwidth, the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in initial
+ * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
+ * after some time rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values. To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+	struct mbm_state *pmbm_data, *cmbm_data;
+	u32 cur_bw, delta_bw, user_bw;
+	struct rdt_resource *r_mba;
+	struct rdt_domain *dom_mba;
+	struct list_head *head;
+	struct rdtgroup *entry;
+
+	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+	closid = rgrp->closid;
+	rmid = rgrp->mon.rmid;
+	pmbm_data = &dom_mbm->mbm_local[rmid];
+
+	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+	if (!dom_mba) {
+		pr_warn_once("Failure to get domain for MBA update\n");
+		return;
+	}
+
+	cur_bw = pmbm_data->prev_bw;
+	user_bw = dom_mba->mbps_val[closid];
+	delta_bw = pmbm_data->delta_bw;
+	cur_msr_val = dom_mba->ctrl_val[closid];
+
+	/*
+	 * For Ctrl groups read data from child monitor groups.
+	 */
+	head = &rgrp->mon.crdtgrp_list;
+	list_for_each_entry(entry, head, mon.crdtgrp_list) {
+		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cur_bw += cmbm_data->prev_bw;
+		delta_bw += cmbm_data->delta_bw;
+	}
+
+	/*
+	 * Scale up/down the bandwidth linearly for the ctrl group. The
+	 * bandwidth step is the bandwidth granularity specified by the
+	 * hardware.
+	 *
+	 * The delta_bw is used when increasing the bandwidth so that we
+	 * don't alternately increase and decrease the control values
+	 * continuously.
+	 *
+	 * For example: consider cur_bw = 90MBps, user_bw = 100MBps and if
+	 * the bandwidth step is 20MBps (> user_bw - cur_bw), we would keep
+	 * switching between 90 and 110 continuously if we only check
+	 * cur_bw < user_bw.
+	 */
+	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+	} else if (cur_msr_val < MAX_MBA_BW &&
+		   (user_bw > (cur_bw + delta_bw))) {
+		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+	} else {
+		return;
+	}
+
+	cur_msr = r_mba->msr_base + closid;
+	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
+	dom_mba->ctrl_val[closid] = new_msr_val;
+
+	/*
+	 * Delta values are updated dynamically package wise for each
+	 * rdtgrp every time the throttle MSR changes value.
+	 *
+	 * This is because (1) the increase in bandwidth is not perfectly
+	 * linear and only "approximately" linear even when the hardware
+	 * says it is linear. (2) Also, since MBA is a core specific
+	 * mechanism, the delta values vary based on number of cores used
+	 * by the rdtgrp.
+	 */
+	pmbm_data->delta_comp = true;
+	list_for_each_entry(entry, head, mon.crdtgrp_list) {
+		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cmbm_data->delta_comp = true;
+	}
+}
+
 static void mbm_update(struct rdt_domain *d, int rmid)
 {
 	struct rmid_read rr;
@@ -314,7 +458,16 @@ static void mbm_update(struct rdt_domain *d, int rmid)
 	}
 	if (is_mbm_local_enabled()) {
 		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
-		__mon_event_count(rmid, &rr);
+
+		/*
+		 * Call the MBA software controller only for the
+		 * control groups and when user has enabled
+		 * the software controller explicitly.
+		 */
+		if (!is_mba_sc(NULL))
+			__mon_event_count(rmid, &rr);
+		else
+			mbm_bw_count(rmid, &rr);
 	}
 }
 
@@ -385,6 +538,9 @@ void mbm_handle_overflow(struct work_struct *work)
 		head = &prgrp->mon.crdtgrp_list;
 		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
 			mbm_update(d, crgrp->mon.rmid);
+
+		if (is_mba_sc(NULL))
+			update_mba_bw(prgrp, d);
 	}
 
 	schedule_delayed_work_on(cpu, &d->mbm_over, delay);
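
To make the oscillation guard in update_mba_bw() concrete, here is a stand-alone rendering of its decision step using the 90 MBps / 100 MBps / 20 MBps numbers from the comment in the hunk above. It is only a sketch; struct mba_dom and mba_sc_step() are hypothetical names, not kernel API.

/* Illustrative sketch, not part of the patch: without the "+ delta_bw" guard
 * the controller would raise the throttle value here and then lower it again
 * on the next pass; with the guard it leaves the value alone. */
#include <stdio.h>

#define MAX_MBA_BW 100u

struct mba_dom {				/* hypothetical stand-in for rdt_membw */
	unsigned int min_bw;			/* minimum percentage the h/w accepts */
	unsigned int bw_gran;			/* percentage granularity (the step) */
};

static unsigned int mba_sc_step(const struct mba_dom *r, unsigned int cur_msr_val,
				unsigned int cur_bw, unsigned int user_bw,
				unsigned int delta_bw)
{
	if (cur_msr_val > r->min_bw && user_bw < cur_bw)
		return cur_msr_val - r->bw_gran;	/* measured above target: throttle down */
	if (cur_msr_val < MAX_MBA_BW && user_bw > cur_bw + delta_bw)
		return cur_msr_val + r->bw_gran;	/* clearly below target: open up */
	return cur_msr_val;				/* within one step: leave it alone */
}

int main(void)
{
	struct mba_dom r = { .min_bw = 10, .bw_gran = 10 };

	/* cur_bw = 90 MBps, user_bw = 100 MBps, one step moves ~20 MBps */
	printf("without delta guard: %u%%\n", mba_sc_step(&r, 50, 90, 100, 0));
	printf("with delta guard:    %u%%\n", mba_sc_step(&r, 50, 90, 100, 20));
	return 0;
}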
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index fca759d272a1..749856a2e736 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg)
 	wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
 }
 
+static inline bool is_mba_linear(void)
+{
+	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+}
+
 static int set_cache_qos_cfg(int level, bool enable)
 {
 	void (*update)(void *arg);
@@ -1041,6 +1046,28 @@ static int set_cache_qos_cfg(int level, bool enable)
 	return 0;
 }
 
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ * MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+	struct rdt_domain *d;
+
+	if (!is_mbm_enabled() || !is_mba_linear() ||
+	    mba_sc == is_mba_sc(r))
+		return -EINVAL;
+
+	r->membw.mba_sc = mba_sc;
+	list_for_each_entry(d, &r->domains, list)
+		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+
+	return 0;
+}
+
 static int cdp_enable(int level, int data_type, int code_type)
 {
 	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1150,10 @@ static int parse_rdtgroupfs_options(char *data)
 			ret = cdpl2_enable();
 			if (ret)
 				goto out;
+		} else if (!strcmp(token, "mba_MBps")) {
+			ret = set_mba_sc(true);
+			if (ret)
+				goto out;
 		} else {
 			ret = -EINVAL;
 			goto out;
@@ -1445,6 +1476,8 @@ static void rdt_kill_sb(struct super_block *sb)
 	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
 
+	set_mba_sc(false);
+
 	/*Put everything back to default values. */
 	for_each_alloc_enabled_rdt_resource(r)
 		reset_all_ctrls(r);
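
For completeness, the rdtgroup.c changes above wire the controller to the filesystem: mounting resctrl with the new mba_MBps option calls set_mba_sc(true), and rdt_kill_sb() calls set_mba_sc(false) so a fresh mount starts back in percentage mode. Enabling is refused unless MBM is available and MBA uses the linear delay scale. A tiny sketch of that precondition, with hypothetical names and not part of the patch:

/* Illustrative sketch, not part of the patch: the accept/reject conditions of
 * set_mba_sc() as a stand-alone predicate (all names hypothetical). */
#include <stdbool.h>
#include <stdio.h>

static bool mba_sc_switch_allowed(bool mbm_enabled, bool mba_linear,
				  bool currently_on, bool requested_on)
{
	/* mirrors set_mba_sc(): need MBM, linear MBA, and an actual state change */
	return mbm_enabled && mba_linear && currently_on != requested_on;
}

int main(void)
{
	printf("enable, MBM + linear MBA, currently off: %d\n",
	       mba_sc_switch_allowed(true, true, false, true));
	printf("enable, MBM missing:                     %d\n",
	       mba_sc_switch_allowed(false, true, false, true));
	printf("enable, already enabled:                 %d\n",
	       mba_sc_switch_allowed(true, true, true, true));
	return 0;
}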