author		Vikas Shivappa <vikas.shivappa@linux.intel.com>	2017-08-15 21:00:43 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-08-16 06:05:41 -0400
commit		24247aeeabe99eab13b798ccccc2dec066dd6f07 (patch)
tree		5ae1fddc1d0ae510c0060783092a6a618add6103
parent		bbc4615e0b7df5e21d0991adb4b2798508354924 (diff)
x86/intel_rdt/cqm: Improve limbo list processing
During a mkdir, the entire limbo list is synchronously checked on each
package for free RMIDs by sending IPIs. With a large number of RMIDs
(SKL has 192) this creates an intolerable amount of work in IPIs.

Replace the IPI based checking of the limbo list with asynchronous
worker threads on each package which periodically scan the limbo list
and move the RMIDs that have:

	llc_occupancy < threshold_occupancy

on all packages to the free list.

mkdir now returns -ENOSPC if both the free list and the limbo list are
empty, or -EBUSY if there are RMIDs on the limbo list and the free list
is empty.

Getting rid of the IPIs also simplifies the data structures and the
serialization required for handling the lists.

[ tglx: Rewrote changelog ... ]

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: ravi.v.shankar@intel.com
Cc: tony.luck@intel.com
Cc: fenghua.yu@intel.com
Cc: peterz@infradead.org
Cc: eranian@google.com
Cc: vikas.shivappa@intel.com
Cc: ak@linux.intel.com
Cc: davidcc@google.com
Link: http://lkml.kernel.org/r/1502845243-20454-3-git-send-email-vikas.shivappa@linux.intel.com
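For reference, the new allocation semantics described above reduce to the
following sketch of alloc_rmid(), simplified from the intel_rdt_monitor.c
hunk below (the caller holds rdtgroup_mutex):

	int alloc_rmid(void)
	{
		struct rmid_entry *entry;

		lockdep_assert_held(&rdtgroup_mutex);

		/*
		 * No free RMID: report -EBUSY if the limbo worker may still
		 * reclaim some, -ENOSPC if the limbo list is empty as well.
		 */
		if (list_empty(&rmid_free_lru))
			return rmid_limbo_count ? -EBUSY : -ENOSPC;

		entry = list_first_entry(&rmid_free_lru, struct rmid_entry, list);
		list_del(&entry->list);

		return entry->rmid;
	}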
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt.c		|  31
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt.h		|  14
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt_monitor.c	| 210
3 files changed, 133 insertions(+), 122 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b8dc141896b6..6935c8ecad7f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -426,6 +426,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
 				   GFP_KERNEL);
 		if (!d->rmid_busy_llc)
 			return -ENOMEM;
+		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
 	}
 	if (is_mbm_total_enabled()) {
 		tsize = sizeof(*d->mbm_total);
@@ -536,11 +537,33 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
+		if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
+			/*
+			 * When a package is going down, forcefully
+			 * decrement rmid->ebusy. There is no way to know
+			 * that the L3 was flushed and hence may lead to
+			 * incorrect counts in rare scenarios, but leaving
+			 * the RMID as busy creates RMID leaks if the
+			 * package never comes back.
+			 */
+			__check_limbo(d, true);
+			cancel_delayed_work(&d->cqm_limbo);
+		}
+
 		kfree(d);
-	} else if (r == &rdt_resources_all[RDT_RESOURCE_L3] &&
-		   cpu == d->mbm_work_cpu && is_mbm_enabled()) {
-		cancel_delayed_work(&d->mbm_over);
-		mbm_setup_overflow_handler(d, 0);
+		return;
+	}
+
+	if (r == &rdt_resources_all[RDT_RESOURCE_L3]) {
+		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+			cancel_delayed_work(&d->mbm_over);
+			mbm_setup_overflow_handler(d, 0);
+		}
+		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+		    has_busy_rmid(r, d)) {
+			cancel_delayed_work(&d->cqm_limbo);
+			cqm_setup_limbo_handler(d, 0);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3e4869390603..ebaddaeef023 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -20,6 +20,8 @@
 #define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
 #define QOS_L3_MBM_LOCAL_EVENT_ID	0x03
 
+#define CQM_LIMBOCHECK_INTERVAL	1000
+
 #define MBM_CNTR_WIDTH			24
 #define MBM_OVERFLOW_INTERVAL		1000
 
@@ -187,8 +189,11 @@ struct mbm_state {
  * @mbm_total:	saved state for MBM total bandwidth
  * @mbm_local:	saved state for MBM local bandwidth
  * @mbm_over:	worker to periodically read MBM h/w counters
+ * @cqm_limbo:	worker to periodically read CQM h/w counters
  * @mbm_work_cpu:
  *		worker cpu for MBM h/w counters
+ * @cqm_work_cpu:
+ *		worker cpu for CQM h/w counters
  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
  * @new_ctrl:	new ctrl value to be loaded
  * @have_new_ctrl: did user provide new_ctrl for this domain
@@ -201,7 +206,9 @@ struct rdt_domain {
 	struct mbm_state	*mbm_total;
 	struct mbm_state	*mbm_local;
 	struct delayed_work	mbm_over;
+	struct delayed_work	cqm_limbo;
 	int			mbm_work_cpu;
+	int			cqm_work_cpu;
 	u32			*ctrl_val;
 	u32			new_ctrl;
 	bool			have_new_ctrl;
@@ -422,7 +429,12 @@ void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 				    struct rdt_domain *d);
 void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
 		    struct rdtgroup *rdtgrp, int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void mbm_setup_overflow_handler(struct rdt_domain *dom,
+				unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
+void cqm_handle_limbo(struct work_struct *work);
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
+void __check_limbo(struct rdt_domain *d, bool force_free);
 
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 8378785883dc..30827510094b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -33,7 +33,7 @@
 
 struct rmid_entry {
 	u32			rmid;
-	atomic_t		busy;
+	int			busy;
 	struct list_head	list;
 };
 
@@ -45,13 +45,13 @@ struct rmid_entry {
 static LIST_HEAD(rmid_free_lru);
 
 /**
- * @rmid_limbo_lru       list of currently unused but (potentially)
+ * @rmid_limbo_count     count of currently unused but (potentially)
  *     dirty RMIDs.
- *     This list contains RMIDs that no one is currently using but that
+ *     This counts RMIDs that no one is currently using but that
  *     may have a occupancy value > intel_cqm_threshold. User can change
  *     the threshold occupancy value.
  */
-static LIST_HEAD(rmid_limbo_lru);
+unsigned int rmid_limbo_count;
 
 /**
  * @rmid_entry - The entry in the limbo and free lists.
@@ -103,124 +103,53 @@ static u64 __rmid_read(u32 rmid, u32 eventid)
 	return val;
 }
 
-/*
- * Walk the limbo list looking at any RMIDs that are flagged in the
- * domain rmid_busy_llc bitmap as busy. If the reported LLC occupancy
- * is below the threshold clear the busy bit and decrement the count.
- * If the busy count gets to zero on an RMID we stop looking.
- * This can be called from an IPI.
- * We need an atomic for the busy count because multiple CPUs may check
- * the same RMID at the same time.
- */
-static bool __check_limbo(struct rdt_domain *d)
+static bool rmid_dirty(struct rmid_entry *entry)
 {
-	struct rmid_entry *entry;
-	u64 val;
-
-	list_for_each_entry(entry, &rmid_limbo_lru, list) {
-		if (!test_bit(entry->rmid, d->rmid_busy_llc))
-			continue;
-		val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
-		if (val <= intel_cqm_threshold) {
-			clear_bit(entry->rmid, d->rmid_busy_llc);
-			if (atomic_dec_and_test(&entry->busy))
-				return true;
-		}
-	}
-	return false;
-}
-
-static void check_limbo(void *arg)
-{
-	struct rdt_domain *d;
-
-	d = get_domain_from_cpu(smp_processor_id(),
-				&rdt_resources_all[RDT_RESOURCE_L3]);
-
-	if (d)
-		__check_limbo(d);
-}
+	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
 
-static bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
-{
-	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+	return val >= intel_cqm_threshold;
 }
 
 /*
- * Scan the limbo list and move all entries that are below the
- * intel_cqm_threshold to the free list.
- * Return "true" if the limbo list is empty, "false" if there are
- * still some RMIDs there.
+ * Check the RMIDs that are marked as busy for this domain. If the
+ * reported LLC occupancy is below the threshold clear the busy bit and
+ * decrement the count. If the busy count gets to zero on an RMID, we
+ * free the RMID
  */
-static bool try_freeing_limbo_rmid(void)
+void __check_limbo(struct rdt_domain *d, bool force_free)
 {
-	struct rmid_entry *entry, *tmp;
+	struct rmid_entry *entry;
 	struct rdt_resource *r;
-	cpumask_var_t cpu_mask;
-	struct rdt_domain *d;
-	bool ret = true;
-	int cpu;
-
-	if (list_empty(&rmid_limbo_lru))
-		return ret;
+	u32 crmid = 1, nrmid;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
 
-	cpu = get_cpu();
-
 	/*
-	 * First see if we can free up an RMID by checking busy values
-	 * on the local package.
+	 * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
+	 * are marked as busy for occupancy < threshold. If the occupancy
+	 * is less than the threshold decrement the busy counter of the
+	 * RMID and move it to the free list when the counter reaches 0.
 	 */
-	d = get_domain_from_cpu(cpu, r);
-	if (d && has_busy_rmid(r, d) && __check_limbo(d)) {
-		list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-			if (atomic_read(&entry->busy) == 0) {
-				list_del(&entry->list);
+	for (;;) {
+		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
+		if (nrmid >= r->num_rmid)
+			break;
+
+		entry = __rmid_entry(nrmid);
+		if (force_free || !rmid_dirty(entry)) {
+			clear_bit(entry->rmid, d->rmid_busy_llc);
+			if (!--entry->busy) {
+				rmid_limbo_count--;
 				list_add_tail(&entry->list, &rmid_free_lru);
-				goto done;
 			}
 		}
+		crmid = nrmid + 1;
 	}
+}
 
-	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) {
-		ret = false;
-		goto done;
-	}
-
-	/*
-	 * Build a mask of other domains that have busy RMIDs
-	 */
-	list_for_each_entry(d, &r->domains, list) {
-		if (!cpumask_test_cpu(cpu, &d->cpu_mask) &&
-		    has_busy_rmid(r, d))
-			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-	}
-	if (cpumask_empty(cpu_mask)) {
-		ret = false;
-		goto free_mask;
-	}
-
-	/*
-	 * Scan domains with busy RMIDs to check if they still are busy
-	 */
-	on_each_cpu_mask(cpu_mask, check_limbo, NULL, true);
-
-	/* Walk limbo list moving all free RMIDs to the &rmid_free_lru list */
-	list_for_each_entry_safe(entry, tmp, &rmid_limbo_lru, list) {
-		if (atomic_read(&entry->busy) != 0) {
-			ret = false;
-			continue;
-		}
-		list_del(&entry->list);
-		list_add_tail(&entry->list, &rmid_free_lru);
-	}
-
-free_mask:
-	free_cpumask_var(cpu_mask);
-done:
-	put_cpu();
-	return ret;
+bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
+{
+	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
 }
 
 /*
@@ -231,15 +160,11 @@ done:
 int alloc_rmid(void)
 {
 	struct rmid_entry *entry;
-	bool ret;
 
 	lockdep_assert_held(&rdtgroup_mutex);
 
-	if (list_empty(&rmid_free_lru)) {
-		ret = try_freeing_limbo_rmid();
-		if (list_empty(&rmid_free_lru))
-			return ret ? -ENOSPC : -EBUSY;
-	}
+	if (list_empty(&rmid_free_lru))
+		return rmid_limbo_count ? -EBUSY : -ENOSPC;
 
 	entry = list_first_entry(&rmid_free_lru,
 				 struct rmid_entry, list);
@@ -252,11 +177,12 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
 	struct rdt_resource *r;
 	struct rdt_domain *d;
-	int cpu, nbusy = 0;
+	int cpu;
 	u64 val;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
 
+	entry->busy = 0;
 	cpu = get_cpu();
 	list_for_each_entry(d, &r->domains, list) {
 		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
@@ -264,17 +190,22 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 			if (val <= intel_cqm_threshold)
 				continue;
 		}
+
+		/*
+		 * For the first limbo RMID in the domain,
+		 * setup up the limbo worker.
+		 */
+		if (!has_busy_rmid(r, d))
+			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
 		set_bit(entry->rmid, d->rmid_busy_llc);
-		nbusy++;
+		entry->busy++;
 	}
 	put_cpu();
 
-	if (nbusy) {
-		atomic_set(&entry->busy, nbusy);
-		list_add_tail(&entry->list, &rmid_limbo_lru);
-	} else {
+	if (entry->busy)
+		rmid_limbo_count++;
+	else
 		list_add_tail(&entry->list, &rmid_free_lru);
-	}
 }
 
 void free_rmid(u32 rmid)
@@ -387,6 +318,50 @@ static void mbm_update(struct rdt_domain *d, int rmid)
 	}
 }
 
+/*
+ * Handler to scan the limbo list and move the RMIDs
+ * to free list whose occupancy < threshold_occupancy.
+ */
+void cqm_handle_limbo(struct work_struct *work)
+{
+	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
+	int cpu = smp_processor_id();
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+
+	mutex_lock(&rdtgroup_mutex);
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+	d = get_domain_from_cpu(cpu, r);
+
+	if (!d) {
+		pr_warn_once("Failure to get domain for limbo worker\n");
+		goto out_unlock;
+	}
+
+	__check_limbo(d, false);
+
+	if (has_busy_rmid(r, d))
+		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+
+out_unlock:
+	mutex_unlock(&rdtgroup_mutex);
+}
+
+void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
+{
+	unsigned long delay = msecs_to_jiffies(delay_ms);
+	struct rdt_resource *r;
+	int cpu;
+
+	r = &rdt_resources_all[RDT_RESOURCE_L3];
+
+	cpu = cpumask_any(&dom->cpu_mask);
+	dom->cqm_work_cpu = cpu;
+
+	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
+}
+
 void mbm_handle_overflow(struct work_struct *work)
 {
 	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
@@ -413,6 +388,7 @@ void mbm_handle_overflow(struct work_struct *work)
 	}
 
 	schedule_delayed_work_on(cpu, &d->mbm_over, delay);
+
 out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }