-rw-r--r--	Documentation/x86/intel_rdt_ui.txt	4
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt.h	2
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c	85
3 files changed, 87 insertions, 4 deletions
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index bcd0a6d2fcf8..acac30b67c62 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -461,8 +461,8 @@ in the cache via carefully configuring the CAT feature and controlling
 application behavior. There is no guarantee that data is placed in
 cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
 "locked" data from cache. Power management C-states may shrink or
-power off cache. It is thus recommended to limit the processor maximum
-C-state, for example, by setting the processor.max_cstate kernel parameter.
+power off cache. Deeper C-states will automatically be restricted on
+pseudo-locked region creation.
 
 It is required that an application using a pseudo-locked region runs
 with affinity to the cores (or a subset of the cores) associated
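
As a rough illustration of the affinity requirement in the documentation above, a user-space
consumer of a pseudo-locked region might look like the sketch below. This is a minimal sketch,
not the full sample program in intel_rdt_ui.txt; the device path /dev/pseudo_lock/newlock and
CPU 2 are assumptions for illustration, and the mapping length would normally match the
pseudo-locked region's size.

/*
 * Minimal sketch: pin the task to a core associated with the
 * pseudo-locked cache portion, then map the region through its
 * character device. Path and CPU number are assumed for illustration.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t cpuset;
	unsigned char *mem;
	long page_size;
	int fd;

	/* Run only on a CPU that shares the pseudo-locked cache (assumed: CPU 2). */
	CPU_ZERO(&cpuset);
	CPU_SET(2, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) < 0) {
		perror("sched_setaffinity");
		return EXIT_FAILURE;
	}

	fd = open("/dev/pseudo_lock/newlock", O_RDWR);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Map one page of the region; a real application maps the full region size. */
	page_size = sysconf(_SC_PAGESIZE);
	mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Accesses through "mem" are now served from the locked cache portion. */
	mem[0] = 1;

	munmap(mem, page_size);
	close(fd);
	return EXIT_SUCCESS;
}
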
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index b8e490a43290..2d9cbb9d7a58 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -142,6 +142,7 @@ struct mongroup {
  *			region
  * @debugfs_dir:	pointer to this region's directory in the debugfs
  *			filesystem
+ * @pm_reqs:		Power management QoS requests related to this region
  */
 struct pseudo_lock_region {
 	struct rdt_resource	*r;
@@ -155,6 +156,7 @@ struct pseudo_lock_region {
 	void			*kmem;
 	unsigned int		minor;
 	struct dentry		*debugfs_dir;
+	struct list_head	pm_reqs;
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index dd1341557c9d..6e83f61552a5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include <linux/kthread.h>
 #include <linux/mman.h>
+#include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -176,6 +177,76 @@ static struct rdtgroup *region_find_by_minor(unsigned int minor)
 }
 
 /**
+ * pseudo_lock_pm_req - A power management QoS request list entry
+ * @list:	Entry within the @pm_reqs list for a pseudo-locked region
+ * @req:	PM QoS request
+ */
+struct pseudo_lock_pm_req {
+	struct list_head list;
+	struct dev_pm_qos_request req;
+};
+
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req, *next;
+
+	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
+		dev_pm_qos_remove_request(&pm_req->req);
+		list_del(&pm_req->list);
+		kfree(pm_req);
+	}
+}
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req;
+	int cpu;
+	int ret;
+
+	for_each_cpu(cpu, &plr->d->cpu_mask) {
+		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+		if (!pm_req) {
+			rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+					     &pm_req->req,
+					     DEV_PM_QOS_RESUME_LATENCY,
+					     30);
+		if (ret < 0) {
+			rdt_last_cmd_printf("fail to add latency req cpu%d\n",
+					    cpu);
+			kfree(pm_req);
+			ret = -1;
+			goto out_err;
+		}
+		list_add(&pm_req->list, &plr->pm_reqs);
+	}
+
+	return 0;
+
+out_err:
+	pseudo_lock_cstates_relax(plr);
+	return ret;
+}
+
+/**
  * pseudo_lock_region_init - Initialize pseudo-lock region information
  * @plr:	pseudo-lock region
  *
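
The kernel-doc for pseudo_lock_cstates_constrain() above says a single 30 us resume-latency
request per CPU is low enough to keep all supported platforms out of C6 (and, where C2 maps to
deeper states, out of C2), because cpuidle governors will not pick an idle state whose exit
latency exceeds the device's resume-latency constraint. A rough way to see the effect is to
compare that value against the exit latencies advertised by cpuidle; the sketch below does this
for cpu0. It is only an illustration, not part of the patch, and assumes a system with cpuidle
enabled (state names and latencies vary by platform).

/*
 * List cpu0's cpuidle states and flag those whose exit latency exceeds
 * the 30 us resume-latency request placed by pseudo_lock_cstates_constrain().
 * Such states will not be selected on the constrained CPUs.
 */
#include <stdio.h>

int main(void)
{
	char path[128], name[64];
	FILE *f;
	int state;

	for (state = 0; ; state++) {
		unsigned int latency;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/name", state);
		f = fopen(path, "r");
		if (!f)
			break;	/* no more idle states */
		if (fscanf(f, "%63s", name) != 1)
			name[0] = '\0';
		fclose(f);

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/latency", state);
		f = fopen(path, "r");
		if (!f)
			break;
		if (fscanf(f, "%u", &latency) != 1)
			latency = 0;
		fclose(f);

		printf("state%d %-10s exit latency %4u us%s\n", state, name, latency,
		       latency > 30 ? "  (blocked by the 30 us constraint)" : "");
	}
	return 0;
}
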
@@ -242,6 +313,7 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
 		return -ENOMEM;
 
 	init_waitqueue_head(&plr->lock_thread_wq);
+	INIT_LIST_HEAD(&plr->pm_reqs);
 	rdtgrp->plr = plr;
 	return 0;
 }
@@ -1135,6 +1207,12 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	if (ret < 0)
 		return ret;
 
+	ret = pseudo_lock_cstates_constrain(plr);
+	if (ret < 0) {
+		ret = -EINVAL;
+		goto out_region;
+	}
+
 	plr->thread_done = 0;
 
 	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
@@ -1143,7 +1221,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	if (IS_ERR(thread)) {
 		ret = PTR_ERR(thread);
 		rdt_last_cmd_printf("locking thread returned error %d\n", ret);
-		goto out_region;
+		goto out_cstates;
 	}
 
 	kthread_bind(thread, plr->cpu);
@@ -1161,7 +1239,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 		 * empty pseudo-locking loop.
 		 */
 		rdt_last_cmd_puts("locking thread interrupted\n");
-		goto out_region;
+		goto out_cstates;
 	}
 
 	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
@@ -1222,6 +1300,8 @@ out_minor:
 	pseudo_lock_minor_release(new_minor);
 out_debugfs:
 	debugfs_remove_recursive(plr->debugfs_dir);
+out_cstates:
+	pseudo_lock_cstates_relax(plr);
 out_region:
 	pseudo_lock_region_clear(plr);
 out:
@@ -1255,6 +1335,7 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
 		goto free;
 	}
 
+	pseudo_lock_cstates_relax(plr);
 	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
 	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
 	pseudo_lock_minor_release(plr->minor);