-rw-r--r--	Documentation/x86/intel_rdt_ui.txt	4
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt.h	2
-rw-r--r--	arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c	85
3 files changed, 87 insertions, 4 deletions
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index bcd0a6d2fcf8..acac30b67c62 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -461,8 +461,8 @@ in the cache via carefully configuring the CAT feature and controlling
 application behavior. There is no guarantee that data is placed in
 cache. Instructions like INVD, WBINVD, CLFLUSH, etc. can still evict
 "locked" data from cache. Power management C-states may shrink or
-power off cache. It is thus recommended to limit the processor maximum
-C-state, for example, by setting the processor.max_cstate kernel parameter.
+power off cache. Deeper C-states will automatically be restricted on
+pseudo-locked region creation.
 
 It is required that an application using a pseudo-locked region runs
 with affinity to the cores (or a subset of the cores) associated
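
As a rough illustration of the affinity requirement in the documentation above, a user-space
consumer of a pseudo-locked region might look like the sketch below. This is a minimal sketch,
not the full sample program in intel_rdt_ui.txt; the device path /dev/pseudo_lock/newlock and
CPU 2 are assumptions for illustration, and the mapping length would normally match the
pseudo-locked region's size.

/*
 * Minimal sketch: pin the task to a core associated with the
 * pseudo-locked cache portion, then map the region through its
 * character device. Path and CPU number are assumed for illustration.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t cpuset;
	unsigned char *mem;
	long page_size;
	int fd;

	/* Run only on a CPU that shares the pseudo-locked cache (assumed: CPU 2). */
	CPU_ZERO(&cpuset);
	CPU_SET(2, &cpuset);
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) < 0) {
		perror("sched_setaffinity");
		return EXIT_FAILURE;
	}

	fd = open("/dev/pseudo_lock/newlock", O_RDWR);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* Map one page of the region; a real application maps the full region size. */
	page_size = sysconf(_SC_PAGESIZE);
	mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	/* Accesses through "mem" are now served from the locked cache portion. */
	mem[0] = 1;

	munmap(mem, page_size);
	close(fd);
	return EXIT_SUCCESS;
}
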
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index b8e490a43290..2d9cbb9d7a58 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -142,6 +142,7 @@ struct mongroup {
  *			region
  * @debugfs_dir:	pointer to this region's directory in the debugfs
  *			filesystem
+ * @pm_reqs:		Power management QoS requests related to this region
  */
 struct pseudo_lock_region {
 	struct rdt_resource	*r;
@@ -155,6 +156,7 @@ struct pseudo_lock_region {
 	void			*kmem;
 	unsigned int		minor;
 	struct dentry		*debugfs_dir;
+	struct list_head	pm_reqs;
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index dd1341557c9d..6e83f61552a5 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include <linux/kthread.h>
 #include <linux/mman.h>
+#include <linux/pm_qos.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
@@ -176,6 +177,76 @@ static struct rdtgroup *region_find_by_minor(unsigned int minor)
 }
 
 /**
+ * pseudo_lock_pm_req - A power management QoS request list entry
+ * @list:	Entry within the @pm_reqs list for a pseudo-locked region
+ * @req:	PM QoS request
+ */
+struct pseudo_lock_pm_req {
+	struct list_head list;
+	struct dev_pm_qos_request req;
+};
+
+static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req, *next;
+
+	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
+		dev_pm_qos_remove_request(&pm_req->req);
+		list_del(&pm_req->list);
+		kfree(pm_req);
+	}
+}
+
+/**
+ * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ *
+ * To prevent the cache from being affected by power management entering
+ * C6 has to be avoided. This is accomplished by requesting a latency
+ * requirement lower than lowest C6 exit latency of all supported
+ * platforms as found in the cpuidle state tables in the intel_idle driver.
+ * At this time it is possible to do so with a single latency requirement
+ * for all supported platforms.
+ *
+ * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
+ * the ACPI latencies need to be considered while keeping in mind that C2
+ * may be set to map to deeper sleep states. In this case the latency
+ * requirement needs to prevent entering C2 also.
+ */
+static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
+{
+	struct pseudo_lock_pm_req *pm_req;
+	int cpu;
+	int ret;
+
+	for_each_cpu(cpu, &plr->d->cpu_mask) {
+		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
+		if (!pm_req) {
+			rdt_last_cmd_puts("fail allocating mem for PM QoS\n");
+			ret = -ENOMEM;
+			goto out_err;
+		}
+		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+					     &pm_req->req,
+					     DEV_PM_QOS_RESUME_LATENCY,
+					     30);
+		if (ret < 0) {
+			rdt_last_cmd_printf("fail to add latency req cpu%d\n",
+					    cpu);
+			kfree(pm_req);
+			ret = -1;
+			goto out_err;
+		}
+		list_add(&pm_req->list, &plr->pm_reqs);
+	}
+
+	return 0;
+
+out_err:
+	pseudo_lock_cstates_relax(plr);
+	return ret;
+}
+
+/**
  * pseudo_lock_region_init - Initialize pseudo-lock region information
  * @plr:	pseudo-lock region
  *
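
The kernel-doc for pseudo_lock_cstates_constrain() above says a single 30 us resume-latency
request per CPU is low enough to keep all supported platforms out of C6 (and, where C2 maps to
deeper states, out of C2), because cpuidle governors will not pick an idle state whose exit
latency exceeds the device's resume-latency constraint. A rough way to see the effect is to
compare that value against the exit latencies advertised by cpuidle; the sketch below does this
for cpu0. It is only an illustration, not part of the patch, and assumes a system with cpuidle
enabled (state names and latencies vary by platform).

/*
 * List cpu0's cpuidle states and flag those whose exit latency exceeds
 * the 30 us resume-latency request placed by pseudo_lock_cstates_constrain().
 * Such states will not be selected on the constrained CPUs.
 */
#include <stdio.h>

int main(void)
{
	char path[128], name[64];
	FILE *f;
	int state;

	for (state = 0; ; state++) {
		unsigned int latency;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/name", state);
		f = fopen(path, "r");
		if (!f)
			break;	/* no more idle states */
		if (fscanf(f, "%63s", name) != 1)
			name[0] = '\0';
		fclose(f);

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/cpuidle/state%d/latency", state);
		f = fopen(path, "r");
		if (!f)
			break;
		if (fscanf(f, "%u", &latency) != 1)
			latency = 0;
		fclose(f);

		printf("state%d %-10s exit latency %4u us%s\n", state, name, latency,
		       latency > 30 ? "  (blocked by the 30 us constraint)" : "");
	}
	return 0;
}
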
@@ -242,6 +313,7 @@ static int pseudo_lock_init(struct rdtgroup *rdtgrp)
 		return -ENOMEM;
 
 	init_waitqueue_head(&plr->lock_thread_wq);
+	INIT_LIST_HEAD(&plr->pm_reqs);
 	rdtgrp->plr = plr;
 	return 0;
 }
@@ -1135,6 +1207,12 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	if (ret < 0)
 		return ret;
 
+	ret = pseudo_lock_cstates_constrain(plr);
+	if (ret < 0) {
+		ret = -EINVAL;
+		goto out_region;
+	}
+
 	plr->thread_done = 0;
 
 	thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
@@ -1143,7 +1221,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 	if (IS_ERR(thread)) {
 		ret = PTR_ERR(thread);
 		rdt_last_cmd_printf("locking thread returned error %d\n", ret);
-		goto out_region;
+		goto out_cstates;
 	}
 
 	kthread_bind(thread, plr->cpu);
@@ -1161,7 +1239,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
 		 * empty pseudo-locking loop.
 		 */
 		rdt_last_cmd_puts("locking thread interrupted\n");
-		goto out_region;
+		goto out_cstates;
 	}
 
 	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
@@ -1222,6 +1300,8 @@ out_minor:
 	pseudo_lock_minor_release(new_minor);
 out_debugfs:
 	debugfs_remove_recursive(plr->debugfs_dir);
+out_cstates:
+	pseudo_lock_cstates_relax(plr);
 out_region:
 	pseudo_lock_region_clear(plr);
 out:
@@ -1255,6 +1335,7 @@ void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
 		goto free;
 	}
 
+	pseudo_lock_cstates_relax(plr);
 	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
 	device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
 	pseudo_lock_minor_release(plr->minor);