diff options
author | Babu Moger <Babu.Moger@amd.com> | 2018-11-21 15:28:45 -0500 |
---|---|---|
committer | Borislav Petkov <bp@suse.de> | 2018-11-22 14:16:20 -0500 |
commit | 4d05bf71f157d756932e77cdee16dc99e235d636 (patch) | |
tree | 63c54a94cca33592a7b3e7b5ab7176dbed79c73f | |
parent | 723f1a0dd8e26a7523ba068204bee11c95ded38d (diff) |
x86/resctrl: Introduce AMD QOS feature
Enable the QoS feature on AMD.
The following QoS sub-features are supported on AMD if the underlying
hardware supports them:
- L3 Cache allocation enforcement
- L3 Cache occupancy monitoring
- L3 Code-Data Prioritization support
- Memory Bandwidth Enforcement (Allocation)
The specification is available at:
https://developer.amd.com/wp-content/resources/56375.pdf
There are differences in the way some of the features are implemented.
Separate those functions and add those as vendor specific functions.
The major difference is in MBA feature:
- AMD uses CPUID leaf 0x80000020 to initialize the MBA features.
- AMD uses a direct bandwidth value instead of a delay value derived from the bandwidth.
- MSR register base addresses are different for MBA.
- AMD allows non-contiguous L3 cache bit masks.
Signed-off-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: "Chang S. Bae" <chang.seok.bae@intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Dmitry Safonov <dima@arista.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: <linux-doc@vger.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Pu Wen <puwen@hygon.cn>
Cc: <qianyue.zj@alibaba-inc.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Rian Hunter <rian@alum.mit.edu>
Cc: Sherry Hurwitz <sherry.hurwitz@amd.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Lendacky <Thomas.Lendacky@amd.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: <xiaochen.shen@intel.com>
Link: https://lkml.kernel.org/r/20181121202811.4492-12-babu.moger@amd.com
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c | 69 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 71 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h | 5 |
3 files changed, 142 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index ba5a5b8c4681..2ec252be4ed9 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c | |||
@@ -61,6 +61,9 @@ mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, | |||
61 | struct rdt_resource *r); | 61 | struct rdt_resource *r); |
62 | static void | 62 | static void |
63 | cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); | 63 | cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); |
64 | static void | ||
65 | mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, | ||
66 | struct rdt_resource *r); | ||
64 | 67 | ||
65 | #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) | 68 | #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) |
66 | 69 | ||
@@ -255,7 +258,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r) | |||
255 | return false; | 258 | return false; |
256 | } | 259 | } |
257 | 260 | ||
258 | static bool __get_mem_config(struct rdt_resource *r) | 261 | static bool __get_mem_config_intel(struct rdt_resource *r) |
259 | { | 262 | { |
260 | union cpuid_0x10_3_eax eax; | 263 | union cpuid_0x10_3_eax eax; |
261 | union cpuid_0x10_x_edx edx; | 264 | union cpuid_0x10_x_edx edx; |
@@ -281,6 +284,30 @@ static bool __get_mem_config(struct rdt_resource *r) | |||
281 | return true; | 284 | return true; |
282 | } | 285 | } |
283 | 286 | ||
287 | static bool __rdt_get_mem_config_amd(struct rdt_resource *r) | ||
288 | { | ||
289 | union cpuid_0x10_3_eax eax; | ||
290 | union cpuid_0x10_x_edx edx; | ||
291 | u32 ebx, ecx; | ||
292 | |||
293 | cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); | ||
294 | r->num_closid = edx.split.cos_max + 1; | ||
295 | r->default_ctrl = MAX_MBA_BW_AMD; | ||
296 | |||
297 | /* AMD does not use delay */ | ||
298 | r->membw.delay_linear = false; | ||
299 | |||
300 | r->membw.min_bw = 0; | ||
301 | r->membw.bw_gran = 1; | ||
302 | /* Max value is 2048, Data width should be 4 in decimal */ | ||
303 | r->data_width = 4; | ||
304 | |||
305 | r->alloc_capable = true; | ||
306 | r->alloc_enabled = true; | ||
307 | |||
308 | return true; | ||
309 | } | ||
310 | |||
284 | static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) | 311 | static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) |
285 | { | 312 | { |
286 | union cpuid_0x10_1_eax eax; | 313 | union cpuid_0x10_1_eax eax; |
@@ -340,6 +367,15 @@ static int get_cache_id(int cpu, int level) | |||
340 | return -1; | 367 | return -1; |
341 | } | 368 | } |
342 | 369 | ||
370 | static void | ||
371 | mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) | ||
372 | { | ||
373 | unsigned int i; | ||
374 | |||
375 | for (i = m->low; i < m->high; i++) | ||
376 | wrmsrl(r->msr_base + i, d->ctrl_val[i]); | ||
377 | } | ||
378 | |||
343 | /* | 379 | /* |
344 | * Map the memory b/w percentage value to delay values | 380 | * Map the memory b/w percentage value to delay values |
345 | * that can be written to QOS_MSRs. | 381 | * that can be written to QOS_MSRs. |
@@ -793,8 +829,13 @@ static bool __init rdt_cpu_has(int flag) | |||
793 | 829 | ||
794 | static __init bool get_mem_config(void) | 830 | static __init bool get_mem_config(void) |
795 | { | 831 | { |
796 | if (rdt_cpu_has(X86_FEATURE_MBA)) | 832 | if (!rdt_cpu_has(X86_FEATURE_MBA)) |
797 | return __get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]); | 833 | return false; |
834 | |||
835 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
836 | return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]); | ||
837 | else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
838 | return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]); | ||
798 | 839 | ||
799 | return false; | 840 | return false; |
800 | } | 841 | } |
@@ -893,10 +934,32 @@ static __init void rdt_init_res_defs_intel(void) | |||
893 | } | 934 | } |
894 | } | 935 | } |
895 | 936 | ||
937 | static __init void rdt_init_res_defs_amd(void) | ||
938 | { | ||
939 | struct rdt_resource *r; | ||
940 | |||
941 | for_each_rdt_resource(r) { | ||
942 | if (r->rid == RDT_RESOURCE_L3 || | ||
943 | r->rid == RDT_RESOURCE_L3DATA || | ||
944 | r->rid == RDT_RESOURCE_L3CODE || | ||
945 | r->rid == RDT_RESOURCE_L2 || | ||
946 | r->rid == RDT_RESOURCE_L2DATA || | ||
947 | r->rid == RDT_RESOURCE_L2CODE) | ||
948 | r->cbm_validate = cbm_validate_amd; | ||
949 | else if (r->rid == RDT_RESOURCE_MBA) { | ||
950 | r->msr_base = MSR_IA32_MBA_BW_BASE; | ||
951 | r->msr_update = mba_wrmsr_amd; | ||
952 | r->parse_ctrlval = parse_bw_amd; | ||
953 | } | ||
954 | } | ||
955 | } | ||
956 | |||
896 | static __init void rdt_init_res_defs(void) | 957 | static __init void rdt_init_res_defs(void) |
897 | { | 958 | { |
898 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | 959 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) |
899 | rdt_init_res_defs_intel(); | 960 | rdt_init_res_defs_intel(); |
961 | else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) | ||
962 | rdt_init_res_defs_amd(); | ||
900 | } | 963 | } |
901 | 964 | ||
902 | static enum cpuhp_state rdt_online; | 965 | static enum cpuhp_state rdt_online; |
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index bfd7bdf8a156..43ee3cee6494 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c | |||
@@ -30,6 +30,53 @@ | |||
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Check whether MBA bandwidth percentage value is correct. The value is | 32 | * Check whether MBA bandwidth percentage value is correct. The value is |
33 | * checked against the minimum and maximum bandwidth values specified by | ||
34 | * the hardware. The allocated bandwidth percentage is rounded to the next | ||
35 | * control step available on the hardware. | ||
36 | */ | ||
37 | static bool bw_validate_amd(char *buf, unsigned long *data, | ||
38 | struct rdt_resource *r) | ||
39 | { | ||
40 | unsigned long bw; | ||
41 | int ret; | ||
42 | |||
43 | ret = kstrtoul(buf, 10, &bw); | ||
44 | if (ret) { | ||
45 | rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); | ||
46 | return false; | ||
47 | } | ||
48 | |||
49 | if (bw < r->membw.min_bw || bw > r->default_ctrl) { | ||
50 | rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, | ||
51 | r->membw.min_bw, r->default_ctrl); | ||
52 | return false; | ||
53 | } | ||
54 | |||
55 | *data = roundup(bw, (unsigned long)r->membw.bw_gran); | ||
56 | return true; | ||
57 | } | ||
58 | |||
59 | int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, | ||
60 | struct rdt_domain *d) | ||
61 | { | ||
62 | unsigned long bw_val; | ||
63 | |||
64 | if (d->have_new_ctrl) { | ||
65 | rdt_last_cmd_printf("Duplicate domain %d\n", d->id); | ||
66 | return -EINVAL; | ||
67 | } | ||
68 | |||
69 | if (!bw_validate_amd(data->buf, &bw_val, r)) | ||
70 | return -EINVAL; | ||
71 | |||
72 | d->new_ctrl = bw_val; | ||
73 | d->have_new_ctrl = true; | ||
74 | |||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * Check whether MBA bandwidth percentage value is correct. The value is | ||
33 | * checked against the minimum and max bandwidth values specified by the | 80 | * checked against the minimum and max bandwidth values specified by the |
34 | * hardware. The allocated bandwidth percentage is rounded to the next | 81 | * hardware. The allocated bandwidth percentage is rounded to the next |
35 | * control step available on the hardware. | 82 | * control step available on the hardware. |
@@ -124,6 +171,30 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) | |||
124 | } | 171 | } |
125 | 172 | ||
126 | /* | 173 | /* |
174 | * Check whether a cache bit mask is valid. AMD allows non-contiguous | ||
175 | * bitmasks | ||
176 | */ | ||
177 | bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r) | ||
178 | { | ||
179 | unsigned long val; | ||
180 | int ret; | ||
181 | |||
182 | ret = kstrtoul(buf, 16, &val); | ||
183 | if (ret) { | ||
184 | rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); | ||
185 | return false; | ||
186 | } | ||
187 | |||
188 | if (val > r->default_ctrl) { | ||
189 | rdt_last_cmd_puts("Mask out of range\n"); | ||
190 | return false; | ||
191 | } | ||
192 | |||
193 | *data = val; | ||
194 | return true; | ||
195 | } | ||
196 | |||
197 | /* | ||
127 | * Read one cache bit mask (hex). Check that it is valid for the current | 198 | * Read one cache bit mask (hex). Check that it is valid for the current |
128 | * resource type. | 199 | * resource type. |
129 | */ | 200 | */ |
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 599cad34a6a8..822b7db634ee 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h | |||
@@ -11,6 +11,7 @@ | |||
11 | #define MSR_IA32_L3_CBM_BASE 0xc90 | 11 | #define MSR_IA32_L3_CBM_BASE 0xc90 |
12 | #define MSR_IA32_L2_CBM_BASE 0xd10 | 12 | #define MSR_IA32_L2_CBM_BASE 0xd10 |
13 | #define MSR_IA32_MBA_THRTL_BASE 0xd50 | 13 | #define MSR_IA32_MBA_THRTL_BASE 0xd50 |
14 | #define MSR_IA32_MBA_BW_BASE 0xc0000200 | ||
14 | 15 | ||
15 | #define MSR_IA32_QM_CTR 0x0c8e | 16 | #define MSR_IA32_QM_CTR 0x0c8e |
16 | #define MSR_IA32_QM_EVTSEL 0x0c8d | 17 | #define MSR_IA32_QM_EVTSEL 0x0c8d |
@@ -34,6 +35,7 @@ | |||
34 | #define MAX_MBA_BW 100u | 35 | #define MAX_MBA_BW 100u |
35 | #define MBA_IS_LINEAR 0x4 | 36 | #define MBA_IS_LINEAR 0x4 |
36 | #define MBA_MAX_MBPS U32_MAX | 37 | #define MBA_MAX_MBPS U32_MAX |
38 | #define MAX_MBA_BW_AMD 0x800 | ||
37 | 39 | ||
38 | #define RMID_VAL_ERROR BIT_ULL(63) | 40 | #define RMID_VAL_ERROR BIT_ULL(63) |
39 | #define RMID_VAL_UNAVAIL BIT_ULL(62) | 41 | #define RMID_VAL_UNAVAIL BIT_ULL(62) |
@@ -448,6 +450,8 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, | |||
448 | struct rdt_domain *d); | 450 | struct rdt_domain *d); |
449 | int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, | 451 | int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, |
450 | struct rdt_domain *d); | 452 | struct rdt_domain *d); |
453 | int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, | ||
454 | struct rdt_domain *d); | ||
451 | 455 | ||
452 | extern struct mutex rdtgroup_mutex; | 456 | extern struct mutex rdtgroup_mutex; |
453 | 457 | ||
@@ -579,5 +583,6 @@ void cqm_handle_limbo(struct work_struct *work); | |||
579 | bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); | 583 | bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); |
580 | void __check_limbo(struct rdt_domain *d, bool force_free); | 584 | void __check_limbo(struct rdt_domain *d, bool force_free); |
581 | bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); | 585 | bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); |
586 | bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); | ||
582 | 587 | ||
583 | #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ | 588 | #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ |