author		Linus Torvalds <torvalds@linux-foundation.org>	2016-12-22 12:25:45 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-22 12:25:45 -0500
commit		eb254f323bd50ab7e3cc385f2fc641a595cc8b37 (patch)
tree		ada2b6251000dc6ccdfcfac0f38c4eaf7aec905a
parent		f79f7b1b4f910e03fa20092759c79fc2e53f2eff (diff)
parent		76ae054c69a745ded388fc4ae70422d74c5bc77d (diff)
Merge branch 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 cache allocation interface from Thomas Gleixner:
 "This provides support for Intel's Cache Allocation Technology, a cache
  partitioning mechanism.

  The interface is odd, but the hardware interface of that CAT stuff is
  odd as well. We tried hard to come up with an abstraction, but that
  only allows rather simple partitioning, with no way of sharing and no
  way of dealing with the per package nature of this mechanism. In the
  end we decided to expose the allocation bitmaps directly so all
  combinations of the hardware can be utilized.

  There are two ways of associating a cache partition:

   - Task

     A task can be added to a resource group. It uses the cache
     partition associated to the group.

   - CPU

     All tasks which are not member of a resource group use the group
     to which the CPU they are running on is associated with. That
     allows for simple CPU based partitioning schemes.

  The main expected users are:

   - Virtualization, so a VM can trash only the associated part of the
     cache w/o disturbing others

   - Real-Time systems, to separate RT and general workloads

   - Latency sensitive enterprise workloads

   - In theory this also can be used to protect against cache side
     channel attacks"

[ Intel RDT is "Resource Director Technology". The interface really is
  rather odd and very specific, which delayed this pull request while I
  was thinking about it. The pull request itself came in early during
  the merge window, I just delayed it until things had calmed down and
  I had more time.

  But people tell me they'll use this, and the good news is that it is
  _so_ specific that it's rather independent of anything else, and no
  user is going to depend on the interface since it's pretty rare. So
  if push comes to shove, we can just remove the interface and nothing
  will break ]

* 'x86-cache-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (31 commits)
  x86/intel_rdt: Implement show_options() for resctrlfs
  x86/intel_rdt: Call intel_rdt_sched_in() with preemption disabled
  x86/intel_rdt: Update task closid immediately on CPU in rmdir and unmount
  x86/intel_rdt: Fix setting of closid when adding CPUs to a group
  x86/intel_rdt: Update percpu closid immediately on CPUs affected by change
  x86/intel_rdt: Reset per cpu closids on unmount
  x86/intel_rdt: Select KERNFS when enabling INTEL_RDT_A
  x86/intel_rdt: Prevent deadlock against hotplug lock
  x86/intel_rdt: Protect info directory from removal
  x86/intel_rdt: Add info files to Documentation
  x86/intel_rdt: Export the minimum number of set mask bits in sysfs
  x86/intel_rdt: Propagate error in rdt_mount() properly
  x86/intel_rdt: Add a missing #include
  MAINTAINERS: Add maintainer for Intel RDT resource allocation
  x86/intel_rdt: Add scheduler hook
  x86/intel_rdt: Add schemata file
  x86/intel_rdt: Add tasks files
  x86/intel_rdt: Add cpus file
  x86/intel_rdt: Add mkdir to resctrl file system
  x86/intel_rdt: Add "info" files to resctrl file system
  ...
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-system-cpu  |   16
-rw-r--r--  Documentation/x86/intel_rdt_ui.txt                  |  214
-rw-r--r--  MAINTAINERS                                         |    8
-rw-r--r--  arch/x86/Kconfig                                    |   13
-rw-r--r--  arch/x86/events/intel/cqm.c                         |   23
-rw-r--r--  arch/x86/include/asm/cpufeatures.h                  |    4
-rw-r--r--  arch/x86/include/asm/intel_rdt.h                    |  224
-rw-r--r--  arch/x86/include/asm/intel_rdt_common.h             |   27
-rw-r--r--  arch/x86/kernel/cpu/Makefile                        |    2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c               |   20
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.c                     |  403
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c            | 1115
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_schemata.c            |  245
-rw-r--r--  arch/x86/kernel/cpu/scattered.c                     |   11
-rw-r--r--  arch/x86/kernel/process_32.c                        |    4
-rw-r--r--  arch/x86/kernel/process_64.c                        |    4
-rw-r--r--  drivers/base/cacheinfo.c                            |    5
-rw-r--r--  include/linux/cacheinfo.h                           |    3
-rw-r--r--  include/linux/sched.h                               |    3
-rw-r--r--  include/uapi/linux/magic.h                          |    1
20 files changed, 2320 insertions(+), 25 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 498741737055..2a4a423d08e0 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -272,6 +272,22 @@ Description: Parameters for the CPU cache attributes
272 the modified cache line is written to main
273 memory only when it is replaced
274
275
276What: /sys/devices/system/cpu/cpu*/cache/index*/id
277Date: September 2016
278Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
279Description: Cache id
280
281 The id provides a unique number for a specific instance of
282 a cache of a particular type. E.g. there may be a level
283 3 unified cache on each socket in a server and we may
284 assign them ids 0, 1, 2, ...
285
286 Note that id value can be non-contiguous. E.g. level 1
287 caches typically exist per core, but there may not be a
288 power of two cores on a socket, so these caches may be
289 numbered 0, 1, 2, 3, 4, 5, 8, 9, 10, ...
290
291What: /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
292 /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
293 /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
new file mode 100644
index 000000000000..d918d268cd72
--- /dev/null
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -0,0 +1,214 @@
1User Interface for Resource Allocation in Intel Resource Director Technology
2
3Copyright (C) 2016 Intel Corporation
4
5Fenghua Yu <fenghua.yu@intel.com>
6Tony Luck <tony.luck@intel.com>
7
8This feature is enabled by the CONFIG_INTEL_RDT_A Kconfig and the
9X86 /proc/cpuinfo flag bits "rdt", "cat_l3" and "cdp_l3".
10
11To use the feature mount the file system:
12
13 # mount -t resctrl resctrl [-o cdp] /sys/fs/resctrl
14
15mount options are:
16
17"cdp": Enable code/data prioritization in L3 cache allocations.
18
19
20Info directory
21--------------
22
23The 'info' directory contains information about the enabled
24resources. Each resource has its own subdirectory. The subdirectory
25names reflect the resource names. Each subdirectory contains the
26following files:
27
28"num_closids": The number of CLOSIDs which are valid for this
29 resource. The kernel uses the smallest number of
30 CLOSIDs of all enabled resources as limit.
31
32"cbm_mask": The bitmask which is valid for this resource. This
33 mask is equivalent to 100%.
34
35"min_cbm_bits": The minimum number of consecutive bits which must be
36 set when writing a mask.
37
38
39Resource groups
40---------------
41Resource groups are represented as directories in the resctrl file
42system. The default group is the root directory. Other groups may be
43created as desired by the system administrator using the "mkdir(1)"
44command, and removed using "rmdir(1)".
45
46There are three files associated with each group:
47
48"tasks": A list of tasks that belongs to this group. Tasks can be
49 added to a group by writing the task ID to the "tasks" file
50 (which will automatically remove them from the previous
51 group to which they belonged). New tasks created by fork(2)
52 and clone(2) are added to the same group as their parent.
53 If a pid is not in any sub partition, it is in root partition
54 (i.e. default partition).
55
56"cpus": A bitmask of logical CPUs assigned to this group. Writing
57 a new mask can add/remove CPUs from this group. Added CPUs
58 are removed from their previous group. Removed ones are
59 given to the default (root) group. You cannot remove CPUs
60 from the default group.
61
62"schemata": A list of all the resources available to this group.
63 Each resource has its own line and format - see below for
64 details.
65
66When a task is running the following rules define which resources
67are available to it:
68
691) If the task is a member of a non-default group, then the schemata
70for that group is used.
71
722) Else if the task belongs to the default group, but is running on a
73CPU that is assigned to some specific group, then the schemata for
74the CPU's group is used.
75
763) Otherwise the schemata for the default group is used.
77
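
These three rules mirror the CLOSID lookup that intel_rdt_sched_in() (added
later in this patch) performs at context switch time. A minimal user-space C
sketch of the same decision, with illustrative names only:

#include <stdio.h>

/* Illustrative sketch of the lookup order above; names are not the
 * kernel's. A closid of 0 means "default group". */
struct task { int closid; };

static int effective_closid(const struct task *t, int cpu_closid)
{
	/* Rule 1: a task placed in a non-default group uses its own closid. */
	if (t->closid)
		return t->closid;
	/* Rules 2 and 3: otherwise inherit the closid of the CPU's group,
	 * which is 0 (the default group) unless the CPU was reassigned. */
	return cpu_closid;
}

int main(void)
{
	struct task grouped = { .closid = 2 }, dflt = { .closid = 0 };

	/* Prints "2 3 0": rule 1, rule 2, rule 3 respectively. */
	printf("%d %d %d\n", effective_closid(&grouped, 3),
	       effective_closid(&dflt, 3), effective_closid(&dflt, 0));
	return 0;
}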
78
79Schemata files - general concepts
80---------------------------------
81Each line in the file describes one resource. The line starts with
82the name of the resource, followed by specific values to be applied
83in each of the instances of that resource on the system.
84
85Cache IDs
86---------
87On current generation systems there is one L3 cache per socket and L2
88caches are generally just shared by the hyperthreads on a core, but this
89isn't an architectural requirement. We could have multiple separate L3
90caches on a socket, multiple cores could share an L2 cache. So instead
91of using "socket" or "core" to define the set of logical cpus sharing
92a resource we use a "Cache ID". At a given cache level this will be a
93unique number across the whole system (but it isn't guaranteed to be a
94contiguous sequence, there may be gaps). To find the ID for each logical
95CPU look in /sys/devices/system/cpu/cpu*/cache/index*/id
96
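For instance, the id of a CPU's L3 instance can be read directly from that
attribute; a minimal C sketch, assuming cpu0 and that index3 is the L3 leaf
(both are illustrative):

#include <stdio.h>

int main(void)
{
	/* Illustrative path: cpu0's index3 leaf is typically the L3. */
	const char *path = "/sys/devices/system/cpu/cpu0/cache/index3/id";
	FILE *f = fopen(path, "r");
	int id;

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%d", &id) == 1)
		printf("cpu0 L3 cache id: %d\n", id);
	fclose(f);
	return 0;
}
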
97Cache Bit Masks (CBM)
98---------------------
99For cache resources we describe the portion of the cache that is available
100for allocation using a bitmask. The maximum value of the mask is defined
101by each cpu model (and may be different for different cache levels). It
102is found using CPUID, but is also provided in the "info" directory of
103the resctrl file system in "info/{resource}/cbm_mask". X86 hardware
104requires that these masks have all the '1' bits in a contiguous block. So
1050x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9
106and 0xA are not. On a system with a 20-bit mask each bit represents 5%
107of the capacity of the cache. You could partition the cache into four
108equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
109
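The contiguity requirement can be checked with a few bit operations; a
minimal user-space C sketch (cbm_is_valid() here is illustrative, not the
kernel's validator):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: check that a CBM is one contiguous run of set bits,
 * at least min_cbm_bits long, within a cbm_len-bit mask. */
static bool cbm_is_valid(unsigned long cbm, unsigned int cbm_len,
			 unsigned int min_cbm_bits)
{
	unsigned long lowest, carried;
	unsigned int bits = 0;

	if (cbm == 0 || cbm >= (1UL << cbm_len))
		return false;

	lowest = cbm & -cbm;		/* lowest set bit */
	carried = cbm + lowest;		/* adding it carries through the run */
	if (carried & cbm)		/* leftover bits mean there was a gap */
		return false;

	for (unsigned long m = cbm; m; m >>= 1)
		bits += m & 1;

	return bits >= min_cbm_bits;
}

int main(void)
{
	/* 0x3 and 0xf8000 are contiguous, 0x5 has a gap: prints "1 0 1". */
	printf("%d %d %d\n", cbm_is_valid(0x3, 20, 1),
	       cbm_is_valid(0x5, 20, 1), cbm_is_valid(0xf8000, 20, 2));
	return 0;
}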
110
111L3 details (code and data prioritization disabled)
112--------------------------------------------------
113With CDP disabled the L3 schemata format is:
114
115 L3:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
116
117L3 details (CDP enabled via mount option to resctrl)
118----------------------------------------------------
119When CDP is enabled L3 control is split into two separate resources
120so you can specify independent masks for code and data like this:
121
122 L3data:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
123 L3code:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
124
125L2 details
126----------
127L2 cache does not support code and data prioritization, so the
128schemata format is always:
129
130 L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
131
132Example 1
133---------
134On a two socket machine (one L3 cache per socket) with just four bits
135for cache bit masks
136
137# mount -t resctrl resctrl /sys/fs/resctrl
138# cd /sys/fs/resctrl
139# mkdir p0 p1
140# echo "L3:0=3;1=c" > /sys/fs/resctrl/p0/schemata
141# echo "L3:0=3;1=3" > /sys/fs/resctrl/p1/schemata
142
143The default resource group is unmodified, so we have access to all parts
144of all caches (its schemata file reads "L3:0=f;1=f").
145
146Tasks that are under the control of group "p0" may only allocate from the
147"lower" 50% on cache ID 0, and the "upper" 50% of cache ID 1.
148Tasks in group "p1" use the "lower" 50% of cache on both sockets.
149
150Example 2
151---------
152Again two sockets, but this time with a more realistic 20-bit mask.
153
154Two real time tasks pid=1234 running on processor 0 and pid=5678 running on
155processor 1 on socket 0 on a 2-socket and dual core machine. To avoid noisy
156neighbors, each of the two real-time tasks exclusively occupies one quarter
157of L3 cache on socket 0.
158
159# mount -t resctrl resctrl /sys/fs/resctrl
160# cd /sys/fs/resctrl
161
162First we reset the schemata for the default group so that the "upper"
16350% of the L3 cache on socket 0 cannot be used by ordinary tasks:
164
165# echo "L3:0=3ff;1=fffff" > schemata
166
167Next we make a resource group for our first real time task and give
168it access to the "top" 25% of the cache on socket 0.
169
170# mkdir p0
171# echo "L3:0=f8000;1=fffff" > p0/schemata
172
173Finally we move our first real time task into this resource group. We
174also use taskset(1) to ensure the task always runs on a dedicated CPU
175on socket 0. Most uses of resource groups will also constrain which
176processors tasks run on.
177
178# echo 1234 > p0/tasks
179# taskset -cp 1 1234
180
181Ditto for the second real time task (with the remaining 25% of cache):
182
183# mkdir p1
184# echo "L3:0=7c00;1=fffff" > p1/schemata
185# echo 5678 > p1/tasks
186# taskset -cp 2 5678
187
188Example 3
189---------
190
191A single socket system which has real-time tasks running on core 4-7 and
192non real-time workload assigned to core 0-3. The real-time tasks share text
193and data, so a per task association is not required and due to interaction
194with the kernel it's desired that the kernel on these cores shares L3 with
195the tasks.
196
197# mount -t resctrl resctrl /sys/fs/resctrl
198# cd /sys/fs/resctrl
199
200First we reset the schemata for the default group so that the "upper"
20150% of the L3 cache on socket 0 cannot be used by ordinary tasks:
202
203# echo "L3:0=3ff" > schemata
204
205Next we make a resource group for our real time cores and give
206it access to the "top" 50% of the cache on socket 0.
207
208# mkdir p0
209# echo "L3:0=ffc00;" > p0/schemata
210
211Finally we move core 4-7 over to the new group and make sure that the
212kernel and the tasks running there get 50% of the cache.
213
214# echo f0 > p0/cpus
diff --git a/MAINTAINERS b/MAINTAINERS
index f6eb97b35e0f..7c21c7638bb5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10327,6 +10327,14 @@ L: linux-rdma@vger.kernel.org
10327S: Supported
10328F: drivers/infiniband/sw/rdmavt
10329
10330RDT - RESOURCE ALLOCATION
10331M: Fenghua Yu <fenghua.yu@intel.com>
10332L: linux-kernel@vger.kernel.org
10333S: Supported
10334F: arch/x86/kernel/cpu/intel_rdt*
10335F: arch/x86/include/asm/intel_rdt*
10336F: Documentation/x86/intel_rdt*
10337
10338READ-COPY UPDATE (RCU)
10339M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
10340M: Josh Triplett <josh@joshtriplett.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 64024c999531..e487493bbd47 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -412,6 +412,19 @@ config GOLDFISH
412 def_bool y
413 depends on X86_GOLDFISH
414
415config INTEL_RDT_A
416 bool "Intel Resource Director Technology Allocation support"
417 default n
418 depends on X86 && CPU_SUP_INTEL
419 select KERNFS
420 help
421 Select to enable resource allocation which is a sub-feature of
422 Intel Resource Director Technology(RDT). More information about
423 RDT can be found in the Intel x86 Architecture Software
424 Developer Manual.
425
426 Say N if unsure.
427
428if X86_32
429config X86_EXTENDED_PLATFORM
430 bool "Support for extended (non-PC) x86 platforms"
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 8f82b02934fa..0c45cc8e64ba 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -7,9 +7,9 @@
 #include <linux/perf_event.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
+#include <asm/intel_rdt_common.h>
 #include "../perf_event.h"
 
-#define MSR_IA32_PQR_ASSOC	0x0c8f
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
@@ -24,32 +24,13 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 static bool cqm_enabled, mbm_enabled;
 unsigned int mbm_socket_max;
 
-/**
- * struct intel_pqr_state - State cache for the PQR MSR
- * @rmid:		The cached Resource Monitoring ID
- * @closid:		The cached Class Of Service ID
- * @rmid_usecnt:	The usage counter for rmid
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct intel_pqr_state {
-	u32			rmid;
-	u32			closid;
-	int			rmid_usecnt;
-};
-
 /*
  * The cached intel_pqr_state is strictly per CPU and can never be
  * updated from a remote CPU. Both functions which modify the state
  * (intel_cqm_event_start and intel_cqm_event_stop) are called with
  * interrupts disabled, which is sufficient for the protection.
  */
-static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 static struct hrtimer *mbm_timers;
 /**
  * struct sample - mbm event's (local or total) data
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 6ccbf1aaa7ce..eafee3161d1c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -189,6 +189,9 @@
189
190#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
191#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
192#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
193#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
194#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
195
196#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
197#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
@@ -222,6 +225,7 @@
225#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
226#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
227#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
228#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
229#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
230#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
231#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
new file mode 100644
index 000000000000..95ce5c85b009
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -0,0 +1,224 @@
1#ifndef _ASM_X86_INTEL_RDT_H
2#define _ASM_X86_INTEL_RDT_H
3
4#ifdef CONFIG_INTEL_RDT_A
5
6#include <linux/kernfs.h>
7#include <linux/jump_label.h>
8
9#include <asm/intel_rdt_common.h>
10
11#define IA32_L3_QOS_CFG 0xc81
12#define IA32_L3_CBM_BASE 0xc90
13#define IA32_L2_CBM_BASE 0xd10
14
15#define L3_QOS_CDP_ENABLE 0x01ULL
16
17/**
18 * struct rdtgroup - store rdtgroup's data in resctrl file system.
19 * @kn: kernfs node
20 * @rdtgroup_list: linked list for all rdtgroups
21 * @closid: closid for this rdtgroup
22 * @cpu_mask: CPUs assigned to this rdtgroup
23 * @flags: status bits
24 * @waitcount: how many cpus expect to find this
25 * group when they acquire rdtgroup_mutex
26 */
27struct rdtgroup {
28 struct kernfs_node *kn;
29 struct list_head rdtgroup_list;
30 int closid;
31 struct cpumask cpu_mask;
32 int flags;
33 atomic_t waitcount;
34};
35
36/* rdtgroup.flags */
37#define RDT_DELETED 1
38
39/* List of all resource groups */
40extern struct list_head rdt_all_groups;
41
42int __init rdtgroup_init(void);
43
44/**
45 * struct rftype - describe each file in the resctrl file system
46 * @name: file name
47 * @mode: access mode
48 * @kf_ops: operations
49 * @seq_show: show content of the file
50 * @write: write to the file
51 */
52struct rftype {
53 char *name;
54 umode_t mode;
55 struct kernfs_ops *kf_ops;
56
57 int (*seq_show)(struct kernfs_open_file *of,
58 struct seq_file *sf, void *v);
59 /*
60 * write() is the generic write callback which maps directly to
61 * kernfs write operation and overrides all other operations.
62 * Maximum write size is determined by ->max_write_len.
63 */
64 ssize_t (*write)(struct kernfs_open_file *of,
65 char *buf, size_t nbytes, loff_t off);
66};
67
68/**
69 * struct rdt_resource - attributes of an RDT resource
70 * @enabled: Is this feature enabled on this machine
71 * @capable: Is this feature available on this machine
72 * @name: Name to use in "schemata" file
73 * @num_closid: Number of CLOSIDs available
74 * @max_cbm: Largest Cache Bit Mask allowed
75 * @min_cbm_bits: Minimum number of consecutive bits to be set
76 * in a cache bit mask
77 * @domains: All domains for this resource
78 * @num_domains: Number of domains active
79 * @msr_base: Base MSR address for CBMs
80 * @tmp_cbms: Scratch space when updating schemata
81 * @num_tmp_cbms: Number of CBMs in tmp_cbms
82 * @cache_level: Which cache level defines scope of this domain
83 * @cbm_idx_multi: Multiplier of CBM index
84 * @cbm_idx_offset: Offset of CBM index. CBM index is computed by:
85 * closid * cbm_idx_multi + cbm_idx_offset
86 */
87struct rdt_resource {
88 bool enabled;
89 bool capable;
90 char *name;
91 int num_closid;
92 int cbm_len;
93 int min_cbm_bits;
94 u32 max_cbm;
95 struct list_head domains;
96 int num_domains;
97 int msr_base;
98 u32 *tmp_cbms;
99 int num_tmp_cbms;
100 int cache_level;
101 int cbm_idx_multi;
102 int cbm_idx_offset;
103};
104
105/**
106 * struct rdt_domain - group of cpus sharing an RDT resource
107 * @list: all instances of this resource
108 * @id: unique id for this instance
109 * @cpu_mask: which cpus share this resource
110 * @cbm: array of cache bit masks (indexed by CLOSID)
111 */
112struct rdt_domain {
113 struct list_head list;
114 int id;
115 struct cpumask cpu_mask;
116 u32 *cbm;
117};
118
119/**
120 * struct msr_param - set a range of MSRs from a domain
121 * @res: The resource to use
122 * @low: Beginning index from base MSR
123 * @high: End index
124 */
125struct msr_param {
126 struct rdt_resource *res;
127 int low;
128 int high;
129};
130
131extern struct mutex rdtgroup_mutex;
132
133extern struct rdt_resource rdt_resources_all[];
134extern struct rdtgroup rdtgroup_default;
135DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
136
137int __init rdtgroup_init(void);
138
139enum {
140 RDT_RESOURCE_L3,
141 RDT_RESOURCE_L3DATA,
142 RDT_RESOURCE_L3CODE,
143 RDT_RESOURCE_L2,
144
145 /* Must be the last */
146 RDT_NUM_RESOURCES,
147};
148
149#define for_each_capable_rdt_resource(r) \
150 for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
151 r++) \
152 if (r->capable)
153
154#define for_each_enabled_rdt_resource(r) \
155 for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\
156 r++) \
157 if (r->enabled)
158
159/* CPUID.(EAX=10H, ECX=ResID=1).EAX */
160union cpuid_0x10_1_eax {
161 struct {
162 unsigned int cbm_len:5;
163 } split;
164 unsigned int full;
165};
166
167/* CPUID.(EAX=10H, ECX=ResID=1).EDX */
168union cpuid_0x10_1_edx {
169 struct {
170 unsigned int cos_max:16;
171 } split;
172 unsigned int full;
173};
174
175DECLARE_PER_CPU_READ_MOSTLY(int, cpu_closid);
176
177void rdt_cbm_update(void *arg);
178struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
179void rdtgroup_kn_unlock(struct kernfs_node *kn);
180ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
181 char *buf, size_t nbytes, loff_t off);
182int rdtgroup_schemata_show(struct kernfs_open_file *of,
183 struct seq_file *s, void *v);
184
185/*
186 * intel_rdt_sched_in() - Writes the task's CLOSid to IA32_PQR_MSR
187 *
188 * Following considerations are made so that this has minimal impact
189 * on scheduler hot path:
190 * - This will stay as no-op unless we are running on an Intel SKU
191 * which supports resource control and we enable by mounting the
192 * resctrl file system.
193 * - Caches the per cpu CLOSid values and does the MSR write only
194 * when a task with a different CLOSid is scheduled in.
195 *
196 * Must be called with preemption disabled.
197 */
198static inline void intel_rdt_sched_in(void)
199{
200 if (static_branch_likely(&rdt_enable_key)) {
201 struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
202 int closid;
203
204 /*
205 * If this task has a closid assigned, use it.
206 * Else use the closid assigned to this cpu.
207 */
208 closid = current->closid;
209 if (closid == 0)
210 closid = this_cpu_read(cpu_closid);
211
212 if (closid != state->closid) {
213 state->closid = closid;
214 wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, closid);
215 }
216 }
217}
218
219#else
220
221static inline void intel_rdt_sched_in(void) {}
222
223#endif /* CONFIG_INTEL_RDT_A */
224#endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/include/asm/intel_rdt_common.h b/arch/x86/include/asm/intel_rdt_common.h
new file mode 100644
index 000000000000..b31081b89407
--- /dev/null
+++ b/arch/x86/include/asm/intel_rdt_common.h
@@ -0,0 +1,27 @@
1#ifndef _ASM_X86_INTEL_RDT_COMMON_H
2#define _ASM_X86_INTEL_RDT_COMMON_H
3
4#define MSR_IA32_PQR_ASSOC 0x0c8f
5
6/**
7 * struct intel_pqr_state - State cache for the PQR MSR
8 * @rmid: The cached Resource Monitoring ID
9 * @closid: The cached Class Of Service ID
10 * @rmid_usecnt: The usage counter for rmid
11 *
12 * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
13 * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
14 * contains both parts, so we need to cache them.
15 *
16 * The cache also helps to avoid pointless updates if the value does
17 * not change.
18 */
19struct intel_pqr_state {
20 u32 rmid;
21 u32 closid;
22 int rmid_usecnt;
23};
24
25DECLARE_PER_CPU(struct intel_pqr_state, pqr_state);
26
27#endif /* _ASM_X86_INTEL_RDT_COMMON_H */
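
As the comment above describes, closid occupies the upper 32 bits of
IA32_PQR_ASSOC and rmid the low bits; a minimal sketch of how the two fields
compose into the written value (illustrative, user space only):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: compose IA32_PQR_ASSOC from closid (upper 32 bits)
 * and rmid (lower bits), per the layout documented above. */
static uint64_t pqr_assoc_val(uint32_t rmid, uint32_t closid)
{
	return ((uint64_t)closid << 32) | rmid;
}

int main(void)
{
	/* rmid 5, closid 3 -> 0x300000005 */
	printf("%#llx\n", (unsigned long long)pqr_assoc_val(5, 3));
	return 0;
}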
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 33b63670bf09..52000010c62e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,6 +32,8 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
32obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
33obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
34
35obj-$(CONFIG_INTEL_RDT_A) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_schemata.o
36
37obj-$(CONFIG_X86_MCE) += mcheck/
38obj-$(CONFIG_MTRR) += mtrr/
39obj-$(CONFIG_MICROCODE) += microcode/
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index be6337156502..0282b0df004a 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -153,6 +153,7 @@ struct _cpuid4_info_regs {
153 union _cpuid4_leaf_eax eax;
154 union _cpuid4_leaf_ebx ebx;
155 union _cpuid4_leaf_ecx ecx;
156 unsigned int id;
157 unsigned long size;
158 struct amd_northbridge *nb;
159};
@@ -894,6 +895,8 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
895static void ci_leaf_init(struct cacheinfo *this_leaf,
896 struct _cpuid4_info_regs *base)
897{
898 this_leaf->id = base->id;
899 this_leaf->attributes = CACHE_ID;
900 this_leaf->level = base->eax.split.level;
901 this_leaf->type = cache_type_map[base->eax.split.type];
902 this_leaf->coherency_line_size =
@@ -920,6 +923,22 @@ static int __init_cache_level(unsigned int cpu)
923 return 0;
924}
925
926/*
927 * The max shared threads number comes from CPUID.4:EAX[25-14] with input
928 * ECX as cache index. Then right shift apicid by the number's order to get
929 * cache id for this cache node.
930 */
931static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
932{
933 struct cpuinfo_x86 *c = &cpu_data(cpu);
934 unsigned long num_threads_sharing;
935 int index_msb;
936
937 num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
938 index_msb = get_count_order(num_threads_sharing);
939 id4_regs->id = c->apicid >> index_msb;
940}
941
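As a worked example of the computation above: if CPUID.4 reports
num_threads_sharing = 15 for the L3 leaf (16 threads per socket), index_msb
is 4, so APIC IDs 0-15 yield cache id 0 and 16-31 yield cache id 1. A minimal
user-space sketch of the same arithmetic (cache_id() is an illustrative name):

#include <stdio.h>

/* Illustrative only: user-space version of the id computation above. */
static unsigned int cache_id(unsigned int apicid,
			     unsigned int num_threads_sharing)
{
	unsigned int index_msb = 0;

	/* Same result as get_count_order(): smallest n with 2^n >= count. */
	while ((1u << index_msb) < num_threads_sharing)
		index_msb++;

	return apicid >> index_msb;
}

int main(void)
{
	/* 16 threads share the cache: APIC IDs 7 and 23 -> ids 0 and 1. */
	printf("%u %u\n", cache_id(7, 16), cache_id(23, 16));
	return 0;
}
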
942static int __populate_cache_leaves(unsigned int cpu)
943{
944 unsigned int idx, ret;
@@ -931,6 +950,7 @@ static int __populate_cache_leaves(unsigned int cpu)
950 ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
951 if (ret)
952 return ret;
953 get_cache_id(cpu, &id4_regs);
954 ci_leaf_init(this_leaf++, &id4_regs);
955 __cache_cpumap_setup(cpu, idx, &id4_regs);
956 }
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
new file mode 100644
index 000000000000..5a533fefefa0
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -0,0 +1,403 @@
1/*
2 * Resource Director Technology(RDT)
3 * - Cache Allocation code.
4 *
5 * Copyright (C) 2016 Intel Corporation
6 *
7 * Authors:
8 * Fenghua Yu <fenghua.yu@intel.com>
9 * Tony Luck <tony.luck@intel.com>
10 * Vikas Shivappa <vikas.shivappa@intel.com>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * More information about RDT can be found in the Intel (R) x86 Architecture
22 * Software Developer Manual June 2016, volume 3, section 17.17.
23 */
24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
27#include <linux/slab.h>
28#include <linux/err.h>
29#include <linux/cacheinfo.h>
30#include <linux/cpuhotplug.h>
31
32#include <asm/intel-family.h>
33#include <asm/intel_rdt.h>
34
35/* Mutex to protect rdtgroup access. */
36DEFINE_MUTEX(rdtgroup_mutex);
37
38DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
39
40#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
41
42struct rdt_resource rdt_resources_all[] = {
43 {
44 .name = "L3",
45 .domains = domain_init(RDT_RESOURCE_L3),
46 .msr_base = IA32_L3_CBM_BASE,
47 .min_cbm_bits = 1,
48 .cache_level = 3,
49 .cbm_idx_multi = 1,
50 .cbm_idx_offset = 0
51 },
52 {
53 .name = "L3DATA",
54 .domains = domain_init(RDT_RESOURCE_L3DATA),
55 .msr_base = IA32_L3_CBM_BASE,
56 .min_cbm_bits = 1,
57 .cache_level = 3,
58 .cbm_idx_multi = 2,
59 .cbm_idx_offset = 0
60 },
61 {
62 .name = "L3CODE",
63 .domains = domain_init(RDT_RESOURCE_L3CODE),
64 .msr_base = IA32_L3_CBM_BASE,
65 .min_cbm_bits = 1,
66 .cache_level = 3,
67 .cbm_idx_multi = 2,
68 .cbm_idx_offset = 1
69 },
70 {
71 .name = "L2",
72 .domains = domain_init(RDT_RESOURCE_L2),
73 .msr_base = IA32_L2_CBM_BASE,
74 .min_cbm_bits = 1,
75 .cache_level = 2,
76 .cbm_idx_multi = 1,
77 .cbm_idx_offset = 0
78 },
79};
80
81static int cbm_idx(struct rdt_resource *r, int closid)
82{
83 return closid * r->cbm_idx_multi + r->cbm_idx_offset;
84}
85
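A worked example of the cbm_idx() mapping above, using the cbm_idx_multi and
cbm_idx_offset values that the CDP resources use (2/0 for L3DATA, 2/1 for
L3CODE): CLOSID 3 lands on MSRs 0xc96 and 0xc97. A minimal sketch:

#include <stdio.h>

#define IA32_L3_CBM_BASE 0xc90

/* Illustrative only: reproduce the CBM index -> MSR mapping above. */
static int cbm_idx(int cbm_idx_multi, int cbm_idx_offset, int closid)
{
	return closid * cbm_idx_multi + cbm_idx_offset;
}

int main(void)
{
	int closid = 3;

	/* With CDP: L3DATA uses multi=2/offset=0, L3CODE uses multi=2/offset=1. */
	printf("L3DATA MSR %#x\n", IA32_L3_CBM_BASE + cbm_idx(2, 0, closid)); /* 0xc96 */
	printf("L3CODE MSR %#x\n", IA32_L3_CBM_BASE + cbm_idx(2, 1, closid)); /* 0xc97 */
	return 0;
}
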
86/*
87 * cache_alloc_hsw_probe() - Have to probe for Intel haswell server CPUs
88 * as they do not have CPUID enumeration support for Cache allocation.
89 * The check for Vendor/Family/Model is not enough to guarantee that
90 * the MSRs won't #GP fault because only the following SKUs support
91 * CAT:
92 * Intel(R) Xeon(R) CPU E5-2658 v3 @ 2.20GHz
93 * Intel(R) Xeon(R) CPU E5-2648L v3 @ 1.80GHz
94 * Intel(R) Xeon(R) CPU E5-2628L v3 @ 2.00GHz
95 * Intel(R) Xeon(R) CPU E5-2618L v3 @ 2.30GHz
96 * Intel(R) Xeon(R) CPU E5-2608L v3 @ 2.00GHz
97 * Intel(R) Xeon(R) CPU E5-2658A v3 @ 2.20GHz
98 *
99 * Probe by trying to write the first of the L3 cache mask registers
100 * and checking that the bits stick. Max CLOSids is always 4 and max cbm length
101 * is always 20 on hsw server parts. The minimum cache bitmask length
102 * allowed for HSW server is always 2 bits. Hardcode all of them.
103 */
104static inline bool cache_alloc_hsw_probe(void)
105{
106 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
107 boot_cpu_data.x86 == 6 &&
108 boot_cpu_data.x86_model == INTEL_FAM6_HASWELL_X) {
109 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
110 u32 l, h, max_cbm = BIT_MASK(20) - 1;
111
112 if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0))
113 return false;
114 rdmsr(IA32_L3_CBM_BASE, l, h);
115
116 /* If all the bits were set in MSR, return success */
117 if (l != max_cbm)
118 return false;
119
120 r->num_closid = 4;
121 r->cbm_len = 20;
122 r->max_cbm = max_cbm;
123 r->min_cbm_bits = 2;
124 r->capable = true;
125 r->enabled = true;
126
127 return true;
128 }
129
130 return false;
131}
132
133static void rdt_get_config(int idx, struct rdt_resource *r)
134{
135 union cpuid_0x10_1_eax eax;
136 union cpuid_0x10_1_edx edx;
137 u32 ebx, ecx;
138
139 cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
140 r->num_closid = edx.split.cos_max + 1;
141 r->cbm_len = eax.split.cbm_len + 1;
142 r->max_cbm = BIT_MASK(eax.split.cbm_len + 1) - 1;
143 r->capable = true;
144 r->enabled = true;
145}
146
147static void rdt_get_cdp_l3_config(int type)
148{
149 struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
150 struct rdt_resource *r = &rdt_resources_all[type];
151
152 r->num_closid = r_l3->num_closid / 2;
153 r->cbm_len = r_l3->cbm_len;
154 r->max_cbm = r_l3->max_cbm;
155 r->capable = true;
156 /*
157 * By default, CDP is disabled. CDP can be enabled by mount parameter
158 * "cdp" during resctrl file system mount time.
159 */
160 r->enabled = false;
161}
162
163static inline bool get_rdt_resources(void)
164{
165 bool ret = false;
166
167 if (cache_alloc_hsw_probe())
168 return true;
169
170 if (!boot_cpu_has(X86_FEATURE_RDT_A))
171 return false;
172
173 if (boot_cpu_has(X86_FEATURE_CAT_L3)) {
174 rdt_get_config(1, &rdt_resources_all[RDT_RESOURCE_L3]);
175 if (boot_cpu_has(X86_FEATURE_CDP_L3)) {
176 rdt_get_cdp_l3_config(RDT_RESOURCE_L3DATA);
177 rdt_get_cdp_l3_config(RDT_RESOURCE_L3CODE);
178 }
179 ret = true;
180 }
181 if (boot_cpu_has(X86_FEATURE_CAT_L2)) {
182 /* CPUID 0x10.2 fields are the same format as 0x10.1 */
183 rdt_get_config(2, &rdt_resources_all[RDT_RESOURCE_L2]);
184 ret = true;
185 }
186
187 return ret;
188}
189
190static int get_cache_id(int cpu, int level)
191{
192 struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
193 int i;
194
195 for (i = 0; i < ci->num_leaves; i++) {
196 if (ci->info_list[i].level == level)
197 return ci->info_list[i].id;
198 }
199
200 return -1;
201}
202
203void rdt_cbm_update(void *arg)
204{
205 struct msr_param *m = (struct msr_param *)arg;
206 struct rdt_resource *r = m->res;
207 int i, cpu = smp_processor_id();
208 struct rdt_domain *d;
209
210 list_for_each_entry(d, &r->domains, list) {
211 /* Find the domain that contains this CPU */
212 if (cpumask_test_cpu(cpu, &d->cpu_mask))
213 goto found;
214 }
215 pr_info_once("cpu %d not found in any domain for resource %s\n",
216 cpu, r->name);
217
218 return;
219
220found:
221 for (i = m->low; i < m->high; i++) {
222 int idx = cbm_idx(r, i);
223
224 wrmsrl(r->msr_base + idx, d->cbm[i]);
225 }
226}
227
228/*
229 * rdt_find_domain - Find a domain in a resource that matches input resource id
230 *
231 * Search resource r's domain list to find the resource id. If the resource
232 * id is found in a domain, return the domain. Otherwise, if requested by
233 * caller, return the first domain whose id is bigger than the input id.
234 * The domain list is sorted by id in ascending order.
235 */
236static struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
237 struct list_head **pos)
238{
239 struct rdt_domain *d;
240 struct list_head *l;
241
242 if (id < 0)
243 return ERR_PTR(id);
244
245 list_for_each(l, &r->domains) {
246 d = list_entry(l, struct rdt_domain, list);
247 /* When id is found, return its domain. */
248 if (id == d->id)
249 return d;
250 /* Stop searching when finding id's position in sorted list. */
251 if (id < d->id)
252 break;
253 }
254
255 if (pos)
256 *pos = l;
257
258 return NULL;
259}
260
261/*
262 * domain_add_cpu - Add a cpu to a resource's domain list.
263 *
264 * If an existing domain in the resource r's domain list matches the cpu's
265 * resource id, add the cpu in the domain.
266 *
267 * Otherwise, a new domain is allocated and inserted into the right position
268 * in the domain list sorted by id in ascending order.
269 *
270 * The order in the domain list is visible to users when we print entries
271 * in the schemata file and schemata input is validated to have the same order
272 * as this list.
273 */
274static void domain_add_cpu(int cpu, struct rdt_resource *r)
275{
276 int i, id = get_cache_id(cpu, r->cache_level);
277 struct list_head *add_pos = NULL;
278 struct rdt_domain *d;
279
280 d = rdt_find_domain(r, id, &add_pos);
281 if (IS_ERR(d)) {
282 pr_warn("Couldn't find cache id for cpu %d\n", cpu);
283 return;
284 }
285
286 if (d) {
287 cpumask_set_cpu(cpu, &d->cpu_mask);
288 return;
289 }
290
291 d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
292 if (!d)
293 return;
294
295 d->id = id;
296
297 d->cbm = kmalloc_array(r->num_closid, sizeof(*d->cbm), GFP_KERNEL);
298 if (!d->cbm) {
299 kfree(d);
300 return;
301 }
302
303 for (i = 0; i < r->num_closid; i++) {
304 int idx = cbm_idx(r, i);
305
306 d->cbm[i] = r->max_cbm;
307 wrmsrl(r->msr_base + idx, d->cbm[i]);
308 }
309
310 cpumask_set_cpu(cpu, &d->cpu_mask);
311 list_add_tail(&d->list, add_pos);
312 r->num_domains++;
313}
314
315static void domain_remove_cpu(int cpu, struct rdt_resource *r)
316{
317 int id = get_cache_id(cpu, r->cache_level);
318 struct rdt_domain *d;
319
320 d = rdt_find_domain(r, id, NULL);
321 if (IS_ERR_OR_NULL(d)) {
322 pr_warn("Couldn't find cache id for cpu %d\n", cpu);
323 return;
324 }
325
326 cpumask_clear_cpu(cpu, &d->cpu_mask);
327 if (cpumask_empty(&d->cpu_mask)) {
328 r->num_domains--;
329 kfree(d->cbm);
330 list_del(&d->list);
331 kfree(d);
332 }
333}
334
335static void clear_closid(int cpu)
336{
337 struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
338
339 per_cpu(cpu_closid, cpu) = 0;
340 state->closid = 0;
341 wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
342}
343
344static int intel_rdt_online_cpu(unsigned int cpu)
345{
346 struct rdt_resource *r;
347
348 mutex_lock(&rdtgroup_mutex);
349 for_each_capable_rdt_resource(r)
350 domain_add_cpu(cpu, r);
351 /* The cpu is set in default rdtgroup after online. */
352 cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
353 clear_closid(cpu);
354 mutex_unlock(&rdtgroup_mutex);
355
356 return 0;
357}
358
359static int intel_rdt_offline_cpu(unsigned int cpu)
360{
361 struct rdtgroup *rdtgrp;
362 struct rdt_resource *r;
363
364 mutex_lock(&rdtgroup_mutex);
365 for_each_capable_rdt_resource(r)
366 domain_remove_cpu(cpu, r);
367 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
368 if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask))
369 break;
370 }
371 clear_closid(cpu);
372 mutex_unlock(&rdtgroup_mutex);
373
374 return 0;
375}
376
377static int __init intel_rdt_late_init(void)
378{
379 struct rdt_resource *r;
380 int state, ret;
381
382 if (!get_rdt_resources())
383 return -ENODEV;
384
385 state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
386 "x86/rdt/cat:online:",
387 intel_rdt_online_cpu, intel_rdt_offline_cpu);
388 if (state < 0)
389 return state;
390
391 ret = rdtgroup_init();
392 if (ret) {
393 cpuhp_remove_state(state);
394 return ret;
395 }
396
397 for_each_capable_rdt_resource(r)
398 pr_info("Intel RDT %s allocation detected\n", r->name);
399
400 return 0;
401}
402
403late_initcall(intel_rdt_late_init);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
new file mode 100644
index 000000000000..8af04afdfcb9
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -0,0 +1,1115 @@
1/*
2 * User interface for Resource Allocation in Resource Director Technology (RDT)
3 *
4 * Copyright (C) 2016 Intel Corporation
5 *
6 * Author: Fenghua Yu <fenghua.yu@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * More information about RDT can be found in the Intel (R) x86 Architecture
18 * Software Developer Manual.
19 */
20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/cpu.h>
24#include <linux/fs.h>
25#include <linux/sysfs.h>
26#include <linux/kernfs.h>
27#include <linux/seq_file.h>
28#include <linux/sched.h>
29#include <linux/slab.h>
30#include <linux/cpu.h>
31#include <linux/task_work.h>
32
33#include <uapi/linux/magic.h>
34
35#include <asm/intel_rdt.h>
36#include <asm/intel_rdt_common.h>
37
38DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
39struct kernfs_root *rdt_root;
40struct rdtgroup rdtgroup_default;
41LIST_HEAD(rdt_all_groups);
42
43/* Kernel fs node for "info" directory under root */
44static struct kernfs_node *kn_info;
45
46/*
47 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
48 * we can keep a bitmap of free CLOSIDs in a single integer.
49 *
50 * Using a global CLOSID across all resources has some advantages and
51 * some drawbacks:
52 * + We can simply set "current->closid" to assign a task to a resource
53 * group.
54 * + Context switch code can avoid extra memory references deciding which
55 * CLOSID to load into the PQR_ASSOC MSR
56 * - We give up some options in configuring resource groups across multi-socket
57 * systems.
58 * - Our choices on how to configure each resource become progressively more
59 * limited as the number of resources grows.
60 */
61static int closid_free_map;
62
63static void closid_init(void)
64{
65 struct rdt_resource *r;
66 int rdt_min_closid = 32;
67
68 /* Compute rdt_min_closid across all resources */
69 for_each_enabled_rdt_resource(r)
70 rdt_min_closid = min(rdt_min_closid, r->num_closid);
71
72 closid_free_map = BIT_MASK(rdt_min_closid) - 1;
73
74 /* CLOSID 0 is always reserved for the default group */
75 closid_free_map &= ~1;
76}
77
78int closid_alloc(void)
79{
80 int closid = ffs(closid_free_map);
81
82 if (closid == 0)
83 return -ENOSPC;
84 closid--;
85 closid_free_map &= ~(1 << closid);
86
87 return closid;
88}
89
90static void closid_free(int closid)
91{
92 closid_free_map |= 1 << closid;
93}
94
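
For example, if the smallest num_closid across enabled resources is 4,
closid_init() leaves closid_free_map = 0xe (CLOSID 0 reserved), so three
allocations succeed and the fourth fails. A minimal user-space sketch of the
same bitmap allocator (illustrative, -1 stands in for -ENOSPC):

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Illustrative re-implementation of the CLOSID bitmap allocator above. */
static int closid_free_map;

static void closid_init(int rdt_min_closid)
{
	closid_free_map = (1 << rdt_min_closid) - 1;
	/* CLOSID 0 is always reserved for the default group. */
	closid_free_map &= ~1;
}

static int closid_alloc(void)
{
	int closid = ffs(closid_free_map);

	if (closid == 0)
		return -1;	/* the kernel returns -ENOSPC here */
	closid--;
	closid_free_map &= ~(1 << closid);
	return closid;
}

int main(void)
{
	closid_init(4);
	/* Prints "1 2 3 -1": three usable CLOSIDs, then the map is empty. */
	printf("%d %d %d %d\n", closid_alloc(), closid_alloc(),
	       closid_alloc(), closid_alloc());
	return 0;
}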
95/* set uid and gid of rdtgroup dirs and files to that of the creator */
96static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
97{
98 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
99 .ia_uid = current_fsuid(),
100 .ia_gid = current_fsgid(), };
101
102 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
103 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
104 return 0;
105
106 return kernfs_setattr(kn, &iattr);
107}
108
109static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
110{
111 struct kernfs_node *kn;
112 int ret;
113
114 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
115 0, rft->kf_ops, rft, NULL, NULL);
116 if (IS_ERR(kn))
117 return PTR_ERR(kn);
118
119 ret = rdtgroup_kn_set_ugid(kn);
120 if (ret) {
121 kernfs_remove(kn);
122 return ret;
123 }
124
125 return 0;
126}
127
128static int rdtgroup_add_files(struct kernfs_node *kn, struct rftype *rfts,
129 int len)
130{
131 struct rftype *rft;
132 int ret;
133
134 lockdep_assert_held(&rdtgroup_mutex);
135
136 for (rft = rfts; rft < rfts + len; rft++) {
137 ret = rdtgroup_add_file(kn, rft);
138 if (ret)
139 goto error;
140 }
141
142 return 0;
143error:
144 pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
145 while (--rft >= rfts)
146 kernfs_remove_by_name(kn, rft->name);
147 return ret;
148}
149
150static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
151{
152 struct kernfs_open_file *of = m->private;
153 struct rftype *rft = of->kn->priv;
154
155 if (rft->seq_show)
156 return rft->seq_show(of, m, arg);
157 return 0;
158}
159
160static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
161 size_t nbytes, loff_t off)
162{
163 struct rftype *rft = of->kn->priv;
164
165 if (rft->write)
166 return rft->write(of, buf, nbytes, off);
167
168 return -EINVAL;
169}
170
171static struct kernfs_ops rdtgroup_kf_single_ops = {
172 .atomic_write_len = PAGE_SIZE,
173 .write = rdtgroup_file_write,
174 .seq_show = rdtgroup_seqfile_show,
175};
176
177static int rdtgroup_cpus_show(struct kernfs_open_file *of,
178 struct seq_file *s, void *v)
179{
180 struct rdtgroup *rdtgrp;
181 int ret = 0;
182
183 rdtgrp = rdtgroup_kn_lock_live(of->kn);
184
185 if (rdtgrp)
186 seq_printf(s, "%*pb\n", cpumask_pr_args(&rdtgrp->cpu_mask));
187 else
188 ret = -ENOENT;
189 rdtgroup_kn_unlock(of->kn);
190
191 return ret;
192}
193
194/*
195 * This is safe against intel_rdt_sched_in() called from __switch_to()
196 * because __switch_to() is executed with interrupts disabled. A local call
197 * from rdt_update_closid() is protected against __switch_to() because
198 * preemption is disabled.
199 */
200static void rdt_update_cpu_closid(void *closid)
201{
202 if (closid)
203 this_cpu_write(cpu_closid, *(int *)closid);
204 /*
205 * We cannot unconditionally write the MSR because the current
206 * executing task might have its own closid selected. Just reuse
207 * the context switch code.
208 */
209 intel_rdt_sched_in();
210}
211
212/*
213 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
214 *
215 * Per task closids must have been set up before calling this function.
216 *
217 * The per cpu closids are updated with the smp function call, when @closid
218 * is not NULL. If @closid is NULL then all affected percpu closids must
219 * have been set up before calling this function.
220 */
221static void
222rdt_update_closid(const struct cpumask *cpu_mask, int *closid)
223{
224 int cpu = get_cpu();
225
226 if (cpumask_test_cpu(cpu, cpu_mask))
227 rdt_update_cpu_closid(closid);
228 smp_call_function_many(cpu_mask, rdt_update_cpu_closid, closid, 1);
229 put_cpu();
230}
231
232static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
233 char *buf, size_t nbytes, loff_t off)
234{
235 cpumask_var_t tmpmask, newmask;
236 struct rdtgroup *rdtgrp, *r;
237 int ret;
238
239 if (!buf)
240 return -EINVAL;
241
242 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
243 return -ENOMEM;
244 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
245 free_cpumask_var(tmpmask);
246 return -ENOMEM;
247 }
248
249 rdtgrp = rdtgroup_kn_lock_live(of->kn);
250 if (!rdtgrp) {
251 ret = -ENOENT;
252 goto unlock;
253 }
254
255 ret = cpumask_parse(buf, newmask);
256 if (ret)
257 goto unlock;
258
259 /* check that user didn't specify any offline cpus */
260 cpumask_andnot(tmpmask, newmask, cpu_online_mask);
261 if (cpumask_weight(tmpmask)) {
262 ret = -EINVAL;
263 goto unlock;
264 }
265
266 /* Check whether cpus are dropped from this group */
267 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
268 if (cpumask_weight(tmpmask)) {
269 /* Can't drop from default group */
270 if (rdtgrp == &rdtgroup_default) {
271 ret = -EINVAL;
272 goto unlock;
273 }
274 /* Give any dropped cpus to rdtgroup_default */
275 cpumask_or(&rdtgroup_default.cpu_mask,
276 &rdtgroup_default.cpu_mask, tmpmask);
277 rdt_update_closid(tmpmask, &rdtgroup_default.closid);
278 }
279
280 /*
281 * If we added cpus, remove them from previous group that owned them
282 * and update per-cpu closid
283 */
284 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
285 if (cpumask_weight(tmpmask)) {
286 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
287 if (r == rdtgrp)
288 continue;
289 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, tmpmask);
290 }
291 rdt_update_closid(tmpmask, &rdtgrp->closid);
292 }
293
294 /* Done pushing/pulling - update this group with new mask */
295 cpumask_copy(&rdtgrp->cpu_mask, newmask);
296
297unlock:
298 rdtgroup_kn_unlock(of->kn);
299 free_cpumask_var(tmpmask);
300 free_cpumask_var(newmask);
301
302 return ret ?: nbytes;
303}
304
305struct task_move_callback {
306 struct callback_head work;
307 struct rdtgroup *rdtgrp;
308};
309
310static void move_myself(struct callback_head *head)
311{
312 struct task_move_callback *callback;
313 struct rdtgroup *rdtgrp;
314
315 callback = container_of(head, struct task_move_callback, work);
316 rdtgrp = callback->rdtgrp;
317
318 /*
319 * If resource group was deleted before this task work callback
320 * was invoked, then assign the task to root group and free the
321 * resource group.
322 */
323 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
324 (rdtgrp->flags & RDT_DELETED)) {
325 current->closid = 0;
326 kfree(rdtgrp);
327 }
328
329 preempt_disable();
330 /* update PQR_ASSOC MSR to make resource group go into effect */
331 intel_rdt_sched_in();
332 preempt_enable();
333
334 kfree(callback);
335}
336
337static int __rdtgroup_move_task(struct task_struct *tsk,
338 struct rdtgroup *rdtgrp)
339{
340 struct task_move_callback *callback;
341 int ret;
342
343 callback = kzalloc(sizeof(*callback), GFP_KERNEL);
344 if (!callback)
345 return -ENOMEM;
346 callback->work.func = move_myself;
347 callback->rdtgrp = rdtgrp;
348
349 /*
350 * Take a refcount, so rdtgrp cannot be freed before the
351 * callback has been invoked.
352 */
353 atomic_inc(&rdtgrp->waitcount);
354 ret = task_work_add(tsk, &callback->work, true);
355 if (ret) {
356 /*
357 * Task is exiting. Drop the refcount and free the callback.
358 * No need to check the refcount as the group cannot be
359 * deleted before the write function unlocks rdtgroup_mutex.
360 */
361 atomic_dec(&rdtgrp->waitcount);
362 kfree(callback);
363 } else {
364 tsk->closid = rdtgrp->closid;
365 }
366 return ret;
367}
368
369static int rdtgroup_task_write_permission(struct task_struct *task,
370 struct kernfs_open_file *of)
371{
372 const struct cred *tcred = get_task_cred(task);
373 const struct cred *cred = current_cred();
374 int ret = 0;
375
376 /*
377 * Even if we're attaching all tasks in the thread group, we only
378 * need to check permissions on one of them.
379 */
380 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
381 !uid_eq(cred->euid, tcred->uid) &&
382 !uid_eq(cred->euid, tcred->suid))
383 ret = -EPERM;
384
385 put_cred(tcred);
386 return ret;
387}
388
389static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
390 struct kernfs_open_file *of)
391{
392 struct task_struct *tsk;
393 int ret;
394
395 rcu_read_lock();
396 if (pid) {
397 tsk = find_task_by_vpid(pid);
398 if (!tsk) {
399 rcu_read_unlock();
400 return -ESRCH;
401 }
402 } else {
403 tsk = current;
404 }
405
406 get_task_struct(tsk);
407 rcu_read_unlock();
408
409 ret = rdtgroup_task_write_permission(tsk, of);
410 if (!ret)
411 ret = __rdtgroup_move_task(tsk, rdtgrp);
412
413 put_task_struct(tsk);
414 return ret;
415}
416
417static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
418 char *buf, size_t nbytes, loff_t off)
419{
420 struct rdtgroup *rdtgrp;
421 int ret = 0;
422 pid_t pid;
423
424 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
425 return -EINVAL;
426 rdtgrp = rdtgroup_kn_lock_live(of->kn);
427
428 if (rdtgrp)
429 ret = rdtgroup_move_task(pid, rdtgrp, of);
430 else
431 ret = -ENOENT;
432
433 rdtgroup_kn_unlock(of->kn);
434
435 return ret ?: nbytes;
436}
437
438static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
439{
440 struct task_struct *p, *t;
441
442 rcu_read_lock();
443 for_each_process_thread(p, t) {
444 if (t->closid == r->closid)
445 seq_printf(s, "%d\n", t->pid);
446 }
447 rcu_read_unlock();
448}
449
450static int rdtgroup_tasks_show(struct kernfs_open_file *of,
451 struct seq_file *s, void *v)
452{
453 struct rdtgroup *rdtgrp;
454 int ret = 0;
455
456 rdtgrp = rdtgroup_kn_lock_live(of->kn);
457 if (rdtgrp)
458 show_rdt_tasks(rdtgrp, s);
459 else
460 ret = -ENOENT;
461 rdtgroup_kn_unlock(of->kn);
462
463 return ret;
464}
465
466/* Files in each rdtgroup */
467static struct rftype rdtgroup_base_files[] = {
468 {
469 .name = "cpus",
470 .mode = 0644,
471 .kf_ops = &rdtgroup_kf_single_ops,
472 .write = rdtgroup_cpus_write,
473 .seq_show = rdtgroup_cpus_show,
474 },
475 {
476 .name = "tasks",
477 .mode = 0644,
478 .kf_ops = &rdtgroup_kf_single_ops,
479 .write = rdtgroup_tasks_write,
480 .seq_show = rdtgroup_tasks_show,
481 },
482 {
483 .name = "schemata",
484 .mode = 0644,
485 .kf_ops = &rdtgroup_kf_single_ops,
486 .write = rdtgroup_schemata_write,
487 .seq_show = rdtgroup_schemata_show,
488 },
489};
490
491static int rdt_num_closids_show(struct kernfs_open_file *of,
492 struct seq_file *seq, void *v)
493{
494 struct rdt_resource *r = of->kn->parent->priv;
495
496 seq_printf(seq, "%d\n", r->num_closid);
497
498 return 0;
499}
500
501static int rdt_cbm_mask_show(struct kernfs_open_file *of,
502 struct seq_file *seq, void *v)
503{
504 struct rdt_resource *r = of->kn->parent->priv;
505
506 seq_printf(seq, "%x\n", r->max_cbm);
507
508 return 0;
509}
510
511static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
512 struct seq_file *seq, void *v)
513{
514 struct rdt_resource *r = of->kn->parent->priv;
515
516 seq_printf(seq, "%d\n", r->min_cbm_bits);
517
518 return 0;
519}
520
521/* rdtgroup information files for one cache resource. */
522static struct rftype res_info_files[] = {
523 {
524 .name = "num_closids",
525 .mode = 0444,
526 .kf_ops = &rdtgroup_kf_single_ops,
527 .seq_show = rdt_num_closids_show,
528 },
529 {
530 .name = "cbm_mask",
531 .mode = 0444,
532 .kf_ops = &rdtgroup_kf_single_ops,
533 .seq_show = rdt_cbm_mask_show,
534 },
535 {
536 .name = "min_cbm_bits",
537 .mode = 0444,
538 .kf_ops = &rdtgroup_kf_single_ops,
539 .seq_show = rdt_min_cbm_bits_show,
540 },
541};
542
543static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
544{
545 struct kernfs_node *kn_subdir;
546 struct rdt_resource *r;
547 int ret;
548
549 /* create the directory */
550 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
551 if (IS_ERR(kn_info))
552 return PTR_ERR(kn_info);
553 kernfs_get(kn_info);
554
555 for_each_enabled_rdt_resource(r) {
556 kn_subdir = kernfs_create_dir(kn_info, r->name,
557 kn_info->mode, r);
558 if (IS_ERR(kn_subdir)) {
559 ret = PTR_ERR(kn_subdir);
560 goto out_destroy;
561 }
562 kernfs_get(kn_subdir);
563 ret = rdtgroup_kn_set_ugid(kn_subdir);
564 if (ret)
565 goto out_destroy;
566 ret = rdtgroup_add_files(kn_subdir, res_info_files,
567 ARRAY_SIZE(res_info_files));
568 if (ret)
569 goto out_destroy;
570 kernfs_activate(kn_subdir);
571 }
572
573 /*
574 * This extra ref will be put in kernfs_remove() and guarantees
575 * that kn_info is always accessible.
576 */
577 kernfs_get(kn_info);
578
579 ret = rdtgroup_kn_set_ugid(kn_info);
580 if (ret)
581 goto out_destroy;
582
583 kernfs_activate(kn_info);
584
585 return 0;
586
587out_destroy:
588 kernfs_remove(kn_info);
589 return ret;
590}
591
592static void l3_qos_cfg_update(void *arg)
593{
594 bool *enable = arg;
595
596 wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
597}
598
599static int set_l3_qos_cfg(struct rdt_resource *r, bool enable)
600{
601 cpumask_var_t cpu_mask;
602 struct rdt_domain *d;
603 int cpu;
604
605 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
606 return -ENOMEM;
607
608 list_for_each_entry(d, &r->domains, list) {
609 /* Pick one CPU from each domain instance to update MSR */
610 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
611 }
612 cpu = get_cpu();
613 /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
614 if (cpumask_test_cpu(cpu, cpu_mask))
615 l3_qos_cfg_update(&enable);
616 /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
617 smp_call_function_many(cpu_mask, l3_qos_cfg_update, &enable, 1);
618 put_cpu();
619
620 free_cpumask_var(cpu_mask);
621
622 return 0;
623}
624
625static int cdp_enable(void)
626{
627 struct rdt_resource *r_l3data = &rdt_resources_all[RDT_RESOURCE_L3DATA];
628 struct rdt_resource *r_l3code = &rdt_resources_all[RDT_RESOURCE_L3CODE];
629 struct rdt_resource *r_l3 = &rdt_resources_all[RDT_RESOURCE_L3];
630 int ret;
631
632 if (!r_l3->capable || !r_l3data->capable || !r_l3code->capable)
633 return -EINVAL;
634
635 ret = set_l3_qos_cfg(r_l3, true);
636 if (!ret) {
637 r_l3->enabled = false;
638 r_l3data->enabled = true;
639 r_l3code->enabled = true;
640 }
641 return ret;
642}
643
644static void cdp_disable(void)
645{
646 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
647
648 r->enabled = r->capable;
649
650 if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled) {
651 rdt_resources_all[RDT_RESOURCE_L3DATA].enabled = false;
652 rdt_resources_all[RDT_RESOURCE_L3CODE].enabled = false;
653 set_l3_qos_cfg(r, false);
654 }
655}
656
657static int parse_rdtgroupfs_options(char *data)
658{
659 char *token, *o = data;
660 int ret = 0;
661
662 while ((token = strsep(&o, ",")) != NULL) {
663 if (!*token)
664 return -EINVAL;
665
666 if (!strcmp(token, "cdp"))
667 ret = cdp_enable();
668 }
669
670 return ret;
671}
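/*
 * Mount-time sketch: "cdp" is the only option parsed above, so (given
 * CDP-capable hardware) code/data prioritization is enabled with:
 *
 *	# mount -t resctrl -o cdp resctrl /sys/fs/resctrl
 *
 * Mounting without "-o cdp" leaves the plain L3 resource enabled.
 */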
672
673/*
674 * We don't allow rdtgroup directories to be created anywhere
675 * except the root directory. Thus, when looking for the rdtgroup
676 * structure for a kernfs node, we are either looking at a directory,
677 * in which case the rdtgroup structure is pointed at by the "priv"
678 * field, or at a file, in which case we need only look at the parent
679 * directory to find the rdtgroup.
680 */
681static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
682{
683 if (kernfs_type(kn) == KERNFS_DIR) {
684 /*
685 * All the resource directories use "kn->priv"
686 * to point to the "struct rdtgroup" for the
687 * resource. "info" and its subdirectories don't
688 * have rdtgroup structures, so return NULL here.
689 */
690 if (kn == kn_info || kn->parent == kn_info)
691 return NULL;
692 else
693 return kn->priv;
694 } else {
695 return kn->parent->priv;
696 }
697}
698
699struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
700{
701 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
702
703 if (!rdtgrp)
704 return NULL;
705
706 atomic_inc(&rdtgrp->waitcount);
707 kernfs_break_active_protection(kn);
708
709 mutex_lock(&rdtgroup_mutex);
710
711 /* Was this group deleted while we waited? */
712 if (rdtgrp->flags & RDT_DELETED)
713 return NULL;
714
715 return rdtgrp;
716}
717
718void rdtgroup_kn_unlock(struct kernfs_node *kn)
719{
720 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
721
722 if (!rdtgrp)
723 return;
724
725 mutex_unlock(&rdtgroup_mutex);
726
727 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
728 (rdtgrp->flags & RDT_DELETED)) {
729 kernfs_unbreak_active_protection(kn);
730 kernfs_put(kn);
731 kfree(rdtgrp);
732 } else {
733 kernfs_unbreak_active_protection(kn);
734 }
735}
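/*
 * Typical caller pattern for the lock/unlock pair above (a sketch, as
 * used by the write/show handlers in this file; do_something() is a
 * stand-in for the per-file work):
 *
 *	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 *	if (rdtgrp)
 *		ret = do_something(rdtgrp);
 *	else
 *		ret = -ENOENT;
 *	rdtgroup_kn_unlock(of->kn);
 *
 * The waitcount/RDT_DELETED handshake allows rmdir to mark a group
 * deleted while writers wait on rdtgroup_mutex; the last unlocker
 * frees the rdtgroup.
 */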
736
737static struct dentry *rdt_mount(struct file_system_type *fs_type,
738 int flags, const char *unused_dev_name,
739 void *data)
740{
741 struct dentry *dentry;
742 int ret;
743
744 mutex_lock(&rdtgroup_mutex);
745 /*
746 * resctrl file system can only be mounted once.
747 */
748 if (static_branch_unlikely(&rdt_enable_key)) {
749 dentry = ERR_PTR(-EBUSY);
750 goto out;
751 }
752
753 ret = parse_rdtgroupfs_options(data);
754 if (ret) {
755 dentry = ERR_PTR(ret);
756 goto out_cdp;
757 }
758
759 closid_init();
760
761 ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
762 if (ret) {
763 dentry = ERR_PTR(ret);
764 goto out_cdp;
765 }
766
767 dentry = kernfs_mount(fs_type, flags, rdt_root,
768 RDTGROUP_SUPER_MAGIC, NULL);
769 if (IS_ERR(dentry))
770 goto out_cdp;
771
772 static_branch_enable(&rdt_enable_key);
773 goto out;
774
775out_cdp:
776 cdp_disable();
777out:
778 mutex_unlock(&rdtgroup_mutex);
779
780 return dentry;
781}
782
783static int reset_all_cbms(struct rdt_resource *r)
784{
785 struct msr_param msr_param;
786 cpumask_var_t cpu_mask;
787 struct rdt_domain *d;
788 int i, cpu;
789
790 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
791 return -ENOMEM;
792
793 msr_param.res = r;
794 msr_param.low = 0;
795 msr_param.high = r->num_closid;
796
797 /*
798 * Disable resource control for this resource by setting all
799 * CBMs in all domains to the maximum mask value. Pick one CPU
800 * from each domain to update the MSRs below.
801 */
802 list_for_each_entry(d, &r->domains, list) {
803 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
804
805 for (i = 0; i < r->num_closid; i++)
806 d->cbm[i] = r->max_cbm;
807 }
808 cpu = get_cpu();
809 /* Update CBM on this cpu if it's in cpu_mask. */
810 if (cpumask_test_cpu(cpu, cpu_mask))
811 rdt_cbm_update(&msr_param);
812 /* Update CBM on all other cpus in cpu_mask. */
813 smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
814 put_cpu();
815
816 free_cpumask_var(cpu_mask);
817
818 return 0;
819}
820
821/*
822 * Move tasks from one group to the other. If @from is NULL, then all tasks
823 * in the system are moved unconditionally (used for teardown).
824 *
825 * If @mask is not NULL, the CPUs on which the moved tasks are running are
826 * set in that mask so that the update SMP function call is restricted to
827 * the affected CPUs.
828 */
829static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
830 struct cpumask *mask)
831{
832 struct task_struct *p, *t;
833
834 read_lock(&tasklist_lock);
835 for_each_process_thread(p, t) {
836 if (!from || t->closid == from->closid) {
837 t->closid = to->closid;
838#ifdef CONFIG_SMP
839 /*
840 * This is safe on x86 w/o barriers as the ordering
841 * of writing to task_cpu() and t->on_cpu is
842 * reverse to the reading here. The detection is
843 * inaccurate as tasks might move or schedule
844 * before the smp function call takes place. In
845 * such a case the function call is pointless, but
846 * there is no other side effect.
847 */
848 if (mask && t->on_cpu)
849 cpumask_set_cpu(task_cpu(t), mask);
850#endif
851 }
852 }
853 read_unlock(&tasklist_lock);
854}
855
856/*
857 * Forcibly remove all of subdirectories under root.
858 */
859static void rmdir_all_sub(void)
860{
861 struct rdtgroup *rdtgrp, *tmp;
862
863 /* Move all tasks to the default resource group */
864 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
865
866 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
867 /* Remove each rdtgroup other than root */
868 if (rdtgrp == &rdtgroup_default)
869 continue;
870
871 /*
872 * Give any CPUs back to the default group. We cannot copy
873 * cpu_online_mask because a CPU might have executed the
874 * offline callback already, but is still marked online.
875 */
876 cpumask_or(&rdtgroup_default.cpu_mask,
877 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
878
879 kernfs_remove(rdtgrp->kn);
880 list_del(&rdtgrp->rdtgroup_list);
881 kfree(rdtgrp);
882 }
883 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
884 get_online_cpus();
885 rdt_update_closid(cpu_online_mask, &rdtgroup_default.closid);
886 put_online_cpus();
887
888 kernfs_remove(kn_info);
889}
890
891static void rdt_kill_sb(struct super_block *sb)
892{
893 struct rdt_resource *r;
894
895 mutex_lock(&rdtgroup_mutex);
896
897 /* Put everything back to default values. */
898 for_each_enabled_rdt_resource(r)
899 reset_all_cbms(r);
900 cdp_disable();
901 rmdir_all_sub();
902 static_branch_disable(&rdt_enable_key);
903 kernfs_kill_sb(sb);
904 mutex_unlock(&rdtgroup_mutex);
905}
906
907static struct file_system_type rdt_fs_type = {
908 .name = "resctrl",
909 .mount = rdt_mount,
910 .kill_sb = rdt_kill_sb,
911};
912
913static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
914 umode_t mode)
915{
916 struct rdtgroup *parent, *rdtgrp;
917 struct kernfs_node *kn;
918 int ret, closid;
919
920 /* Only allow mkdir in the root directory */
921 if (parent_kn != rdtgroup_default.kn)
922 return -EPERM;
923
924 /* Do not accept '\n' to avoid an unparsable situation. */
925 if (strchr(name, '\n'))
926 return -EINVAL;
927
928 parent = rdtgroup_kn_lock_live(parent_kn);
929 if (!parent) {
930 ret = -ENODEV;
931 goto out_unlock;
932 }
933
934 ret = closid_alloc();
935 if (ret < 0)
936 goto out_unlock;
937 closid = ret;
938
939 /* allocate the rdtgroup. */
940 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
941 if (!rdtgrp) {
942 ret = -ENOSPC;
943 goto out_closid_free;
944 }
945 rdtgrp->closid = closid;
946 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
947
948 /* kernfs creates the directory for rdtgrp */
949 kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
950 if (IS_ERR(kn)) {
951 ret = PTR_ERR(kn);
952 goto out_cancel_ref;
953 }
954 rdtgrp->kn = kn;
955
956 /*
957 * kernfs_remove() will drop the reference count on "kn" which
958 * will free it. But we still need it to stick around for the
959 * rdtgroup_kn_unlock(kn) call below. Take one extra reference
960 * here, which will be dropped inside rdtgroup_kn_unlock().
961 */
962 kernfs_get(kn);
963
964 ret = rdtgroup_kn_set_ugid(kn);
965 if (ret)
966 goto out_destroy;
967
968 ret = rdtgroup_add_files(kn, rdtgroup_base_files,
969 ARRAY_SIZE(rdtgroup_base_files));
970 if (ret)
971 goto out_destroy;
972
973 kernfs_activate(kn);
974
975 ret = 0;
976 goto out_unlock;
977
978out_destroy:
979 kernfs_remove(rdtgrp->kn);
980out_cancel_ref:
981 list_del(&rdtgrp->rdtgroup_list);
982 kfree(rdtgrp);
983out_closid_free:
984 closid_free(closid);
985out_unlock:
986 rdtgroup_kn_unlock(parent_kn);
987 return ret;
988}
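/*
 * Usage sketch: a new resource group is created from userspace with a
 * plain mkdir in the resctrl root (hypothetical group name "p1"):
 *
 *	# mkdir /sys/fs/resctrl/p1
 *
 * which lands in rdtgroup_mkdir(), allocates a fresh closid and
 * populates the new directory with the cpus/tasks/schemata files.
 */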
989
990static int rdtgroup_rmdir(struct kernfs_node *kn)
991{
992 int ret, cpu, closid = rdtgroup_default.closid;
993 struct rdtgroup *rdtgrp;
994 cpumask_var_t tmpmask;
995
996 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
997 return -ENOMEM;
998
999 rdtgrp = rdtgroup_kn_lock_live(kn);
1000 if (!rdtgrp) {
1001 ret = -EPERM;
1002 goto out;
1003 }
1004
1005 /* Give any tasks back to the default group */
1006 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
1007
1008 /* Give any CPUs back to the default group */
1009 cpumask_or(&rdtgroup_default.cpu_mask,
1010 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
1011
1012 /* Update per cpu closid of the moved CPUs first */
1013 for_each_cpu(cpu, &rdtgrp->cpu_mask)
1014 per_cpu(cpu_closid, cpu) = closid;
1015 /*
1016 * Update the MSR on the moved CPUs and on CPUs which have a
1017 * moved task running on them.
1018 */
1019 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
1020 rdt_update_closid(tmpmask, NULL);
1021
1022 rdtgrp->flags = RDT_DELETED;
1023 closid_free(rdtgrp->closid);
1024 list_del(&rdtgrp->rdtgroup_list);
1025
1026 /*
1027 * One extra hold on this kernfs node; it will be dropped when we
1028 * kfree(rdtgrp) in rdtgroup_kn_unlock().
1029 */
1030 kernfs_get(kn);
1031 kernfs_remove(rdtgrp->kn);
1032 ret = 0;
1033out:
1034 rdtgroup_kn_unlock(kn);
1035 free_cpumask_var(tmpmask);
1036 return ret;
1037}
1038
1039static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
1040{
1041 if (rdt_resources_all[RDT_RESOURCE_L3DATA].enabled)
1042 seq_puts(seq, ",cdp");
1043 return 0;
1044}
1045
1046static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
1047 .mkdir = rdtgroup_mkdir,
1048 .rmdir = rdtgroup_rmdir,
1049 .show_options = rdtgroup_show_options,
1050};
1051
1052static int __init rdtgroup_setup_root(void)
1053{
1054 int ret;
1055
1056 rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
1057 KERNFS_ROOT_CREATE_DEACTIVATED,
1058 &rdtgroup_default);
1059 if (IS_ERR(rdt_root))
1060 return PTR_ERR(rdt_root);
1061
1062 mutex_lock(&rdtgroup_mutex);
1063
1064 rdtgroup_default.closid = 0;
1065 list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
1066
1067 ret = rdtgroup_add_files(rdt_root->kn, rdtgroup_base_files,
1068 ARRAY_SIZE(rdtgroup_base_files));
1069 if (ret) {
1070 kernfs_destroy_root(rdt_root);
1071 goto out;
1072 }
1073
1074 rdtgroup_default.kn = rdt_root->kn;
1075 kernfs_activate(rdtgroup_default.kn);
1076
1077out:
1078 mutex_unlock(&rdtgroup_mutex);
1079
1080 return ret;
1081}
1082
1083/*
1084 * rdtgroup_init - rdtgroup initialization
1085 *
1086 * Setup resctrl file system including set up root, create mount point,
1087 * register rdtgroup filesystem, and initialize files under root directory.
1088 *
1089 * Return: 0 on success or -errno
1090 */
1091int __init rdtgroup_init(void)
1092{
1093 int ret = 0;
1094
1095 ret = rdtgroup_setup_root();
1096 if (ret)
1097 return ret;
1098
1099 ret = sysfs_create_mount_point(fs_kobj, "resctrl");
1100 if (ret)
1101 goto cleanup_root;
1102
1103 ret = register_filesystem(&rdt_fs_type);
1104 if (ret)
1105 goto cleanup_mountpoint;
1106
1107 return 0;
1108
1109cleanup_mountpoint:
1110 sysfs_remove_mount_point(fs_kobj, "resctrl");
1111cleanup_root:
1112 kernfs_destroy_root(rdt_root);
1113
1114 return ret;
1115}
diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
new file mode 100644
index 000000000000..f369cb8db0d5
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -0,0 +1,245 @@
1/*
2 * Resource Director Technology(RDT)
3 * - Cache Allocation code.
4 *
5 * Copyright (C) 2016 Intel Corporation
6 *
7 * Authors:
8 * Fenghua Yu <fenghua.yu@intel.com>
9 * Tony Luck <tony.luck@intel.com>
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms and conditions of the GNU General Public License,
13 * version 2, as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * More information about RDT can be found in the Intel (R) x86 Architecture
21 * Software Developer Manual June 2016, volume 3, section 17.17.
22 */
23
24#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
25
26#include <linux/kernfs.h>
27#include <linux/seq_file.h>
28#include <linux/slab.h>
29#include <asm/intel_rdt.h>
30
31/*
32 * Check whether a cache bit mask is valid. The SDM says:
33 * Please note that all (and only) contiguous '1' combinations
34 * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
35 * Additionally Haswell requires at least two bits set.
36 */
37static bool cbm_validate(unsigned long var, struct rdt_resource *r)
38{
39 unsigned long first_bit, zero_bit;
40
41 if (var == 0 || var > r->max_cbm)
42 return false;
43
44 first_bit = find_first_bit(&var, r->cbm_len);
45 zero_bit = find_next_zero_bit(&var, r->cbm_len, first_bit);
46
47 if (find_next_bit(&var, r->cbm_len, zero_bit) < r->cbm_len)
48 return false;
49
50 if ((zero_bit - first_bit) < r->min_cbm_bits)
51 return false;
52 return true;
53}
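/*
 * A few illustrative cases for a 20-bit wide CBM (cbm_len = 20,
 * max_cbm = 0xfffff), assuming min_cbm_bits = 1:
 *
 *	0x000ff  valid   (one contiguous run of eight bits)
 *	0x3c000  valid   (one contiguous run of four bits)
 *	0x00f0f  invalid (two separate runs of '1' bits)
 *	0x00000  invalid (no bits set at all)
 */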
54
55/*
56 * Read one cache bit mask (hex). Check that it is valid for the current
57 * resource type.
58 */
59static int parse_cbm(char *buf, struct rdt_resource *r)
60{
61 unsigned long data;
62 int ret;
63
64 ret = kstrtoul(buf, 16, &data);
65 if (ret)
66 return ret;
67 if (!cbm_validate(data, r))
68 return -EINVAL;
69 r->tmp_cbms[r->num_tmp_cbms++] = data;
70
71 return 0;
72}
73
74/*
75 * For each domain in this resource we expect to find a series of:
76 * id=mask
77 * separated by ";". The "id" is in decimal, and the ids must appear
78 * in the same order as the resource's domain list.
79 */
80static int parse_line(char *line, struct rdt_resource *r)
81{
82 char *dom = NULL, *id;
83 struct rdt_domain *d;
84 unsigned long dom_id;
85
86 list_for_each_entry(d, &r->domains, list) {
87 dom = strsep(&line, ";");
88 if (!dom)
89 return -EINVAL;
90 id = strsep(&dom, "=");
91 if (kstrtoul(id, 10, &dom_id) || dom_id != d->id)
92 return -EINVAL;
93 if (parse_cbm(dom, r))
94 return -EINVAL;
95 }
96
97 /* Any garbage at the end of the line? */
98 if (line && line[0])
99 return -EINVAL;
100 return 0;
101}
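/*
 * Schemata line sketch: with two L3 cache domains (ids 0 and 1) the
 * part of a schemata line handed to parse_line() might look like
 * (masks are illustrative):
 *
 *	0=fffff;1=000ff
 *
 * The caller has already stripped the leading "L3:" resource prefix,
 * and the domain ids must match the walk order of r->domains.
 */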
102
103static int update_domains(struct rdt_resource *r, int closid)
104{
105 struct msr_param msr_param;
106 cpumask_var_t cpu_mask;
107 struct rdt_domain *d;
108 int cpu, idx = 0;
109
110 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
111 return -ENOMEM;
112
113 msr_param.low = closid;
114 msr_param.high = msr_param.low + 1;
115 msr_param.res = r;
116
117 list_for_each_entry(d, &r->domains, list) {
118 cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
119 d->cbm[msr_param.low] = r->tmp_cbms[idx++];
120 }
121 cpu = get_cpu();
122 /* Update CBM on this cpu if it's in cpu_mask. */
123 if (cpumask_test_cpu(cpu, cpu_mask))
124 rdt_cbm_update(&msr_param);
125 /* Update CBM on other cpus. */
126 smp_call_function_many(cpu_mask, rdt_cbm_update, &msr_param, 1);
127 put_cpu();
128
129 free_cpumask_var(cpu_mask);
130
131 return 0;
132}
133
134ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
135 char *buf, size_t nbytes, loff_t off)
136{
137 struct rdtgroup *rdtgrp;
138 struct rdt_resource *r;
139 char *tok, *resname;
140 int closid, ret = 0;
141 u32 *l3_cbms = NULL;
142
143 /* Valid input requires a trailing newline */
144 if (nbytes == 0 || buf[nbytes - 1] != '\n')
145 return -EINVAL;
146 buf[nbytes - 1] = '\0';
147
148 rdtgrp = rdtgroup_kn_lock_live(of->kn);
149 if (!rdtgrp) {
150 rdtgroup_kn_unlock(of->kn);
151 return -ENOENT;
152 }
153
154 closid = rdtgrp->closid;
155
156 /* get scratch space to save all the masks while we validate input */
157 for_each_enabled_rdt_resource(r) {
158 r->tmp_cbms = kcalloc(r->num_domains, sizeof(*l3_cbms),
159 GFP_KERNEL);
160 if (!r->tmp_cbms) {
161 ret = -ENOMEM;
162 goto out;
163 }
164 r->num_tmp_cbms = 0;
165 }
166
167 while ((tok = strsep(&buf, "\n")) != NULL) {
168 resname = strsep(&tok, ":");
169 if (!tok) {
170 ret = -EINVAL;
171 goto out;
172 }
173 for_each_enabled_rdt_resource(r) {
174 if (!strcmp(resname, r->name) &&
175 closid < r->num_closid) {
176 ret = parse_line(tok, r);
177 if (ret)
178 goto out;
179 break;
180 }
181 }
182 if (!r->name) {
183 ret = -EINVAL;
184 goto out;
185 }
186 }
187
188 /* Did the parser find all the masks we need? */
189 for_each_enabled_rdt_resource(r) {
190 if (r->num_tmp_cbms != r->num_domains) {
191 ret = -EINVAL;
192 goto out;
193 }
194 }
195
196 for_each_enabled_rdt_resource(r) {
197 ret = update_domains(r, closid);
198 if (ret)
199 goto out;
200 }
201
202out:
203 rdtgroup_kn_unlock(of->kn);
204 for_each_enabled_rdt_resource(r) {
205 kfree(r->tmp_cbms);
206 r->tmp_cbms = NULL;
207 }
208 return ret ?: nbytes;
209}
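/*
 * Complete write sketch: one line per enabled resource. Without CDP a
 * single "L3:..." line is expected; with CDP the L3 resource is split
 * into L3DATA and L3CODE, e.g. (illustrative masks):
 *
 *	L3DATA:0=ffff0;1=ffff0
 *	L3CODE:0=0000f;1=0000f
 */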
210
211static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
212{
213 struct rdt_domain *dom;
214 bool sep = false;
215
216 seq_printf(s, "%s:", r->name);
217 list_for_each_entry(dom, &r->domains, list) {
218 if (sep)
219 seq_puts(s, ";");
220 seq_printf(s, "%d=%x", dom->id, dom->cbm[closid]);
221 sep = true;
222 }
223 seq_puts(s, "\n");
224}
225
226int rdtgroup_schemata_show(struct kernfs_open_file *of,
227 struct seq_file *s, void *v)
228{
229 struct rdtgroup *rdtgrp;
230 struct rdt_resource *r;
231 int closid, ret = 0;
232
233 rdtgrp = rdtgroup_kn_lock_live(of->kn);
234 if (rdtgrp) {
235 closid = rdtgrp->closid;
236 for_each_enabled_rdt_resource(r) {
237 if (closid < r->num_closid)
238 show_doms(s, r, closid);
239 }
240 } else {
241 ret = -ENOENT;
242 }
243 rdtgroup_kn_unlock(of->kn);
244 return ret;
245}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d1316f9c8329..d9794060fe22 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -20,12 +20,15 @@ struct cpuid_bit {
20/* Please keep the leaf sorted by cpuid_bit.level for faster search. */ 20/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
21static const struct cpuid_bit cpuid_bits[] = { 21static const struct cpuid_bit cpuid_bits[] = {
22 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, 22 { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
23 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, 23 { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
24 { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, 24 { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
25 { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, 25 { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
26 { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, 26 { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
27 { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, 27 { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
28 { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, 28 { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
29 { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
30 { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
31 { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
29 { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, 32 { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
30 { 0, 0, 0, 0, 0 } 33 { 0, 0, 0, 0, 0 }
31}; 34};
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d0d744108594..a0ac3e81518a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -53,6 +53,7 @@
53#include <asm/debugreg.h> 53#include <asm/debugreg.h>
54#include <asm/switch_to.h> 54#include <asm/switch_to.h>
55#include <asm/vm86.h> 55#include <asm/vm86.h>
56#include <asm/intel_rdt.h>
56 57
57void __show_regs(struct pt_regs *regs, int all) 58void __show_regs(struct pt_regs *regs, int all)
58{ 59{
@@ -296,5 +297,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
296 297
297 this_cpu_write(current_task, next_p); 298 this_cpu_write(current_task, next_p);
298 299
300 /* Load the Intel cache allocation PQR MSR. */
301 intel_rdt_sched_in();
302
299 return prev_p; 303 return prev_p;
300} 304}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a76b65e3e615..a61e141b6891 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,6 +49,7 @@
49#include <asm/switch_to.h> 49#include <asm/switch_to.h>
50#include <asm/xen/hypervisor.h> 50#include <asm/xen/hypervisor.h>
51#include <asm/vdso.h> 51#include <asm/vdso.h>
52#include <asm/intel_rdt.h>
52 53
53__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); 54__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
54 55
@@ -476,6 +477,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
476 loadsegment(ss, __KERNEL_DS); 477 loadsegment(ss, __KERNEL_DS);
477 } 478 }
478 479
480 /* Load the Intel cache allocation PQR MSR. */
481 intel_rdt_sched_in();
482
479 return prev_p; 483 return prev_p;
480} 484}
481 485
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 1e3903d0d994..eb3af2739537 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -363,6 +363,7 @@ static ssize_t file_name##_show(struct device *dev, \
363 return sprintf(buf, "%u\n", this_leaf->object); \ 363 return sprintf(buf, "%u\n", this_leaf->object); \
364} 364}
365 365
366show_one(id, id);
366show_one(level, level); 367show_one(level, level);
367show_one(coherency_line_size, coherency_line_size); 368show_one(coherency_line_size, coherency_line_size);
368show_one(number_of_sets, number_of_sets); 369show_one(number_of_sets, number_of_sets);
@@ -444,6 +445,7 @@ static ssize_t write_policy_show(struct device *dev,
444 return n; 445 return n;
445} 446}
446 447
448static DEVICE_ATTR_RO(id);
447static DEVICE_ATTR_RO(level); 449static DEVICE_ATTR_RO(level);
448static DEVICE_ATTR_RO(type); 450static DEVICE_ATTR_RO(type);
449static DEVICE_ATTR_RO(coherency_line_size); 451static DEVICE_ATTR_RO(coherency_line_size);
@@ -457,6 +459,7 @@ static DEVICE_ATTR_RO(shared_cpu_list);
457static DEVICE_ATTR_RO(physical_line_partition); 459static DEVICE_ATTR_RO(physical_line_partition);
458 460
459static struct attribute *cache_default_attrs[] = { 461static struct attribute *cache_default_attrs[] = {
462 &dev_attr_id.attr,
460 &dev_attr_type.attr, 463 &dev_attr_type.attr,
461 &dev_attr_level.attr, 464 &dev_attr_level.attr,
462 &dev_attr_shared_cpu_map.attr, 465 &dev_attr_shared_cpu_map.attr,
@@ -480,6 +483,8 @@ cache_default_attrs_is_visible(struct kobject *kobj,
480 const struct cpumask *mask = &this_leaf->shared_cpu_map; 483 const struct cpumask *mask = &this_leaf->shared_cpu_map;
481 umode_t mode = attr->mode; 484 umode_t mode = attr->mode;
482 485
486 if ((attr == &dev_attr_id.attr) && (this_leaf->attributes & CACHE_ID))
487 return mode;
483 if ((attr == &dev_attr_type.attr) && this_leaf->type) 488 if ((attr == &dev_attr_type.attr) && this_leaf->type)
484 return mode; 489 return mode;
485 if ((attr == &dev_attr_level.attr) && this_leaf->level) 490 if ((attr == &dev_attr_level.attr) && this_leaf->level)
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index a951fd10aaaa..6a524bf6a06d 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -18,6 +18,7 @@ enum cache_type {
18 18
19/** 19/**
20 * struct cacheinfo - represent a cache leaf node 20 * struct cacheinfo - represent a cache leaf node
21 * @id: This cache's id. It is unique among caches with the same (type, level).
21 * @type: type of the cache - data, inst or unified 22 * @type: type of the cache - data, inst or unified
22 * @level: represents the hierarchy in the multi-level cache 23 * @level: represents the hierarchy in the multi-level cache
23 * @coherency_line_size: size of each cache line usually representing 24 * @coherency_line_size: size of each cache line usually representing
@@ -44,6 +45,7 @@ enum cache_type {
44 * keeping, the remaining members form the core properties of the cache 45 * keeping, the remaining members form the core properties of the cache
45 */ 46 */
46struct cacheinfo { 47struct cacheinfo {
48 unsigned int id;
47 enum cache_type type; 49 enum cache_type type;
48 unsigned int level; 50 unsigned int level;
49 unsigned int coherency_line_size; 51 unsigned int coherency_line_size;
@@ -61,6 +63,7 @@ struct cacheinfo {
61#define CACHE_WRITE_ALLOCATE BIT(3) 63#define CACHE_WRITE_ALLOCATE BIT(3)
62#define CACHE_ALLOCATE_POLICY_MASK \ 64#define CACHE_ALLOCATE_POLICY_MASK \
63 (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) 65 (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE)
66#define CACHE_ID BIT(4)
64 67
65 struct device_node *of_node; 68 struct device_node *of_node;
66 bool disable_sysfs; 69 bool disable_sysfs;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a440cf178191..4d1905245c7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1821,6 +1821,9 @@ struct task_struct {
1821 /* cg_list protected by css_set_lock and tsk->alloc_lock */ 1821 /* cg_list protected by css_set_lock and tsk->alloc_lock */
1822 struct list_head cg_list; 1822 struct list_head cg_list;
1823#endif 1823#endif
1824#ifdef CONFIG_INTEL_RDT_A
1825 int closid;
1826#endif
1824#ifdef CONFIG_FUTEX 1827#ifdef CONFIG_FUTEX
1825 struct robust_list_head __user *robust_list; 1828 struct robust_list_head __user *robust_list;
1826#ifdef CONFIG_COMPAT 1829#ifdef CONFIG_COMPAT
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 9bd559472c92..e230af2e6855 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -57,6 +57,7 @@
57#define CGROUP_SUPER_MAGIC 0x27e0eb 57#define CGROUP_SUPER_MAGIC 0x27e0eb
58#define CGROUP2_SUPER_MAGIC 0x63677270 58#define CGROUP2_SUPER_MAGIC 0x63677270
59 59
60#define RDTGROUP_SUPER_MAGIC 0x7655821
60 61
61#define STACK_END_MAGIC 0x57AC6E9D 62#define STACK_END_MAGIC 0x57AC6E9D
62 63