author		Linus Torvalds <torvalds@linux-foundation.org>	2019-07-08 21:28:44 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-07-08 21:28:44 -0400
commit		222a21d29521d144f3dd7a0bc4d4020e448f0126 (patch)
tree		4685c2b74451716ed873471425c2f33b0c17aa81
parent		8faef7125d02c0bbd7a1ceb4e3b599a9b8c42e58 (diff)
parent		eb876fbc248e6eb4773a5bc80d205ff7262b1bb5 (diff)
Merge branch 'x86-topology-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 topology updates from Ingo Molnar:
 "Implement multi-die topology support on Intel CPUs and expose the die
  topology to user-space tooling, by Len Brown, Kan Liang and Zhang Rui.

  These changes should have no effect on the kernel's existing
  understanding of topologies, i.e. there should be no behavioral impact
  on cache, NUMA, scheduler, perf and other topologies and overall
  system performance"

* 'x86-topology-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/rapl: Cosmetic rename internal variables in response to multi-die/pkg support
  perf/x86/intel/uncore: Cosmetic renames in response to multi-die/pkg support
  hwmon/coretemp: Cosmetic: Rename internal variables to zones from packages
  thermal/x86_pkg_temp_thermal: Cosmetic: Rename internal variables to zones from packages
  perf/x86/intel/cstate: Support multi-die/package
  perf/x86/intel/rapl: Support multi-die/package
  perf/x86/intel/uncore: Support multi-die/package
  topology: Create core_cpus and die_cpus sysfs attributes
  topology: Create package_cpus sysfs attribute
  hwmon/coretemp: Support multi-die/package
  powercap/intel_rapl: Update RAPL domain name and debug messages
  thermal/x86_pkg_temp_thermal: Support multi-die/package
  powercap/intel_rapl: Support multi-die/package
  powercap/intel_rapl: Simplify rapl_find_package()
  x86/topology: Define topology_logical_die_id()
  x86/topology: Define topology_die_id()
  cpu/topology: Export die_id
  x86/topology: Create topology_max_die_per_package()
  x86/topology: Add CPUID.1F multi-die/package support
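The user-visible result of this series is a set of new sysfs topology attributes under /sys/devices/system/cpu/cpuX/topology/ (documented in the Documentation/cputopology.txt hunk below). A minimal user-space sketch in C that reads them for cpu0 — the attribute names come from the diff; everything else is illustrative, assuming a kernel that contains this merge:

	#include <stdio.h>

	/* Print one topology attribute of cpu0, or <unavailable>. */
	static void show(const char *attr)
	{
		char path[128], buf[256];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/system/cpu/cpu0/topology/%s", attr);
		f = fopen(path, "r");
		if (f && fgets(buf, sizeof(buf), f))
			printf("%-20s %s", attr, buf);	/* buf keeps its '\n' */
		else
			printf("%-20s <unavailable>\n", attr);
		if (f)
			fclose(f);
	}

	int main(void)
	{
		show("physical_package_id");
		show("die_id");			/* new in this merge */
		show("die_cpus_list");		/* new in this merge */
		show("package_cpus_list");	/* new name for core_siblings_list */
		return 0;
	}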
-rw-r--r--   Documentation/cputopology.txt                  48
-rw-r--r--   Documentation/x86/topology.rst                  4
-rw-r--r--   arch/x86/events/intel/cstate.c                 14
-rw-r--r--   arch/x86/events/intel/rapl.c                   20
-rw-r--r--   arch/x86/events/intel/uncore.c                 80
-rw-r--r--   arch/x86/events/intel/uncore.h                  4
-rw-r--r--   arch/x86/events/intel/uncore_snbep.c            4
-rw-r--r--   arch/x86/include/asm/processor.h                4
-rw-r--r--   arch/x86/include/asm/smp.h                      1
-rw-r--r--   arch/x86/include/asm/topology.h                17
-rw-r--r--   arch/x86/kernel/cpu/common.c                    1
-rw-r--r--   arch/x86/kernel/cpu/topology.c                 88
-rw-r--r--   arch/x86/kernel/smpboot.c                      69
-rw-r--r--   arch/x86/xen/smp_pv.c                           1
-rw-r--r--   drivers/base/topology.c                        22
-rw-r--r--   drivers/hwmon/coretemp.c                       36
-rw-r--r--   drivers/powercap/intel_rapl.c                  75
-rw-r--r--   drivers/thermal/intel/x86_pkg_temp_thermal.c  142
-rw-r--r--   include/linux/topology.h                        6
19 files changed, 422 insertions, 214 deletions
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
index cb61277e2308..b90dafcc8237 100644
--- a/Documentation/cputopology.txt
+++ b/Documentation/cputopology.txt
@@ -12,6 +12,12 @@ physical_package_id:
 	socket number, but the actual value is architecture and platform
 	dependent.
 
+die_id:
+
+	the CPU die ID of cpuX. Typically it is the hardware platform's
+	identifier (rather than the kernel's). The actual value is
+	architecture and platform dependent.
+
 core_id:
 
 	the CPU core ID of cpuX. Typically it is the hardware platform's
@@ -30,25 +36,33 @@ drawer_id:
 	identifier (rather than the kernel's). The actual value is
 	architecture and platform dependent.
 
-thread_siblings:
+core_cpus:
 
-	internal kernel map of cpuX's hardware threads within the same
-	core as cpuX.
+	internal kernel map of CPUs within the same core.
+	(deprecated name: "thread_siblings")
 
-thread_siblings_list:
+core_cpus_list:
 
-	human-readable list of cpuX's hardware threads within the same
-	core as cpuX.
+	human-readable list of CPUs within the same core.
+	(deprecated name: "thread_siblings_list");
 
-core_siblings:
+package_cpus:
 
-	internal kernel map of cpuX's hardware threads within the same
-	physical_package_id.
+	internal kernel map of the CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings")
 
-core_siblings_list:
+package_cpus_list:
 
-	human-readable list of cpuX's hardware threads within the same
-	physical_package_id.
+	human-readable list of CPUs sharing the same physical_package_id.
+	(deprecated name: "core_siblings_list")
+
+die_cpus:
+
+	internal kernel map of CPUs within the same die.
+
+die_cpus_list:
+
+	human-readable list of CPUs within the same die.
 
 book_siblings:
 
@@ -81,11 +95,13 @@ For an architecture to support this feature, it must define some of
 these macros in include/asm-XXX/topology.h::
 
 	#define topology_physical_package_id(cpu)
+	#define topology_die_id(cpu)
 	#define topology_core_id(cpu)
 	#define topology_book_id(cpu)
 	#define topology_drawer_id(cpu)
 	#define topology_sibling_cpumask(cpu)
 	#define topology_core_cpumask(cpu)
+	#define topology_die_cpumask(cpu)
 	#define topology_book_cpumask(cpu)
 	#define topology_drawer_cpumask(cpu)
 
@@ -99,9 +115,11 @@ provides default definitions for any of the above macros that are
 not defined by include/asm-XXX/topology.h:
 
 1) topology_physical_package_id: -1
-2) topology_core_id: 0
-3) topology_sibling_cpumask: just the given CPU
-4) topology_core_cpumask: just the given CPU
+2) topology_die_id: -1
+3) topology_core_id: 0
+4) topology_sibling_cpumask: just the given CPU
+5) topology_core_cpumask: just the given CPU
+6) topology_die_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst
index 6e28dbe818ab..8e9704f61017 100644
--- a/Documentation/x86/topology.rst
+++ b/Documentation/x86/topology.rst
@@ -49,6 +49,10 @@ Package-related topology information in the kernel:
 
     The number of cores in a package. This information is retrieved via CPUID.
 
+  - cpuinfo_x86.x86_max_dies:
+
+    The number of dies in a package. This information is retrieved via CPUID.
+
   - cpuinfo_x86.phys_proc_id:
 
     The physical ID of the package. This information is retrieved via CPUID
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 6072f92cb8ea..267d7f8e12ab 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -302,7 +302,7 @@ static int cstate_pmu_event_init(struct perf_event *event)
 			return -EINVAL;
 		event->hw.event_base = pkg_msr[cfg].msr;
 		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
-				      topology_core_cpumask(event->cpu));
+				      topology_die_cpumask(event->cpu));
 	} else {
 		return -ENOENT;
 	}
@@ -385,7 +385,7 @@ static int cstate_cpu_exit(unsigned int cpu)
 	if (has_cstate_pkg &&
 	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
 
-		target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+		target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
 		/* Migrate events if there is a valid target */
 		if (target < nr_cpu_ids) {
 			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
@@ -414,7 +414,7 @@ static int cstate_cpu_init(unsigned int cpu)
 	 * in the package cpu mask as the designated reader.
 	 */
 	target = cpumask_any_and(&cstate_pkg_cpu_mask,
-				 topology_core_cpumask(cpu));
+				 topology_die_cpumask(cpu));
 	if (has_cstate_pkg && target >= nr_cpu_ids)
 		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 
@@ -663,7 +663,13 @@ static int __init cstate_init(void)
 	}
 
 	if (has_cstate_pkg) {
-		err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
+		if (topology_max_die_per_package() > 1) {
+			err = perf_pmu_register(&cstate_pkg_pmu,
+						"cstate_die", -1);
+		} else {
+			err = perf_pmu_register(&cstate_pkg_pmu,
+						cstate_pkg_pmu.name, -1);
+		}
 		if (err) {
 			has_cstate_pkg = false;
 			pr_info("Failed to register cstate pkg pmu\n");
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 26c03f5adfb9..8c7ecde3ba70 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -149,7 +149,7 @@ struct rapl_pmu {
 
 struct rapl_pmus {
 	struct pmu		pmu;
-	unsigned int		maxpkg;
+	unsigned int		maxdie;
 	struct rapl_pmu		*pmus[];
 };
 
@@ -162,13 +162,13 @@ static u64 rapl_timer_ms;
 
 static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
 {
-	unsigned int pkgid = topology_logical_package_id(cpu);
+	unsigned int dieid = topology_logical_die_id(cpu);
 
 	/*
 	 * The unsigned check also catches the '-1' return value for non
 	 * existent mappings in the topology map.
 	 */
-	return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
+	return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
 }
 
 static inline u64 rapl_read_counter(struct perf_event *event)
@@ -572,7 +572,7 @@ static int rapl_cpu_offline(unsigned int cpu)
 
 	pmu->cpu = -1;
 	/* Find a new cpu to collect rapl events */
-	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
 
 	/* Migrate rapl events to the new target */
 	if (target < nr_cpu_ids) {
@@ -599,14 +599,14 @@ static int rapl_cpu_online(unsigned int cpu)
 		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
 		rapl_hrtimer_init(pmu);
 
-		rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+		rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
 	}
 
 	/*
 	 * Check if there is an online cpu in the package which collects rapl
 	 * events already.
 	 */
-	target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu));
+	target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
 	if (target < nr_cpu_ids)
 		return 0;
 
@@ -669,22 +669,22 @@ static void cleanup_rapl_pmus(void)
 {
 	int i;
 
-	for (i = 0; i < rapl_pmus->maxpkg; i++)
+	for (i = 0; i < rapl_pmus->maxdie; i++)
 		kfree(rapl_pmus->pmus[i]);
 	kfree(rapl_pmus);
 }
 
 static int __init init_rapl_pmus(void)
 {
-	int maxpkg = topology_max_packages();
+	int maxdie = topology_max_packages() * topology_max_die_per_package();
 	size_t size;
 
-	size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *);
+	size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
 	rapl_pmus = kzalloc(size, GFP_KERNEL);
 	if (!rapl_pmus)
 		return -ENOMEM;
 
-	rapl_pmus->maxpkg = maxpkg;
+	rapl_pmus->maxdie = maxdie;
 	rapl_pmus->pmu.attr_groups = rapl_attr_groups;
 	rapl_pmus->pmu.task_ctx_nr = perf_invalid_context;
 	rapl_pmus->pmu.event_init = rapl_pmu_event_init;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 089bfcdf2f7f..6094c8db949d 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -15,7 +15,7 @@ struct pci_driver *uncore_pci_driver;
 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
 struct pci_extra_dev *uncore_extra_pci_dev;
-static int max_packages;
+static int max_dies;
 
 /* mask of cpus that collect uncore events */
 static cpumask_t uncore_cpu_mask;
@@ -101,13 +101,13 @@ ssize_t uncore_event_show(struct kobject *kobj,
 
 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	unsigned int pkgid = topology_logical_package_id(cpu);
+	unsigned int dieid = topology_logical_die_id(cpu);
 
 	/*
 	 * The unsigned check also catches the '-1' return value for non
 	 * existent mappings in the topology map.
 	 */
-	return pkgid < max_packages ? pmu->boxes[pkgid] : NULL;
+	return dieid < max_dies ? pmu->boxes[dieid] : NULL;
 }
 
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
@@ -312,7 +312,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 	uncore_pmu_init_hrtimer(box);
 	box->cpu = -1;
 	box->pci_phys_id = -1;
-	box->pkgid = -1;
+	box->dieid = -1;
 
 	/* set default hrtimer timeout */
 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
@@ -827,10 +827,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 
 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 {
-	int pkg;
+	int die;
 
-	for (pkg = 0; pkg < max_packages; pkg++)
-		kfree(pmu->boxes[pkg]);
+	for (die = 0; die < max_dies; die++)
+		kfree(pmu->boxes[die]);
 	kfree(pmu->boxes);
 }
 
@@ -867,7 +867,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 	if (!pmus)
 		return -ENOMEM;
 
-	size = max_packages * sizeof(struct intel_uncore_box *);
+	size = max_dies * sizeof(struct intel_uncore_box *);
 
 	for (i = 0; i < type->num_boxes; i++) {
 		pmus[i].func_id = setid ? i : -1;
@@ -937,20 +937,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	struct intel_uncore_type *type;
 	struct intel_uncore_pmu *pmu = NULL;
 	struct intel_uncore_box *box;
-	int phys_id, pkg, ret;
+	int phys_id, die, ret;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
 	if (phys_id < 0)
 		return -ENODEV;
 
-	pkg = topology_phys_to_logical_pkg(phys_id);
-	if (pkg < 0)
+	die = (topology_max_die_per_package() > 1) ? phys_id :
+			topology_phys_to_logical_pkg(phys_id);
+	if (die < 0)
 		return -EINVAL;
 
 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
 
-		uncore_extra_pci_dev[pkg].dev[idx] = pdev;
+		uncore_extra_pci_dev[die].dev[idx] = pdev;
 		pci_set_drvdata(pdev, NULL);
 		return 0;
 	}
@@ -989,7 +990,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 		pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
 	}
 
-	if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
+	if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
 		return -EINVAL;
 
 	box = uncore_alloc_box(type, NUMA_NO_NODE);
@@ -1003,13 +1004,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 
 	atomic_inc(&box->refcnt);
 	box->pci_phys_id = phys_id;
-	box->pkgid = pkg;
+	box->dieid = die;
 	box->pci_dev = pdev;
 	box->pmu = pmu;
 	uncore_box_init(box);
 	pci_set_drvdata(pdev, box);
 
-	pmu->boxes[pkg] = box;
+	pmu->boxes[die] = box;
 	if (atomic_inc_return(&pmu->activeboxes) > 1)
 		return 0;
 
@@ -1017,7 +1018,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
 	ret = uncore_pmu_register(pmu);
 	if (ret) {
 		pci_set_drvdata(pdev, NULL);
-		pmu->boxes[pkg] = NULL;
+		pmu->boxes[die] = NULL;
 		uncore_box_exit(box);
 		kfree(box);
 	}
@@ -1028,16 +1029,17 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 {
 	struct intel_uncore_box *box;
 	struct intel_uncore_pmu *pmu;
-	int i, phys_id, pkg;
+	int i, phys_id, die;
 
 	phys_id = uncore_pcibus_to_physid(pdev->bus);
 
 	box = pci_get_drvdata(pdev);
 	if (!box) {
-		pkg = topology_phys_to_logical_pkg(phys_id);
+		die = (topology_max_die_per_package() > 1) ? phys_id :
+				topology_phys_to_logical_pkg(phys_id);
 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
-			if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
-				uncore_extra_pci_dev[pkg].dev[i] = NULL;
+			if (uncore_extra_pci_dev[die].dev[i] == pdev) {
+				uncore_extra_pci_dev[die].dev[i] = NULL;
 				break;
 			}
 		}
@@ -1050,7 +1052,7 @@ static void uncore_pci_remove(struct pci_dev *pdev)
 		return;
 
 	pci_set_drvdata(pdev, NULL);
-	pmu->boxes[box->pkgid] = NULL;
+	pmu->boxes[box->dieid] = NULL;
 	if (atomic_dec_return(&pmu->activeboxes) == 0)
 		uncore_pmu_unregister(pmu);
 	uncore_box_exit(box);
@@ -1062,7 +1064,7 @@ static int __init uncore_pci_init(void)
 	size_t size;
 	int ret;
 
-	size = max_packages * sizeof(struct pci_extra_dev);
+	size = max_dies * sizeof(struct pci_extra_dev);
 	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
 	if (!uncore_extra_pci_dev) {
 		ret = -ENOMEM;
@@ -1109,11 +1111,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
 {
 	struct intel_uncore_pmu *pmu = type->pmus;
 	struct intel_uncore_box *box;
-	int i, pkg;
+	int i, die;
 
-	pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
+	die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
 	for (i = 0; i < type->num_boxes; i++, pmu++) {
-		box = pmu->boxes[pkg];
+		box = pmu->boxes[die];
 		if (!box)
 			continue;
 
@@ -1146,13 +1148,13 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, pkg, target;
+	int i, die, target;
 
 	/* Check if exiting cpu is used for collecting uncore events */
 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
 		goto unref;
 	/* Find a new cpu to collect uncore events */
-	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
 
 	/* Migrate uncore events to the new target */
 	if (target < nr_cpu_ids)
@@ -1165,12 +1167,12 @@ static int uncore_event_cpu_offline(unsigned int cpu)
 
 unref:
 	/* Clear the references */
-	pkg = topology_logical_package_id(cpu);
+	die = topology_logical_die_id(cpu);
 	for (; *types; types++) {
 		type = *types;
 		pmu = type->pmus;
 		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
+			box = pmu->boxes[die];
 			if (box && atomic_dec_return(&box->refcnt) == 0)
 				uncore_box_exit(box);
 		}
@@ -1179,7 +1181,7 @@ unref:
 }
 
 static int allocate_boxes(struct intel_uncore_type **types,
-			  unsigned int pkg, unsigned int cpu)
+			  unsigned int die, unsigned int cpu)
 {
 	struct intel_uncore_box *box, *tmp;
 	struct intel_uncore_type *type;
@@ -1192,20 +1194,20 @@ static int allocate_boxes(struct intel_uncore_type **types,
 		type = *types;
 		pmu = type->pmus;
 		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			if (pmu->boxes[pkg])
+			if (pmu->boxes[die])
 				continue;
 			box = uncore_alloc_box(type, cpu_to_node(cpu));
 			if (!box)
 				goto cleanup;
 			box->pmu = pmu;
-			box->pkgid = pkg;
+			box->dieid = die;
 			list_add(&box->active_list, &allocated);
 		}
 	}
 	/* Install them in the pmus */
 	list_for_each_entry_safe(box, tmp, &allocated, active_list) {
 		list_del_init(&box->active_list);
-		box->pmu->boxes[pkg] = box;
+		box->pmu->boxes[die] = box;
 	}
 	return 0;
 
@@ -1222,10 +1224,10 @@ static int uncore_event_cpu_online(unsigned int cpu)
 	struct intel_uncore_type *type, **types = uncore_msr_uncores;
 	struct intel_uncore_pmu *pmu;
 	struct intel_uncore_box *box;
-	int i, ret, pkg, target;
+	int i, ret, die, target;
 
-	pkg = topology_logical_package_id(cpu);
-	ret = allocate_boxes(types, pkg, cpu);
+	die = topology_logical_die_id(cpu);
+	ret = allocate_boxes(types, die, cpu);
 	if (ret)
 		return ret;
 
@@ -1233,7 +1235,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
 		type = *types;
 		pmu = type->pmus;
 		for (i = 0; i < type->num_boxes; i++, pmu++) {
-			box = pmu->boxes[pkg];
+			box = pmu->boxes[die];
 			if (box && atomic_inc_return(&box->refcnt) == 1)
 				uncore_box_init(box);
 		}
@@ -1243,7 +1245,7 @@ static int uncore_event_cpu_online(unsigned int cpu)
 	 * Check if there is an online cpu in the package
 	 * which collects uncore events already.
 	 */
-	target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
+	target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
 	if (target < nr_cpu_ids)
 		return 0;
 
@@ -1419,7 +1421,7 @@ static int __init intel_uncore_init(void)
 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return -ENODEV;
 
-	max_packages = topology_max_packages();
+	max_dies = topology_max_packages() * topology_max_die_per_package();
 
 	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
 	if (uncore_init->pci_init) {
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79eb2e21e4f0..33aba2504cb1 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -108,7 +108,7 @@ struct intel_uncore_extra_reg {
 
 struct intel_uncore_box {
 	int pci_phys_id;
-	int pkgid;	/* Logical package ID */
+	int dieid;	/* Logical die ID */
	int n_active;	/* number of active events */
 	int n_events;
 	int cpu;	/* cpu to collect events */
@@ -467,7 +467,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box)
 
 static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
 {
-	return (box->pkgid < 0);
+	return (box->dieid < 0);
 }
 
 static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b10e04387f38..bbe89bc589f9 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1058,8 +1058,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve
 
 	if (reg1->idx != EXTRA_REG_NONE) {
 		int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
-		int pkg = box->pkgid;
-		struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx];
+		int die = box->dieid;
+		struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx];
 
 		if (filter_pdev) {
 			pci_write_config_dword(filter_pdev, reg1->reg,
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e57d2ca2ed87..3eab6ece52b4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -105,7 +105,7 @@ struct cpuinfo_x86 {
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
 	/* cpuid returned max cores value: */
-	u16			 x86_max_cores;
+	u16			x86_max_cores;
 	u16			apicid;
 	u16			initial_apicid;
 	u16			x86_clflush_size;
@@ -117,6 +117,8 @@ struct cpuinfo_x86 {
 	u16			logical_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
+	u16			cpu_die_id;
+	u16			logical_die_id;
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 	u32			microcode;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0d3fe060a44f..e1356a3b8223 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,7 @@ extern unsigned int num_processors;
 
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
 /* cpus sharing the last level cache: */
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38a1c33..4b14d2318251 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #define topology_logical_package_id(cpu)	(cpu_data(cpu).logical_proc_id)
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_logical_die_id(cpu)		(cpu_data(cpu).logical_die_id)
+#define topology_die_id(cpu)			(cpu_data(cpu).cpu_die_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 
 #ifdef CONFIG_SMP
+#define topology_die_cpumask(cpu)		(per_cpu(cpu_die_map, cpu))
 #define topology_core_cpumask(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)		(per_cpu(cpu_sibling_map, cpu))
 
 extern unsigned int __max_logical_packages;
 #define topology_max_packages()			(__max_logical_packages)
 
+extern unsigned int __max_die_per_package;
+
+static inline int topology_max_die_per_package(void)
+{
+	return __max_die_per_package;
+}
+
 extern int __max_smt_threads;
 
 static inline int topology_max_smt_threads(void)
@@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void)
 }
 
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
+int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
+int topology_phys_to_logical_die(unsigned int die, unsigned int cpu);
 bool topology_is_primary_thread(unsigned int cpu);
 bool topology_smt_supported(void);
 #else
 #define topology_max_packages()			(1)
 static inline int
 topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
+static inline int
+topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; }
 static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
+static inline int topology_phys_to_logical_die(unsigned int die,
+					       unsigned int cpu) { return 0; }
+static inline int topology_max_die_per_package(void) { return 1; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
 static inline bool topology_smt_supported(void) { return false; }
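The new interfaces above combine into one pattern that the drivers touched by this merge (rapl, uncore, coretemp) all follow: allocate one slot per possible die — topology_max_packages() * topology_max_die_per_package() — and index that array by topology_logical_die_id(). A condensed sketch of the pattern with a hypothetical mydrv driver (the names are illustrative, not code from this merge):

	/* Hypothetical per-die driver state illustrating the pattern. */
	static struct mydrv_state **mydrv_dies;
	static int max_dies;

	static int __init mydrv_init(void)
	{
		/* One slot for every die the system may have. */
		max_dies = topology_max_packages() *
			   topology_max_die_per_package();
		mydrv_dies = kcalloc(max_dies, sizeof(*mydrv_dies), GFP_KERNEL);
		return mydrv_dies ? 0 : -ENOMEM;
	}

	static struct mydrv_state *mydrv_get(unsigned int cpu)
	{
		unsigned int die = topology_logical_die_id(cpu);

		/* Unsigned compare also rejects the -1 "no mapping" value. */
		return die < max_dies ? mydrv_dies[die] : NULL;
	}

On single-die parts topology_max_die_per_package() is 1 and the logical die id collapses to the logical package id, which is why the converted drivers behave exactly as before on existing hardware.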
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8febe90470f4..309b6b9b49d4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1322,6 +1322,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c)
 		       cpu, apicid, c->initial_apicid);
 	}
 	BUG_ON(topology_update_package_map(c->phys_proc_id, cpu));
+	BUG_ON(topology_update_die_map(c->cpu_die_id, cpu));
 #else
 	c->logical_proc_id = 0;
 #endif
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 8f6c784141d1..ee48c3fc8a65 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -15,33 +15,66 @@
 /* leaf 0xb SMT level */
 #define SMT_LEVEL	0
 
-/* leaf 0xb sub-leaf types */
+/* extended topology sub-leaf types */
 #define INVALID_TYPE	0
 #define SMT_TYPE	1
 #define CORE_TYPE	2
+#define DIE_TYPE	5
 
 #define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
 #define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
 #define LEVEL_MAX_SIBLINGS(ebx)		((ebx) & 0xffff)
 
-int detect_extended_topology_early(struct cpuinfo_x86 *c)
-{
 #ifdef CONFIG_SMP
+unsigned int __max_die_per_package __read_mostly = 1;
+EXPORT_SYMBOL(__max_die_per_package);
+
+/*
+ * Check if given CPUID extended toplogy "leaf" is implemented
+ */
+static int check_extended_topology_leaf(int leaf)
+{
 	unsigned int eax, ebx, ecx, edx;
 
-	if (c->cpuid_level < 0xb)
+	cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+
+	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
 		return -1;
 
-	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+	return 0;
+}
+/*
+ * Return best CPUID Extended Toplogy Leaf supported
+ */
+static int detect_extended_topology_leaf(struct cpuinfo_x86 *c)
+{
+	if (c->cpuid_level >= 0x1f) {
+		if (check_extended_topology_leaf(0x1f) == 0)
+			return 0x1f;
+	}
 
-	/*
-	 * check if the cpuid leaf 0xb is actually implemented.
-	 */
-	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
+	if (c->cpuid_level >= 0xb) {
+		if (check_extended_topology_leaf(0xb) == 0)
+			return 0xb;
+	}
+
+	return -1;
+}
+#endif
+
+int detect_extended_topology_early(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	unsigned int eax, ebx, ecx, edx;
+	int leaf;
+
+	leaf = detect_extended_topology_leaf(c);
+	if (leaf < 0)
 		return -1;
 
 	set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
 
+	cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
 	/*
 	 * initial apic id, which also represents 32-bit extended x2apic id.
 	 */
@@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c)
 }
 
 /*
- * Check for extended topology enumeration cpuid leaf 0xb and if it
+ * Check for extended topology enumeration cpuid leaf, and if it
  * exists, use it for populating initial_apicid and cpu topology
  * detection.
  */
@@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	unsigned int eax, ebx, ecx, edx, sub_index;
-	unsigned int ht_mask_width, core_plus_mask_width;
+	unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
+	unsigned int die_select_mask, die_level_siblings;
+	int leaf;
 
-	if (detect_extended_topology_early(c) < 0)
+	leaf = detect_extended_topology_leaf(c);
+	if (leaf < 0)
 		return -1;
 
 	/*
 	 * Populate HT related information from sub-leaf level 0.
 	 */
-	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+	cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+	c->initial_apicid = edx;
 	core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
 	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+	die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+	die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
 
 	sub_index = 1;
 	do {
-		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+		cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx);
 
 		/*
 		 * Check for the Core type in the implemented sub leaves.
@@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c)
 		if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
 			core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
 			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
-			break;
+			die_level_siblings = core_level_siblings;
+			die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
+		}
+		if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) {
+			die_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
+			die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
 		}
 
 		sub_index++;
 	} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
 
 	core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
-
-	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
-						 & core_select_mask;
-	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	die_select_mask = (~(-1 << die_plus_mask_width)) >>
+				core_plus_mask_width;
+
+	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid,
+				ht_mask_width) & core_select_mask;
+	c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid,
+				core_plus_mask_width) & die_select_mask;
+	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid,
+				die_plus_mask_width);
 	/*
 	 * Reinit the apicid, now that we have extended initial_apicid.
 	 */
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
+	__max_die_per_package = (die_level_siblings / core_level_siblings);
 #endif
 	return 0;
 }
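To make the select-mask arithmetic in detect_extended_topology() concrete, here is a small worked example with hypothetical CPUID widths — 2 threads per core (ht_mask_width = 1), 8 cores per die (core_plus_mask_width = 4), 2 dies per package (die_plus_mask_width = 5). The expressions mirror the kernel code above; the numbers are chosen purely for illustration:

	#include <assert.h>

	int main(void)
	{
		unsigned int ht_mask_width = 1;		/* SMT bits */
		unsigned int core_plus_mask_width = 4;	/* core+SMT bits */
		unsigned int die_plus_mask_width = 5;	/* die+core+SMT bits */

		unsigned int core_select_mask =
			(~(-1 << core_plus_mask_width)) >> ht_mask_width;	/* 0x7 */
		unsigned int die_select_mask =
			(~(-1 << die_plus_mask_width)) >> core_plus_mask_width;	/* 0x1 */

		/* APIC ID 0x37 = 0b110111: package 1, die 1, core 3, thread 1 */
		unsigned int apicid = 0x37;

		assert(((apicid >> ht_mask_width) & core_select_mask) == 3);	  /* cpu_core_id */
		assert(((apicid >> core_plus_mask_width) & die_select_mask) == 1); /* cpu_die_id */
		assert((apicid >> die_plus_mask_width) == 1);			  /* phys_proc_id */
		return 0;
	}

Each level's shift width from CPUID strips off all lower topology levels, so a die id is simply the APIC ID shifted past the core+SMT bits and masked down to the die bits.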
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 1af7a2d89419..f78801114ee1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
+/* representing HT, core, and die siblings of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
+EXPORT_PER_CPU_SYMBOL(cpu_die_map);
+
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 
 /* Per CPU bogomips and other parameters */
@@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
 static unsigned int logical_packages __read_mostly;
+static unsigned int logical_die __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __read_mostly __max_smt_threads = 1;
@@ -306,6 +311,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg)
 	return -1;
 }
 EXPORT_SYMBOL(topology_phys_to_logical_pkg);
+/**
+ * topology_phys_to_logical_die - Map a physical die id to logical
+ *
+ * Returns logical die id or -1 if not found
+ */
+int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
+{
+	int cpu;
+	int proc_id = cpu_data(cur_cpu).phys_proc_id;
+
+	for_each_possible_cpu(cpu) {
+		struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+		if (c->initialized && c->cpu_die_id == die_id &&
+		    c->phys_proc_id == proc_id)
+			return c->logical_die_id;
+	}
+	return -1;
+}
+EXPORT_SYMBOL(topology_phys_to_logical_die);
 
 /**
  * topology_update_package_map - Update the physical to logical package map
@@ -330,6 +355,29 @@ found:
 	cpu_data(cpu).logical_proc_id = new;
 	return 0;
 }
+/**
+ * topology_update_die_map - Update the physical to logical die map
+ * @die:	The die id as retrieved via CPUID
+ * @cpu:	The cpu for which this is updated
+ */
+int topology_update_die_map(unsigned int die, unsigned int cpu)
+{
+	int new;
+
+	/* Already available somewhere? */
+	new = topology_phys_to_logical_die(die, cpu);
+	if (new >= 0)
+		goto found;
+
+	new = logical_die++;
+	if (new != die) {
+		pr_info("CPU %u Converting physical %u to logical die %u\n",
+			cpu, die, new);
+	}
+found:
+	cpu_data(cpu).logical_die_id = new;
+	return 0;
+}
 
 void __init smp_store_boot_cpu_info(void)
 {
@@ -339,6 +387,7 @@ void __init smp_store_boot_cpu_info(void)
 	*c = boot_cpu_data;
 	c->cpu_index = id;
 	topology_update_package_map(c->phys_proc_id, id);
+	topology_update_die_map(c->cpu_die_id, id);
 	c->initialized = true;
 }
 
@@ -393,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
 		if (c->phys_proc_id == o->phys_proc_id &&
+		    c->cpu_die_id == o->cpu_die_id &&
 		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
 			if (c->cpu_core_id == o->cpu_core_id)
 				return topology_sane(c, o, "smt");
@@ -404,6 +454,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 		}
 
 	} else if (c->phys_proc_id == o->phys_proc_id &&
+		   c->cpu_die_id == o->cpu_die_id &&
 		   c->cpu_core_id == o->cpu_core_id) {
 		return topology_sane(c, o, "smt");
 	}
@@ -466,6 +517,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 	return false;
 }
 
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if ((c->phys_proc_id == o->phys_proc_id) &&
+	    (c->cpu_die_id == o->cpu_die_id))
+		return true;
+	return false;
+}
+
+
 #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
@@ -528,6 +588,7 @@ void set_cpu_sibling_map(int cpu)
 		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
 		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
 		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
+		cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
@@ -576,6 +637,9 @@ void set_cpu_sibling_map(int cpu)
 		}
 		if (match_pkg(c, o) && !topology_same_node(c, o))
 			x86_has_numa_in_package = true;
+
+		if ((i == cpu) || (has_mp && match_die(c, o)))
+			link_mask(topology_die_cpumask, cpu, i);
 	}
 
 	threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -1180,6 +1244,7 @@ static __init void disable_smp(void)
 	physid_set_mask_of_physid(0, &phys_cpu_present_map);
 	cpumask_set_cpu(0, topology_sibling_cpumask(0));
 	cpumask_set_cpu(0, topology_core_cpumask(0));
+	cpumask_set_cpu(0, topology_die_cpumask(0));
 }
 
 /*
@@ -1275,6 +1340,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 	}
 
@@ -1495,6 +1561,8 @@ static void remove_siblinginfo(int cpu)
 			cpu_data(sibling).booted_cores--;
 	}
 
+	for_each_cpu(sibling, topology_die_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
 	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
 	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
@@ -1502,6 +1570,7 @@ static void remove_siblinginfo(int cpu)
 	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(topology_sibling_cpumask(cpu));
 	cpumask_clear(topology_core_cpumask(cpu));
+	cpumask_clear(topology_die_cpumask(cpu));
 	c->cpu_core_id = 0;
 	c->booted_cores = 0;
 	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 590fcf863006..77d81c1a63e9 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -251,6 +251,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 	}
 	set_cpu_sibling_map(0);
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 5fd9f167ecc1..4e033d4cc0dc 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -43,6 +43,9 @@ static ssize_t name##_list_show(struct device *dev,		\
 define_id_show_func(physical_package_id);
 static DEVICE_ATTR_RO(physical_package_id);
 
+define_id_show_func(die_id);
+static DEVICE_ATTR_RO(die_id);
+
 define_id_show_func(core_id);
 static DEVICE_ATTR_RO(core_id);
 
@@ -50,10 +53,22 @@ define_siblings_show_func(thread_siblings, sibling_cpumask);
 static DEVICE_ATTR_RO(thread_siblings);
 static DEVICE_ATTR_RO(thread_siblings_list);
 
+define_siblings_show_func(core_cpus, sibling_cpumask);
+static DEVICE_ATTR_RO(core_cpus);
+static DEVICE_ATTR_RO(core_cpus_list);
+
 define_siblings_show_func(core_siblings, core_cpumask);
 static DEVICE_ATTR_RO(core_siblings);
 static DEVICE_ATTR_RO(core_siblings_list);
 
+define_siblings_show_func(die_cpus, die_cpumask);
+static DEVICE_ATTR_RO(die_cpus);
+static DEVICE_ATTR_RO(die_cpus_list);
+
+define_siblings_show_func(package_cpus, core_cpumask);
+static DEVICE_ATTR_RO(package_cpus);
+static DEVICE_ATTR_RO(package_cpus_list);
+
 #ifdef CONFIG_SCHED_BOOK
 define_id_show_func(book_id);
 static DEVICE_ATTR_RO(book_id);
@@ -72,11 +87,18 @@ static DEVICE_ATTR_RO(drawer_siblings_list);
 
 static struct attribute *default_attrs[] = {
 	&dev_attr_physical_package_id.attr,
+	&dev_attr_die_id.attr,
 	&dev_attr_core_id.attr,
 	&dev_attr_thread_siblings.attr,
 	&dev_attr_thread_siblings_list.attr,
+	&dev_attr_core_cpus.attr,
+	&dev_attr_core_cpus_list.attr,
 	&dev_attr_core_siblings.attr,
 	&dev_attr_core_siblings_list.attr,
+	&dev_attr_die_cpus.attr,
+	&dev_attr_die_cpus_list.attr,
+	&dev_attr_package_cpus.attr,
+	&dev_attr_package_cpus_list.attr,
 #ifdef CONFIG_SCHED_BOOK
 	&dev_attr_book_id.attr,
 	&dev_attr_book_siblings.attr,
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 4d0d6c86c12f..fe6618e49dc4 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -96,10 +96,10 @@ struct platform_data {
96 struct device_attribute name_attr; 96 struct device_attribute name_attr;
97}; 97};
98 98
99/* Keep track of how many package pointers we allocated in init() */ 99/* Keep track of how many zone pointers we allocated in init() */
100static int max_packages __read_mostly; 100static int max_zones __read_mostly;
101/* Array of package pointers. Serialized by cpu hotplug lock */ 101/* Array of zone pointers. Serialized by cpu hotplug lock */
102static struct platform_device **pkg_devices; 102static struct platform_device **zone_devices;
103 103
104static ssize_t show_label(struct device *dev, 104static ssize_t show_label(struct device *dev,
105 struct device_attribute *devattr, char *buf) 105 struct device_attribute *devattr, char *buf)
@@ -422,10 +422,10 @@ static int chk_ucode_version(unsigned int cpu)
422 422
423static struct platform_device *coretemp_get_pdev(unsigned int cpu) 423static struct platform_device *coretemp_get_pdev(unsigned int cpu)
424{ 424{
425 int pkgid = topology_logical_package_id(cpu); 425 int id = topology_logical_die_id(cpu);
426 426
427 if (pkgid >= 0 && pkgid < max_packages) 427 if (id >= 0 && id < max_zones)
428 return pkg_devices[pkgid]; 428 return zone_devices[id];
429 return NULL; 429 return NULL;
430} 430}
431 431
@@ -531,7 +531,7 @@ static int coretemp_probe(struct platform_device *pdev)
531 struct device *dev = &pdev->dev; 531 struct device *dev = &pdev->dev;
532 struct platform_data *pdata; 532 struct platform_data *pdata;
533 533
534 /* Initialize the per-package data structures */ 534 /* Initialize the per-zone data structures */
535 pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL); 535 pdata = devm_kzalloc(dev, sizeof(struct platform_data), GFP_KERNEL);
536 if (!pdata) 536 if (!pdata)
537 return -ENOMEM; 537 return -ENOMEM;
@@ -566,13 +566,13 @@ static struct platform_driver coretemp_driver = {
566 566
567static struct platform_device *coretemp_device_add(unsigned int cpu) 567static struct platform_device *coretemp_device_add(unsigned int cpu)
568{ 568{
569 int err, pkgid = topology_logical_package_id(cpu); 569 int err, zoneid = topology_logical_die_id(cpu);
570 struct platform_device *pdev; 570 struct platform_device *pdev;
571 571
572 if (pkgid < 0) 572 if (zoneid < 0)
573 return ERR_PTR(-ENOMEM); 573 return ERR_PTR(-ENOMEM);
574 574
575 pdev = platform_device_alloc(DRVNAME, pkgid); 575 pdev = platform_device_alloc(DRVNAME, zoneid);
576 if (!pdev) 576 if (!pdev)
577 return ERR_PTR(-ENOMEM); 577 return ERR_PTR(-ENOMEM);
578 578
@@ -582,7 +582,7 @@ static struct platform_device *coretemp_device_add(unsigned int cpu)
582 return ERR_PTR(err); 582 return ERR_PTR(err);
583 } 583 }
584 584
585 pkg_devices[pkgid] = pdev; 585 zone_devices[zoneid] = pdev;
586 return pdev; 586 return pdev;
587} 587}
588 588
@@ -690,7 +690,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
690 * the rest. 690 * the rest.
691 */ 691 */
692 if (cpumask_empty(&pd->cpumask)) { 692 if (cpumask_empty(&pd->cpumask)) {
693 pkg_devices[topology_logical_package_id(cpu)] = NULL; 693 zone_devices[topology_logical_die_id(cpu)] = NULL;
694 platform_device_unregister(pdev); 694 platform_device_unregister(pdev);
695 return 0; 695 return 0;
696 } 696 }
@@ -728,10 +728,10 @@ static int __init coretemp_init(void)
728 if (!x86_match_cpu(coretemp_ids)) 728 if (!x86_match_cpu(coretemp_ids))
729 return -ENODEV; 729 return -ENODEV;
730 730
731 max_packages = topology_max_packages(); 731 max_zones = topology_max_packages() * topology_max_die_per_package();
732 pkg_devices = kcalloc(max_packages, sizeof(struct platform_device *), 732 zone_devices = kcalloc(max_zones, sizeof(struct platform_device *),
733 GFP_KERNEL); 733 GFP_KERNEL);
734 if (!pkg_devices) 734 if (!zone_devices)
735 return -ENOMEM; 735 return -ENOMEM;
736 736
737 err = platform_driver_register(&coretemp_driver); 737 err = platform_driver_register(&coretemp_driver);
@@ -747,7 +747,7 @@ static int __init coretemp_init(void)
747 747
748outdrv: 748outdrv:
749 platform_driver_unregister(&coretemp_driver); 749 platform_driver_unregister(&coretemp_driver);
750 kfree(pkg_devices); 750 kfree(zone_devices);
751 return err; 751 return err;
752} 752}
753module_init(coretemp_init) 753module_init(coretemp_init)
@@ -756,7 +756,7 @@ static void __exit coretemp_exit(void)
756{ 756{
757 cpuhp_remove_state(coretemp_hp_online); 757 cpuhp_remove_state(coretemp_hp_online);
758 platform_driver_unregister(&coretemp_driver); 758 platform_driver_unregister(&coretemp_driver);
759 kfree(pkg_devices); 759 kfree(zone_devices);
760} 760}
761module_exit(coretemp_exit) 761module_exit(coretemp_exit)
762 762
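
[editor's note] coretemp's conversion hinges on one invariant visible in the hunks above: topology_logical_die_id() yields a dense index in [0, topology_max_packages() * topology_max_die_per_package()), so a flat array of that size holds one zone pointer per die. A standalone sketch of that indexing scheme -- the two constants are stand-ins for the real topology_*() helpers, not kernel API:

#include <stdio.h>

#define MAX_PACKAGES 2  /* stand-in for topology_max_packages() */
#define DIES_PER_PKG 2  /* stand-in for topology_max_die_per_package() */

/* Dense enumeration: every (package, die) pair gets a unique slot in
 * a flat array of MAX_PACKAGES * DIES_PER_PKG entries, which is exactly
 * how zone_devices[] is sized and indexed after this patch. */
static int logical_die_id(int pkg, int die)
{
        return pkg * DIES_PER_PKG + die;
}

int main(void)
{
        int pkg, die;

        for (pkg = 0; pkg < MAX_PACKAGES; pkg++)
                for (die = 0; die < DIES_PER_PKG; die++)
                        printf("package %d die %d -> zone slot %d of %d\n",
                               pkg, die, logical_die_id(pkg, die),
                               MAX_PACKAGES * DIES_PER_PKG);
        return 0;
}

On a 1-die system the two enumerations coincide, which is why the behavior there is unchanged.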
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index f888117b0efc..8692f6b79f93 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -166,12 +166,15 @@ struct rapl_domain {
166#define power_zone_to_rapl_domain(_zone) \ 166#define power_zone_to_rapl_domain(_zone) \
167 container_of(_zone, struct rapl_domain, power_zone) 167 container_of(_zone, struct rapl_domain, power_zone)
168 168
169/* maximum rapl package domain name: package-%d-die-%d */
170#define PACKAGE_DOMAIN_NAME_LENGTH 30
169 171
170/* Each physical package contains multiple domains, these are the common
172
173/* Each rapl package contains multiple domains, these are the common
171 * data across RAPL domains within a package. 174 * data across RAPL domains within a package.
172 */ 175 */
173struct rapl_package { 176struct rapl_package {
174 unsigned int id; /* physical package/socket id */ 177 unsigned int id; /* logical die id, equals physical id on 1-die systems */
175 unsigned int nr_domains; 178 unsigned int nr_domains;
176 unsigned long domain_map; /* bit map of active domains */ 179 unsigned long domain_map; /* bit map of active domains */
177 unsigned int power_unit; 180 unsigned int power_unit;
@@ -186,6 +189,7 @@ struct rapl_package {
186 int lead_cpu; /* one active cpu per package for access */ 189 int lead_cpu; /* one active cpu per package for access */
187 /* Track active cpus */ 190 /* Track active cpus */
188 struct cpumask cpumask; 191 struct cpumask cpumask;
192 char name[PACKAGE_DOMAIN_NAME_LENGTH];
189}; 193};
190 194
191struct rapl_defaults { 195struct rapl_defaults {
@@ -252,8 +256,9 @@ static struct powercap_control_type *control_type; /* PowerCap Controller */
252static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */ 256static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
253 257
254/* caller to ensure CPU hotplug lock is held */ 258/* caller to ensure CPU hotplug lock is held */
255static struct rapl_package *find_package_by_id(int id) 259static struct rapl_package *rapl_find_package_domain(int cpu)
256{ 260{
261 int id = topology_logical_die_id(cpu);
257 struct rapl_package *rp; 262 struct rapl_package *rp;
258 263
259 list_for_each_entry(rp, &rapl_packages, plist) { 264 list_for_each_entry(rp, &rapl_packages, plist) {
@@ -913,8 +918,8 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
913 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 918 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
914 rp->time_unit = 1000000 / (1 << value); 919 rp->time_unit = 1000000 / (1 << value);
915 920
916 pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n", 921 pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
917 rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); 922 rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
918 923
919 return 0; 924 return 0;
920} 925}
@@ -938,8 +943,8 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
938 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 943 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
939 rp->time_unit = 1000000 / (1 << value); 944 rp->time_unit = 1000000 / (1 << value);
940 945
941 pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n", 946 pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
942 rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); 947 rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
943 948
944 return 0; 949 return 0;
945} 950}
@@ -1168,7 +1173,7 @@ static void rapl_update_domain_data(struct rapl_package *rp)
1168 u64 val; 1173 u64 val;
1169 1174
1170 for (dmn = 0; dmn < rp->nr_domains; dmn++) { 1175 for (dmn = 0; dmn < rp->nr_domains; dmn++) {
1171 pr_debug("update package %d domain %s data\n", rp->id, 1176 pr_debug("update %s domain %s data\n", rp->name,
1172 rp->domains[dmn].name); 1177 rp->domains[dmn].name);
1173 /* exclude non-raw primitives */ 1178 /* exclude non-raw primitives */
1174 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) { 1179 for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
@@ -1193,7 +1198,6 @@ static void rapl_unregister_powercap(void)
1193static int rapl_package_register_powercap(struct rapl_package *rp) 1198static int rapl_package_register_powercap(struct rapl_package *rp)
1194{ 1199{
1195 struct rapl_domain *rd; 1200 struct rapl_domain *rd;
1196 char dev_name[17]; /* max domain name = 7 + 1 + 8 for int + 1 for null*/
1197 struct powercap_zone *power_zone = NULL; 1201 struct powercap_zone *power_zone = NULL;
1198 int nr_pl, ret; 1202 int nr_pl, ret;
1199 1203
@@ -1204,20 +1208,16 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
1204 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { 1208 for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
1205 if (rd->id == RAPL_DOMAIN_PACKAGE) { 1209 if (rd->id == RAPL_DOMAIN_PACKAGE) {
1206 nr_pl = find_nr_power_limit(rd); 1210 nr_pl = find_nr_power_limit(rd);
1207 pr_debug("register socket %d package domain %s\n", 1211 pr_debug("register package domain %s\n", rp->name);
1208 rp->id, rd->name);
1209 memset(dev_name, 0, sizeof(dev_name));
1210 snprintf(dev_name, sizeof(dev_name), "%s-%d",
1211 rd->name, rp->id);
1212 power_zone = powercap_register_zone(&rd->power_zone, 1212 power_zone = powercap_register_zone(&rd->power_zone,
1213 control_type, 1213 control_type,
1214 dev_name, NULL, 1214 rp->name, NULL,
1215 &zone_ops[rd->id], 1215 &zone_ops[rd->id],
1216 nr_pl, 1216 nr_pl,
1217 &constraint_ops); 1217 &constraint_ops);
1218 if (IS_ERR(power_zone)) { 1218 if (IS_ERR(power_zone)) {
1219 pr_debug("failed to register package, %d\n", 1219 pr_debug("failed to register power zone %s\n",
1220 rp->id); 1220 rp->name);
1221 return PTR_ERR(power_zone); 1221 return PTR_ERR(power_zone);
1222 } 1222 }
1223 /* track parent zone in per package/socket data */ 1223 /* track parent zone in per package/socket data */
@@ -1243,8 +1243,8 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
1243 &constraint_ops); 1243 &constraint_ops);
1244 1244
1245 if (IS_ERR(power_zone)) { 1245 if (IS_ERR(power_zone)) {
1246 pr_debug("failed to register power_zone, %d:%s:%s\n", 1246 pr_debug("failed to register power_zone, %s:%s\n",
1247 rp->id, rd->name, dev_name); 1247 rp->name, rd->name);
1248 ret = PTR_ERR(power_zone); 1248 ret = PTR_ERR(power_zone);
1249 goto err_cleanup; 1249 goto err_cleanup;
1250 } 1250 }
@@ -1257,7 +1257,7 @@ err_cleanup:
1257 * failed after the first domain setup. 1257 * failed after the first domain setup.
1258 */ 1258 */
1259 while (--rd >= rp->domains) { 1259 while (--rd >= rp->domains) {
1260 pr_debug("unregister package %d domain %s\n", rp->id, rd->name); 1260 pr_debug("unregister %s domain %s\n", rp->name, rd->name);
1261 powercap_unregister_zone(control_type, &rd->power_zone); 1261 powercap_unregister_zone(control_type, &rd->power_zone);
1262 } 1262 }
1263 1263
@@ -1288,7 +1288,7 @@ static int __init rapl_register_psys(void)
1288 rd->rpl[0].name = pl1_name; 1288 rd->rpl[0].name = pl1_name;
1289 rd->rpl[1].prim_id = PL2_ENABLE; 1289 rd->rpl[1].prim_id = PL2_ENABLE;
1290 rd->rpl[1].name = pl2_name; 1290 rd->rpl[1].name = pl2_name;
1291 rd->rp = find_package_by_id(0); 1291 rd->rp = rapl_find_package_domain(0);
1292 1292
1293 power_zone = powercap_register_zone(&rd->power_zone, control_type, 1293 power_zone = powercap_register_zone(&rd->power_zone, control_type,
1294 "psys", NULL, 1294 "psys", NULL,
@@ -1367,8 +1367,8 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
1367 /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */ 1367 /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
1368 if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) { 1368 if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
1369 if (val64) { 1369 if (val64) {
1370 pr_info("RAPL package %d domain %s locked by BIOS\n", 1370 pr_info("RAPL %s domain %s locked by BIOS\n",
1371 rd->rp->id, rd->name); 1371 rd->rp->name, rd->name);
1372 rd->state |= DOMAIN_STATE_BIOS_LOCKED; 1372 rd->state |= DOMAIN_STATE_BIOS_LOCKED;
1373 } 1373 }
1374 } 1374 }
@@ -1397,10 +1397,10 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
1397 } 1397 }
1398 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); 1398 rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
1399 if (!rp->nr_domains) { 1399 if (!rp->nr_domains) {
1400 pr_debug("no valid rapl domains found in package %d\n", rp->id); 1400 pr_debug("no valid rapl domains found in %s\n", rp->name);
1401 return -ENODEV; 1401 return -ENODEV;
1402 } 1402 }
1403 pr_debug("found %d domains on package %d\n", rp->nr_domains, rp->id); 1403 pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
1404 1404
1405 rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain), 1405 rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
1406 GFP_KERNEL); 1406 GFP_KERNEL);
@@ -1433,8 +1433,8 @@ static void rapl_remove_package(struct rapl_package *rp)
1433 rd_package = rd; 1433 rd_package = rd;
1434 continue; 1434 continue;
1435 } 1435 }
1436 pr_debug("remove package, undo power limit on %d: %s\n", 1436 pr_debug("remove package, undo power limit on %s: %s\n",
1437 rp->id, rd->name); 1437 rp->name, rd->name);
1438 powercap_unregister_zone(control_type, &rd->power_zone); 1438 powercap_unregister_zone(control_type, &rd->power_zone);
1439 } 1439 }
1440 /* do parent zone last */ 1440 /* do parent zone last */
@@ -1444,9 +1444,11 @@ static void rapl_remove_package(struct rapl_package *rp)
1444} 1444}
1445 1445
1446/* called from CPU hotplug notifier, hotplug lock held */ 1446/* called from CPU hotplug notifier, hotplug lock held */
1447static struct rapl_package *rapl_add_package(int cpu, int pkgid) 1447static struct rapl_package *rapl_add_package(int cpu)
1448{ 1448{
1449 int id = topology_logical_die_id(cpu);
1449 struct rapl_package *rp; 1450 struct rapl_package *rp;
1451 struct cpuinfo_x86 *c = &cpu_data(cpu);
1450 int ret; 1452 int ret;
1451 1453
1452 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL); 1454 rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
@@ -1454,9 +1456,16 @@ static struct rapl_package *rapl_add_package(int cpu, int pkgid)
1454 return ERR_PTR(-ENOMEM); 1456 return ERR_PTR(-ENOMEM);
1455 1457
1456 /* add the new package to the list */ 1458 /* add the new package to the list */
1457 rp->id = pkgid; 1459 rp->id = id;
1458 rp->lead_cpu = cpu; 1460 rp->lead_cpu = cpu;
1459 1461
1462 if (topology_max_die_per_package() > 1)
1463 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
1464 "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id);
1465 else
1466 snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
1467 c->phys_proc_id);
1468
1460 /* check if the package contains valid domains */ 1469 /* check if the package contains valid domains */
1461 if (rapl_detect_domains(rp, cpu) || 1470 if (rapl_detect_domains(rp, cpu) ||
1462 rapl_defaults->check_unit(rp, cpu)) { 1471 rapl_defaults->check_unit(rp, cpu)) {
@@ -1485,12 +1494,11 @@ err_free_package:
1485 */ 1494 */
1486static int rapl_cpu_online(unsigned int cpu) 1495static int rapl_cpu_online(unsigned int cpu)
1487{ 1496{
1488 int pkgid = topology_physical_package_id(cpu);
1489 struct rapl_package *rp; 1497 struct rapl_package *rp;
1490 1498
1491 rp = find_package_by_id(pkgid); 1499 rp = rapl_find_package_domain(cpu);
1492 if (!rp) { 1500 if (!rp) {
1493 rp = rapl_add_package(cpu, pkgid); 1501 rp = rapl_add_package(cpu);
1494 if (IS_ERR(rp)) 1502 if (IS_ERR(rp))
1495 return PTR_ERR(rp); 1503 return PTR_ERR(rp);
1496 } 1504 }
@@ -1500,11 +1508,10 @@ static int rapl_cpu_online(unsigned int cpu)
1500 1508
1501static int rapl_cpu_down_prep(unsigned int cpu) 1509static int rapl_cpu_down_prep(unsigned int cpu)
1502{ 1510{
1503 int pkgid = topology_physical_package_id(cpu);
1504 struct rapl_package *rp; 1511 struct rapl_package *rp;
1505 int lead_cpu; 1512 int lead_cpu;
1506 1513
1507 rp = find_package_by_id(pkgid); 1514 rp = rapl_find_package_domain(cpu);
1508 if (!rp) 1515 if (!rp)
1509 return 0; 1516 return 0;
1510 1517
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 319b77126168..e85d54d1cdf3 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -43,7 +43,7 @@ MODULE_PARM_DESC(notify_delay_ms,
43*/ 43*/
44#define MAX_NUMBER_OF_TRIPS 2 44#define MAX_NUMBER_OF_TRIPS 2
45 45
46struct pkg_device { 46struct zone_device {
47 int cpu; 47 int cpu;
48 bool work_scheduled; 48 bool work_scheduled;
49 u32 tj_max; 49 u32 tj_max;
@@ -58,10 +58,10 @@ static struct thermal_zone_params pkg_temp_tz_params = {
58 .no_hwmon = true, 58 .no_hwmon = true,
59}; 59};
60 60
61/* Keep track of how many package pointers we allocated in init() */ 61/* Keep track of how many zone pointers we allocated in init() */
62static int max_packages __read_mostly; 62static int max_id __read_mostly;
63/* Array of package pointers */ 63/* Array of zone pointers */
64static struct pkg_device **packages; 64static struct zone_device **zones;
65/* Serializes interrupt notification, work and hotplug */ 65/* Serializes interrupt notification, work and hotplug */
66static DEFINE_SPINLOCK(pkg_temp_lock); 66static DEFINE_SPINLOCK(pkg_temp_lock);
67/* Protects zone operation in the work function against hotplug removal */ 67/* Protects zone operation in the work function against hotplug removal */
@@ -108,12 +108,12 @@ err_out:
108 * 108 *
109 * - Other callsites: Must hold pkg_temp_lock 109 * - Other callsites: Must hold pkg_temp_lock
110 */ 110 */
111static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu) 111static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
112{ 112{
113 int pkgid = topology_logical_package_id(cpu); 113 int id = topology_logical_die_id(cpu);
114 114
115 if (pkgid >= 0 && pkgid < max_packages) 115 if (id >= 0 && id < max_id)
116 return packages[pkgid]; 116 return zones[id];
117 return NULL; 117 return NULL;
118} 118}
119 119
@@ -138,12 +138,13 @@ static int get_tj_max(int cpu, u32 *tj_max)
138 138
139static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) 139static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
140{ 140{
141 struct pkg_device *pkgdev = tzd->devdata; 141 struct zone_device *zonedev = tzd->devdata;
142 u32 eax, edx; 142 u32 eax, edx;
143 143
144 rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_STATUS, &eax, &edx); 144 rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_STATUS,
145 &eax, &edx);
145 if (eax & 0x80000000) { 146 if (eax & 0x80000000) {
146 *temp = pkgdev->tj_max - ((eax >> 16) & 0x7f) * 1000; 147 *temp = zonedev->tj_max - ((eax >> 16) & 0x7f) * 1000;
147 pr_debug("sys_get_curr_temp %d\n", *temp); 148 pr_debug("sys_get_curr_temp %d\n", *temp);
148 return 0; 149 return 0;
149 } 150 }
@@ -153,7 +154,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
153static int sys_get_trip_temp(struct thermal_zone_device *tzd, 154static int sys_get_trip_temp(struct thermal_zone_device *tzd,
154 int trip, int *temp) 155 int trip, int *temp)
155{ 156{
156 struct pkg_device *pkgdev = tzd->devdata; 157 struct zone_device *zonedev = tzd->devdata;
157 unsigned long thres_reg_value; 158 unsigned long thres_reg_value;
158 u32 mask, shift, eax, edx; 159 u32 mask, shift, eax, edx;
159 int ret; 160 int ret;
@@ -169,14 +170,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
169 shift = THERM_SHIFT_THRESHOLD0; 170 shift = THERM_SHIFT_THRESHOLD0;
170 } 171 }
171 172
172 ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, 173 ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
173 &eax, &edx); 174 &eax, &edx);
174 if (ret < 0) 175 if (ret < 0)
175 return ret; 176 return ret;
176 177
177 thres_reg_value = (eax & mask) >> shift; 178 thres_reg_value = (eax & mask) >> shift;
178 if (thres_reg_value) 179 if (thres_reg_value)
179 *temp = pkgdev->tj_max - thres_reg_value * 1000; 180 *temp = zonedev->tj_max - thres_reg_value * 1000;
180 else 181 else
181 *temp = 0; 182 *temp = 0;
182 pr_debug("sys_get_trip_temp %d\n", *temp); 183 pr_debug("sys_get_trip_temp %d\n", *temp);
@@ -187,14 +188,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
187static int 188static int
188sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp) 189sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
189{ 190{
190 struct pkg_device *pkgdev = tzd->devdata; 191 struct zone_device *zonedev = tzd->devdata;
191 u32 l, h, mask, shift, intr; 192 u32 l, h, mask, shift, intr;
192 int ret; 193 int ret;
193 194
194 if (trip >= MAX_NUMBER_OF_TRIPS || temp >= pkgdev->tj_max) 195 if (trip >= MAX_NUMBER_OF_TRIPS || temp >= zonedev->tj_max)
195 return -EINVAL; 196 return -EINVAL;
196 197
197 ret = rdmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, 198 ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
198 &l, &h); 199 &l, &h);
199 if (ret < 0) 200 if (ret < 0)
200 return ret; 201 return ret;
@@ -216,11 +217,12 @@ sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
216 if (!temp) { 217 if (!temp) {
217 l &= ~intr; 218 l &= ~intr;
218 } else { 219 } else {
219 l |= (pkgdev->tj_max - temp)/1000 << shift; 220 l |= (zonedev->tj_max - temp)/1000 << shift;
220 l |= intr; 221 l |= intr;
221 } 222 }
222 223
223 return wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 224 return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
225 l, h);
224} 226}
225 227
226static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip, 228static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip,
@@ -275,26 +277,26 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
275{ 277{
276 struct thermal_zone_device *tzone = NULL; 278 struct thermal_zone_device *tzone = NULL;
277 int cpu = smp_processor_id(); 279 int cpu = smp_processor_id();
278 struct pkg_device *pkgdev; 280 struct zone_device *zonedev;
279 u64 msr_val, wr_val; 281 u64 msr_val, wr_val;
280 282
281 mutex_lock(&thermal_zone_mutex); 283 mutex_lock(&thermal_zone_mutex);
282 spin_lock_irq(&pkg_temp_lock); 284 spin_lock_irq(&pkg_temp_lock);
283 ++pkg_work_cnt; 285 ++pkg_work_cnt;
284 286
285 pkgdev = pkg_temp_thermal_get_dev(cpu); 287 zonedev = pkg_temp_thermal_get_dev(cpu);
286 if (!pkgdev) { 288 if (!zonedev) {
287 spin_unlock_irq(&pkg_temp_lock); 289 spin_unlock_irq(&pkg_temp_lock);
288 mutex_unlock(&thermal_zone_mutex); 290 mutex_unlock(&thermal_zone_mutex);
289 return; 291 return;
290 } 292 }
291 pkgdev->work_scheduled = false; 293 zonedev->work_scheduled = false;
292 294
293 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 295 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
294 wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1); 296 wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
295 if (wr_val != msr_val) { 297 if (wr_val != msr_val) {
296 wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val); 298 wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val);
297 tzone = pkgdev->tzone; 299 tzone = zonedev->tzone;
298 } 300 }
299 301
300 enable_pkg_thres_interrupt(); 302 enable_pkg_thres_interrupt();
@@ -320,7 +322,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
320static int pkg_thermal_notify(u64 msr_val) 322static int pkg_thermal_notify(u64 msr_val)
321{ 323{
322 int cpu = smp_processor_id(); 324 int cpu = smp_processor_id();
323 struct pkg_device *pkgdev; 325 struct zone_device *zonedev;
324 unsigned long flags; 326 unsigned long flags;
325 327
326 spin_lock_irqsave(&pkg_temp_lock, flags); 328 spin_lock_irqsave(&pkg_temp_lock, flags);
@@ -329,10 +331,10 @@ static int pkg_thermal_notify(u64 msr_val)
329 disable_pkg_thres_interrupt(); 331 disable_pkg_thres_interrupt();
330 332
331 /* Work is per package, so scheduling it once is enough. */ 333 /* Work is per package, so scheduling it once is enough. */
332 pkgdev = pkg_temp_thermal_get_dev(cpu); 334 zonedev = pkg_temp_thermal_get_dev(cpu);
333 if (pkgdev && !pkgdev->work_scheduled) { 335 if (zonedev && !zonedev->work_scheduled) {
334 pkgdev->work_scheduled = true; 336 zonedev->work_scheduled = true;
335 pkg_thermal_schedule_work(pkgdev->cpu, &pkgdev->work); 337 pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
336 } 338 }
337 339
338 spin_unlock_irqrestore(&pkg_temp_lock, flags); 340 spin_unlock_irqrestore(&pkg_temp_lock, flags);
@@ -341,12 +343,12 @@ static int pkg_thermal_notify(u64 msr_val)
341 343
342static int pkg_temp_thermal_device_add(unsigned int cpu) 344static int pkg_temp_thermal_device_add(unsigned int cpu)
343{ 345{
344 int pkgid = topology_logical_package_id(cpu); 346 int id = topology_logical_die_id(cpu);
345 u32 tj_max, eax, ebx, ecx, edx; 347 u32 tj_max, eax, ebx, ecx, edx;
346 struct pkg_device *pkgdev; 348 struct zone_device *zonedev;
347 int thres_count, err; 349 int thres_count, err;
348 350
349 if (pkgid >= max_packages) 351 if (id >= max_id)
350 return -ENOMEM; 352 return -ENOMEM;
351 353
352 cpuid(6, &eax, &ebx, &ecx, &edx); 354 cpuid(6, &eax, &ebx, &ecx, &edx);
@@ -360,51 +362,51 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
360 if (err) 362 if (err)
361 return err; 363 return err;
362 364
363 pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL); 365 zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
364 if (!pkgdev) 366 if (!zonedev)
365 return -ENOMEM; 367 return -ENOMEM;
366 368
367 INIT_DELAYED_WORK(&pkgdev->work, pkg_temp_thermal_threshold_work_fn); 369 INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
368 pkgdev->cpu = cpu; 370 zonedev->cpu = cpu;
369 pkgdev->tj_max = tj_max; 371 zonedev->tj_max = tj_max;
370 pkgdev->tzone = thermal_zone_device_register("x86_pkg_temp", 372 zonedev->tzone = thermal_zone_device_register("x86_pkg_temp",
371 thres_count, 373 thres_count,
372 (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01, 374 (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
373 pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0); 375 zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
374 if (IS_ERR(pkgdev->tzone)) { 376 if (IS_ERR(zonedev->tzone)) {
375 err = PTR_ERR(pkgdev->tzone); 377 err = PTR_ERR(zonedev->tzone);
376 kfree(pkgdev); 378 kfree(zonedev);
377 return err; 379 return err;
378 } 380 }
379 /* Store MSR value for package thermal interrupt, to restore at exit */ 381 /* Store MSR value for package thermal interrupt, to restore at exit */
380 rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low, 382 rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
381 pkgdev->msr_pkg_therm_high); 383 zonedev->msr_pkg_therm_high);
382 384
383 cpumask_set_cpu(cpu, &pkgdev->cpumask); 385 cpumask_set_cpu(cpu, &zonedev->cpumask);
384 spin_lock_irq(&pkg_temp_lock); 386 spin_lock_irq(&pkg_temp_lock);
385 packages[pkgid] = pkgdev; 387 zones[id] = zonedev;
386 spin_unlock_irq(&pkg_temp_lock); 388 spin_unlock_irq(&pkg_temp_lock);
387 return 0; 389 return 0;
388} 390}
389 391
390static int pkg_thermal_cpu_offline(unsigned int cpu) 392static int pkg_thermal_cpu_offline(unsigned int cpu)
391{ 393{
392 struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu); 394 struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
393 bool lastcpu, was_target; 395 bool lastcpu, was_target;
394 int target; 396 int target;
395 397
396 if (!pkgdev) 398 if (!zonedev)
397 return 0; 399 return 0;
398 400
399 target = cpumask_any_but(&pkgdev->cpumask, cpu); 401 target = cpumask_any_but(&zonedev->cpumask, cpu);
400 cpumask_clear_cpu(cpu, &pkgdev->cpumask); 402 cpumask_clear_cpu(cpu, &zonedev->cpumask);
401 lastcpu = target >= nr_cpu_ids; 403 lastcpu = target >= nr_cpu_ids;
402 /* 404 /*
403 * Remove the sysfs files, if this is the last cpu in the package 405 * Remove the sysfs files, if this is the last cpu in the package
404 * before doing further cleanups. 406 * before doing further cleanups.
405 */ 407 */
406 if (lastcpu) { 408 if (lastcpu) {
407 struct thermal_zone_device *tzone = pkgdev->tzone; 409 struct thermal_zone_device *tzone = zonedev->tzone;
408 410
409 /* 411 /*
410 * We must protect against a work function calling 412 * We must protect against a work function calling
@@ -413,7 +415,7 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
413 * won't try to call. 415 * won't try to call.
414 */ 416 */
415 mutex_lock(&thermal_zone_mutex); 417 mutex_lock(&thermal_zone_mutex);
416 pkgdev->tzone = NULL; 418 zonedev->tzone = NULL;
417 mutex_unlock(&thermal_zone_mutex); 419 mutex_unlock(&thermal_zone_mutex);
418 420
419 thermal_zone_device_unregister(tzone); 421 thermal_zone_device_unregister(tzone);
@@ -427,8 +429,8 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
427 * one. When we drop the lock, then the interrupt notify function 429 * one. When we drop the lock, then the interrupt notify function
428 * will see the new target. 430 * will see the new target.
429 */ 431 */
430 was_target = pkgdev->cpu == cpu; 432 was_target = zonedev->cpu == cpu;
431 pkgdev->cpu = target; 433 zonedev->cpu = target;
432 434
433 /* 435 /*
434 * If this is the last CPU in the package remove the package 436 * If this is the last CPU in the package remove the package
@@ -437,23 +439,23 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
437 * worker will see the package anymore. 439 * worker will see the package anymore.
438 */ 440 */
439 if (lastcpu) { 441 if (lastcpu) {
440 packages[topology_logical_package_id(cpu)] = NULL; 442 zones[topology_logical_die_id(cpu)] = NULL;
441 /* After this point nothing touches the MSR anymore. */ 443 /* After this point nothing touches the MSR anymore. */
442 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, 444 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
443 pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high); 445 zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
444 } 446 }
445 447
446 /* 448 /*
447 * Check whether there is work scheduled and whether the work is 449 * Check whether there is work scheduled and whether the work is
448 * targeted at the outgoing CPU. 450 * targeted at the outgoing CPU.
449 */ 451 */
450 if (pkgdev->work_scheduled && was_target) { 452 if (zonedev->work_scheduled && was_target) {
451 /* 453 /*
452 * To cancel the work we need to drop the lock, otherwise 454 * To cancel the work we need to drop the lock, otherwise
453 * we might deadlock if the work needs to be flushed. 455 * we might deadlock if the work needs to be flushed.
454 */ 456 */
455 spin_unlock_irq(&pkg_temp_lock); 457 spin_unlock_irq(&pkg_temp_lock);
456 cancel_delayed_work_sync(&pkgdev->work); 458 cancel_delayed_work_sync(&zonedev->work);
457 spin_lock_irq(&pkg_temp_lock); 459 spin_lock_irq(&pkg_temp_lock);
458 /* 460 /*
459 * If this is not the last cpu in the package and the work 461 * If this is not the last cpu in the package and the work
@@ -461,21 +463,21 @@ static int pkg_thermal_cpu_offline(unsigned int cpu)
461 * need to reschedule the work, otherwise the interrupt 463 * need to reschedule the work, otherwise the interrupt
462 * stays disabled forever. 464 * stays disabled forever.
463 */ 465 */
464 if (!lastcpu && pkgdev->work_scheduled) 466 if (!lastcpu && zonedev->work_scheduled)
465 pkg_thermal_schedule_work(target, &pkgdev->work); 467 pkg_thermal_schedule_work(target, &zonedev->work);
466 } 468 }
467 469
468 spin_unlock_irq(&pkg_temp_lock); 470 spin_unlock_irq(&pkg_temp_lock);
469 471
470 /* Final cleanup if this is the last cpu */ 472 /* Final cleanup if this is the last cpu */
471 if (lastcpu) 473 if (lastcpu)
472 kfree(pkgdev); 474 kfree(zonedev);
473 return 0; 475 return 0;
474} 476}
475 477
476static int pkg_thermal_cpu_online(unsigned int cpu) 478static int pkg_thermal_cpu_online(unsigned int cpu)
477{ 479{
478 struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu); 480 struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
479 struct cpuinfo_x86 *c = &cpu_data(cpu); 481 struct cpuinfo_x86 *c = &cpu_data(cpu);
480 482
481 /* Paranoia check */ 483 /* Paranoia check */
@@ -483,8 +485,8 @@ static int pkg_thermal_cpu_online(unsigned int cpu)
483 return -ENODEV; 485 return -ENODEV;
484 486
485 /* If the package exists, nothing to do */ 487 /* If the package exists, nothing to do */
486 if (pkgdev) { 488 if (zonedev) {
487 cpumask_set_cpu(cpu, &pkgdev->cpumask); 489 cpumask_set_cpu(cpu, &zonedev->cpumask);
488 return 0; 490 return 0;
489 } 491 }
490 return pkg_temp_thermal_device_add(cpu); 492 return pkg_temp_thermal_device_add(cpu);
@@ -503,10 +505,10 @@ static int __init pkg_temp_thermal_init(void)
503 if (!x86_match_cpu(pkg_temp_thermal_ids)) 505 if (!x86_match_cpu(pkg_temp_thermal_ids))
504 return -ENODEV; 506 return -ENODEV;
505 507
506 max_packages = topology_max_packages(); 508 max_id = topology_max_packages() * topology_max_die_per_package();
507 packages = kcalloc(max_packages, sizeof(struct pkg_device *), 509 zones = kcalloc(max_id, sizeof(struct zone_device *),
508 GFP_KERNEL); 510 GFP_KERNEL);
509 if (!packages) 511 if (!zones)
510 return -ENOMEM; 512 return -ENOMEM;
511 513
512 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online", 514 ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
@@ -525,7 +527,7 @@ static int __init pkg_temp_thermal_init(void)
525 return 0; 527 return 0;
526 528
527err: 529err:
528 kfree(packages); 530 kfree(zones);
529 return ret; 531 return ret;
530} 532}
531module_init(pkg_temp_thermal_init) 533module_init(pkg_temp_thermal_init)
@@ -537,7 +539,7 @@ static void __exit pkg_temp_thermal_exit(void)
537 539
538 cpuhp_remove_state(pkg_thermal_hp_state); 540 cpuhp_remove_state(pkg_thermal_hp_state);
539 debugfs_remove_recursive(debugfs); 541 debugfs_remove_recursive(debugfs);
540 kfree(packages); 542 kfree(zones);
541} 543}
542module_exit(pkg_temp_thermal_exit) 544module_exit(pkg_temp_thermal_exit)
543 545
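
[editor's note] Most of the thermal diff is mechanical pkg->zone renaming, but the offline path is worth restating: when a CPU leaves, cpumask_any_but() picks any surviving CPU in the zone to inherit the MSR work, and only if none remains is the zone torn down. A user-space sketch of that selection, with a plain bitmask standing in for struct cpumask and NR_CPUS chosen arbitrarily:

#include <stdio.h>

#define NR_CPUS 8

/* Return any set bit in mask other than cpu, or NR_CPUS if none --
 * the same contract as the kernel's cpumask_any_but(). */
static int cpumask_any_but(unsigned int mask, int cpu)
{
        int i;

        for (i = 0; i < NR_CPUS; i++)
                if (i != cpu && (mask & (1u << i)))
                        return i;
        return NR_CPUS;
}

int main(void)
{
        unsigned int zone_mask = 0x0c;  /* CPUs 2 and 3 in this zone */
        int cpu = 3;                    /* CPU going offline */
        int target = cpumask_any_but(zone_mask, cpu);

        if (target >= NR_CPUS)
                printf("cpu %d was the last one: tear the zone down\n", cpu);
        else
                printf("retarget zone work from cpu %d to cpu %d\n",
                       cpu, target);
        return 0;
}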
diff --git a/include/linux/topology.h b/include/linux/topology.h
index cb0775e1ee4b..47a3e3c08036 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -184,6 +184,9 @@ static inline int cpu_to_mem(int cpu)
184#ifndef topology_physical_package_id 184#ifndef topology_physical_package_id
185#define topology_physical_package_id(cpu) ((void)(cpu), -1) 185#define topology_physical_package_id(cpu) ((void)(cpu), -1)
186#endif 186#endif
187#ifndef topology_die_id
188#define topology_die_id(cpu) ((void)(cpu), -1)
189#endif
187#ifndef topology_core_id 190#ifndef topology_core_id
188#define topology_core_id(cpu) ((void)(cpu), 0) 191#define topology_core_id(cpu) ((void)(cpu), 0)
189#endif 192#endif
@@ -193,6 +196,9 @@ static inline int cpu_to_mem(int cpu)
193#ifndef topology_core_cpumask 196#ifndef topology_core_cpumask
194#define topology_core_cpumask(cpu) cpumask_of(cpu) 197#define topology_core_cpumask(cpu) cpumask_of(cpu)
195#endif 198#endif
199#ifndef topology_die_cpumask
200#define topology_die_cpumask(cpu) cpumask_of(cpu)
201#endif
196 202
197#ifdef CONFIG_SCHED_SMT 203#ifdef CONFIG_SCHED_SMT
198static inline const struct cpumask *cpu_smt_mask(int cpu) 204static inline const struct cpumask *cpu_smt_mask(int cpu)
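
[editor's note] The generic header changes follow the file's usual pattern: each topology accessor gets a do-nothing default so architectures only override what they support. A sketch of how the new fallback behaves when no arch definition precedes it, compiled stand-alone outside the kernel for illustration:

#include <stdio.h>

/* With no arch override in scope, the generic stub applies: evaluate
 * the argument (for side effects), then yield -1 via the comma operator. */
#ifndef topology_die_id
#define topology_die_id(cpu)    ((void)(cpu), -1)
#endif

int main(void)
{
        /* On an arch without die support, user-visible die_id reads -1. */
        printf("die id of cpu0: %d\n", topology_die_id(0));
        return 0;
}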