From 11b8990d195e1356eb0f046e83e8f1ea708e1a53 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 28 Feb 2012 09:01:26 +0200 Subject: tools/virtio: add linux/module.h stub Make the tool build again after virtio changes broke it. Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/module.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tools/virtio/linux/module.h (limited to 'tools') diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h new file mode 100644 index 00000000000..e69de29bb2d -- cgit v1.2.2 From b0820a50a0439764411b779208f0e6a67b937e72 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 28 Feb 2012 09:02:53 +0200 Subject: tools/virtio: add linux/hrtimer.h stub Make tool build after virtio changes broke it. Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/hrtimer.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tools/virtio/linux/hrtimer.h (limited to 'tools') diff --git a/tools/virtio/linux/hrtimer.h b/tools/virtio/linux/hrtimer.h new file mode 100644 index 00000000000..e69de29bb2d -- cgit v1.2.2 From b17d5c6e190f3d328aae0444f8b93d58d0015714 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 28 Feb 2012 09:07:58 +0200 Subject: tools/virtio: stub out strong barriers The tool should never use them, abort if it does. Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/virtio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index b4fbc91c41b..7579f19e61e 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -181,6 +181,9 @@ struct virtqueue { #define smp_mb() mb() # define smp_rmb() barrier() # define smp_wmb() barrier() +/* Weak barriers should be used. 
If not - it's a bug */ +# define rmb() abort() +# define wmb() abort() #else #error Please fill in barrier macros #endif -- cgit v1.2.2 From 568a89904c7fc93071efd7f811fc58aff6d5774a Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 11 Oct 2011 15:33:50 +0200 Subject: cpupower: Better interface for accessing AMD pci registers AMD's BKDG (Bios and Kernel Developers Guide) talks in the CPU spec of their CPU families about PCI registers defined by "device" (slot) and func(tion). Assuming that CPU specific configuration PCI devices are always on domain and bus zero a pci_slot_func_init() func which gets the slot and func of the desired PCI device passed looks like the most convenient way. This also obsoletes the PCI device id maintenance. Signed-off-by: Thomas Renninger CC: Andreas Herrmann Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/helpers/amd.c | 4 +-- tools/power/cpupower/utils/helpers/helpers.h | 7 +++-- tools/power/cpupower/utils/helpers/pci.c | 35 ++++++++++++++-------- .../cpupower/utils/idle_monitor/amd_fam14h_idle.c | 9 ++---- 4 files changed, 32 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c index 87d5605bdda..6437ef39aee 100644 --- a/tools/power/cpupower/utils/helpers/amd.c +++ b/tools/power/cpupower/utils/helpers/amd.c @@ -112,14 +112,12 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family, int amd_pci_get_num_boost_states(int *active, int *states) { struct pci_access *pci_acc; - int vendor_id = 0x1022; - int boost_dev_ids[4] = {0x1204, 0x1604, 0x1704, 0}; struct pci_dev *device; uint8_t val = 0; *active = *states = 0; - device = pci_acc_init(&pci_acc, vendor_id, boost_dev_ids); + device = pci_slot_func_init(&pci_acc, 0x18, 4); if (device == NULL) return -ENODEV; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 2747e738efb..6280daa4e24 100644 --- 
a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -132,8 +132,11 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu); /* PCI stuff ****************************/ extern int amd_pci_get_num_boost_states(int *active, int *states); -extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, - int *dev_ids); +extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, + int bus, int slot, int func, int vendor, + int dev); +extern struct pci_dev *pci_slot_func_init(struct pci_access **pacc, + int slot, int func); /* PCI stuff ****************************/ diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c index cd2eb6fe41c..9690798e644 100644 --- a/tools/power/cpupower/utils/helpers/pci.c +++ b/tools/power/cpupower/utils/helpers/pci.c @@ -10,19 +10,24 @@ * **pacc : if a valid pci_dev is returned * *pacc must be passed to pci_acc_cleanup to free it * - * vendor_id : the pci vendor id matching the pci device to access - * dev_ids : device ids matching the pci device to access + * domain: domain + * bus: bus + * slot: slot + * func: func + * vendor: vendor + * device: device + * Pass -1 for one of the six above to match any * * Returns : * struct pci_dev which can be used with pci_{read,write}_* functions * to access the PCI config space of matching pci devices */ -struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, - int *dev_ids) +struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, + int slot, int func, int vendor, int dev) { - struct pci_filter filter_nb_link = { -1, -1, -1, -1, vendor_id, 0}; + struct pci_filter filter_nb_link = { domain, bus, slot, func, + vendor, dev }; struct pci_dev *device; - unsigned int i; *pacc = pci_alloc(); if (*pacc == NULL) @@ -31,14 +36,20 @@ struct pci_dev *pci_acc_init(struct pci_access **pacc, int vendor_id, pci_init(*pacc); pci_scan_bus(*pacc); - 
for (i = 0; dev_ids[i] != 0; i++) { - filter_nb_link.device = dev_ids[i]; - for (device = (*pacc)->devices; device; device = device->next) { - if (pci_filter_match(&filter_nb_link, device)) - return device; - } + for (device = (*pacc)->devices; device; device = device->next) { + if (pci_filter_match(&filter_nb_link, device)) + return device; } pci_cleanup(*pacc); return NULL; } + +/* Typically one wants to get a specific slot(device)/func of the root domain + and bus */ +struct pci_dev *pci_slot_func_init(struct pci_access **pacc, int slot, + int func) +{ + return pci_acc_init(pacc, 0, 0, slot, func, -1, -1); +} + #endif /* defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 202e555988b..108108dfa68 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -20,8 +20,6 @@ #include "idle_monitor/cpupower-monitor.h" #include "helpers/helpers.h" -/******** PCI parts could go into own file and get shared ***************/ - #define PCI_NON_PC0_OFFSET 0xb0 #define PCI_PC1_OFFSET 0xb4 #define PCI_PC6_OFFSET 0xb8 @@ -82,10 +80,7 @@ static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = { }; static struct pci_access *pci_acc; -static int pci_vendor_id = 0x1022; -static int pci_dev_ids[2] = {0x1716, 0}; static struct pci_dev *amd_fam14h_pci_dev; - static int nbp1_entered; struct timespec start_time; @@ -303,7 +298,9 @@ struct cpuidle_monitor *amd_fam14h_register(void) sizeof(unsigned long long)); } - amd_fam14h_pci_dev = pci_acc_init(&pci_acc, pci_vendor_id, pci_dev_ids); + /* We need PCI device: Slot 18, Func 6, compare with BKDG + for fam 12h/14h */ + amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6); if (amd_fam14h_pci_dev == NULL || pci_acc == NULL) return NULL; -- cgit v1.2.2 From f642089ce0e89931bc2f8484f6ce862536f2b8e9 Mon Sep 17 00:00:00 2001 From: 
Thomas Renninger Date: Tue, 11 Oct 2011 15:33:51 +0200 Subject: cpupower: AMD fam14h/Ontario monitor can also be used by fam12h cpus The name of the monitor is updated at runtime to the name of the CPU type. Signed-off-by: Thomas Renninger CC: Andreas Herrmann Signed-off-by: Dominik Brodowski --- tools/power/cpupower/man/cpupower-monitor.1 | 2 +- .../power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index d5cfa265c3d..1141c207371 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -107,7 +107,7 @@ Deepest package sleep states may in reality show up as machine/platform wide sleep states and can only be entered if all cores are idle. Look up Intel manuals (some are provided in the References section) for further details. -.SS "Ontario" "Liano" +.SS "Fam_12h" "Fam_14h" AMD laptop and desktop processor (family 12h and 14h) sleep state counters. The registers are accessed via PCI and therefore can still be read out while cores have been offlined. 
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 108108dfa68..2116df9ad83 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -281,13 +281,13 @@ struct cpuidle_monitor *amd_fam14h_register(void) if (cpupower_cpu_info.vendor != X86_VENDOR_AMD) return NULL; - if (cpupower_cpu_info.family == 0x14) { - if (cpu_count <= 0 || cpu_count > 2) { - fprintf(stderr, "AMD fam14h: Invalid cpu count: %d\n", - cpu_count); - return NULL; - } - } else + if (cpupower_cpu_info.family == 0x14) + strncpy(amd_fam14h_monitor.name, "Fam_14h", + MONITOR_NAME_LEN - 1); + else if (cpupower_cpu_info.family == 0x12) + strncpy(amd_fam14h_monitor.name, "Fam_12h", + MONITOR_NAME_LEN - 1); + else return NULL; /* We do not alloc for nbp1 machine wide counter */ @@ -322,7 +322,7 @@ static void amd_fam14h_unregister(void) } struct cpuidle_monitor amd_fam14h_monitor = { - .name = "Ontario", + .name = "", .hw_states = amd_fam14h_cstates, .hw_states_num = AMD_FAM14H_STATE_NUM, .start = amd_fam14h_start, -- cgit v1.2.2 From e7d85a934182d5bde1eea865169fbf2e0637a3ed Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 16 Dec 2011 15:35:51 +0100 Subject: cpupower: Add cpupower-idle-info manpage The last missing manpage for cpupower tools. More info about other architecture's sleep state specialities would be great. 
Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/man/cpupower-idle-info.1 | 90 +++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tools/power/cpupower/man/cpupower-idle-info.1 (limited to 'tools') diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1 new file mode 100644 index 00000000000..4178effd9e9 --- /dev/null +++ b/tools/power/cpupower/man/cpupower-idle-info.1 @@ -0,0 +1,90 @@ +.TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual" +.SH "NAME" +.LP +cpupower idle\-info \- Utility to retrieve cpu idle kernel information +.SH "SYNTAX" +.LP +cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP] +.SH "DESCRIPTION" +.LP +A tool which prints out per cpu idle information helpful to developers and interested users. +.SH "OPTIONS" +.LP +.TP +\fB\-f\fR \fB\-\-silent\fR +Only print a summary of all available C-states in the system. +.TP +\fB\-e\fR \fB\-\-proc\fR +deprecated. +Prints out idle information in old /proc/acpi/processor/*/power format. This +interface has been removed from the kernel for quite some time, do not let +further code depend on this option, best do not use it. + +.SH IDLE\-INFO DESCRIPTIONS +CPU sleep state statistics and descriptions are retrieved from sysfs files, +exported by the cpuidle kernel subsystem. The kernel only updates these +statistics when it enters or leaves an idle state, therefore on a very idle or +a very busy system, these statistics may not be accurate. They still provide a +good overview about the usage and availability of processor sleep states on +the platform. + +Be aware that the sleep states as exported by the hardware or BIOS and used by +the Linux kernel may not exactly reflect the capabilities of the +processor. This often is the case on the X86 architecture when the acpi_idle +driver is used. 
It is also possible that the hardware overrules the kernel +requests, due to internal activity monitors or other reasons. +On recent X86 platforms it is often possible to read out hardware registers +which monitor the duration of sleep states the processor resided in. The +cpupower monitor tool (cpupower\-monitor(1)) can be used to show real sleep +state residencies. Please refer to the architecture specific description +section below. + +.SH IDLE\-INFO ARCHITECTURE SPECIFIC DESCRIPTIONS +.SS "X86" +POLL idle state + +If cpuidle is active, X86 platforms have one special idle state. +The POLL idle state is not a real idle state, it does not save any +power. Instead, a busy\-loop is executed doing nothing for a short period of +time. This state is used if the kernel knows that work has to be processed +very soon and entering any real hardware idle state may result in a slight +performance penalty. + +There exist two different cpuidle drivers on the X86 architecture platform: + +"acpi_idle" cpuidle driver + +The acpi_idle cpuidle driver retrieves available sleep states (C\-states) from +the ACPI BIOS tables (from the _CST ACPI function on recent platforms or from +the FADT BIOS table on older ones). +The C1 state is not retrieved from ACPI tables. If the C1 state is entered, +the kernel will call the hlt instruction (or mwait on Intel). + +"intel_idle" cpuidle driver + +In kernel 2.6.36 the intel_idle driver was introduced. +It only serves recent Intel CPUs (Nehalem, Westmere, Sandybridge, Atoms or +newer). On older Intel CPUs the acpi_idle driver is still used (if the BIOS +provides C\-state ACPI tables). +The intel_idle driver knows the sleep state capabilities of the processor and +ignores ACPI BIOS exported processor sleep states tables. + +.SH "REMARKS" +.LP +By default only values of core zero are displayed. How to display settings of +other cores is described in the cpupower(1) manpage in the \-\-cpu option +section. 
+.SH REFERENCES +http://www.acpi.info/spec.htm +.SH "FILES" +.nf +\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP +\fI/sys/devices/system/cpu/cpuidle/*\fP +.fi +.SH "AUTHORS" +.nf +Thomas Renninger +.fi +.SH "SEE ALSO" +.LP +cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1) -- cgit v1.2.2 From e03bd1aa00cff10d8a833442456a07b17dae32b8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 16 Dec 2011 15:35:52 +0100 Subject: cpupower: Unify cpupower-frequency-* manpages cpupower-frequency-* manpages slightly differed from the others. - Use uppercase letters in the title - Show cpupower Manual in the header - Remove Mattia from left down corner of the manpage, he is already listed as author - Remove --help, prints this message -> not needed Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/man/cpupower-frequency-info.1 | 4 +--- tools/power/cpupower/man/cpupower-frequency-set.1 | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index bb60a8d1e45..4a1918ea8f9 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -1,4 +1,4 @@ -.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" "" +.TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP cpupower frequency\-info \- Utility to retrieve cpufreq kernel information @@ -50,8 +50,6 @@ Prints out information like provided by the /proc/cpufreq interface in 2.4. and \fB\-m\fR \fB\-\-human\fR human\-readable output for the \-f, \-w, \-s and \-y parameters. .TP -\fB\-h\fR \fB\-\-help\fR -Prints out the help screen. .SH "REMARKS" .LP By default only values of core zero are displayed. 
How to display settings of diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1 index 685f469093a..3eacc8d03d1 100644 --- a/tools/power/cpupower/man/cpupower-frequency-set.1 +++ b/tools/power/cpupower/man/cpupower-frequency-set.1 @@ -1,4 +1,4 @@ -.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" "" +.TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual" .SH "NAME" .LP cpupower frequency\-set \- A small tool which allows to modify cpufreq settings. @@ -26,8 +26,6 @@ specific frequency to be set. Requires userspace governor to be available and lo \fB\-r\fR \fB\-\-related\fR modify all hardware-related CPUs at the same time .TP -\fB\-h\fR \fB\-\-help\fR -Prints out the help screen. .SH "REMARKS" .LP By default values are applied on all cores. How to modify single core -- cgit v1.2.2 From 0b37ee65e5d5e626b50852718ed72979160f7be0 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 16 Dec 2011 15:35:53 +0100 Subject: cpupower: Fix number of idle states The number of idle states was wrong. The POLL idle state (on X86) was missed out: Number of idle states: 4 Available idle states: C1-NHM C3-NHM C6-NHM While the POLL is not a real idle state, its statistics should still be shown. It's now also explained in a detailed manpage. This should fix a bug of missing the first idle state on other archs. 
Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/cpuidle-info.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index b028267c137..e076bebde1c 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -43,9 +43,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) } printf(_("Number of idle states: %d\n"), idlestates); - printf(_("Available idle states:")); - for (idlestate = 1; idlestate < idlestates; idlestate++) { + for (idlestate = 0; idlestate < idlestates; idlestate++) { tmp = sysfs_get_idlestate_name(cpu, idlestate); if (!tmp) continue; @@ -57,7 +56,7 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) if (!verbose) return; - for (idlestate = 1; idlestate < idlestates; idlestate++) { + for (idlestate = 0; idlestate < idlestates; idlestate++) { tmp = sysfs_get_idlestate_name(cpu, idlestate); if (!tmp) continue; -- cgit v1.2.2 From e0c6082daeb982df70f00bbb122fed66f7f57c3e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 16 Dec 2011 15:35:54 +0100 Subject: cpupower: Remove unneeded code and by that fix a memleak Looks like some not needed debug code slipped in. Also this code: tmp = sysfs_get_idlestate_name(cpu, idlestates - 1); performs a strdup and the mem was not freed again. -> delete it. 
Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/cpuidle-info.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index e076bebde1c..8145af5f93a 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -35,13 +35,6 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("CPU %u: Can't read idle state info\n"), cpu); return; } - tmp = sysfs_get_idlestate_name(cpu, idlestates - 1); - if (!tmp) { - printf(_("Could not determine max idle state %u\n"), - idlestates - 1); - return; - } - printf(_("Number of idle states: %d\n"), idlestates); printf(_("Available idle states:")); for (idlestate = 0; idlestate < idlestates; idlestate++) { -- cgit v1.2.2 From f2a00bb31cef363199c0f5870e9cd386ea3f5919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ozan=20=C3=87a=C4=9Flayan?= Date: Thu, 19 Jan 2012 16:06:36 +0200 Subject: cpupower: Fix linking with --as-needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix linking order to avoid undefined reference errors when using --as-needed linker flag. Signed-off-by: Ozan Çağlayan Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index e8a03aceceb..f01b3f5e312 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -193,7 +193,7 @@ $(UTIL_OBJS): $(UTIL_HEADERS) cpupower: $(UTIL_OBJS) libcpupower.so.$(LIB_MAJ) $(ECHO) " CC " $@ - $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) -lcpupower -lrt -lpci -L. -o $@ $(UTIL_OBJS) + $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L. 
-o $@ $(QUIET) $(STRIPCMD) $@ po/$(PACKAGE).pot: $(UTIL_SRC) -- cgit v1.2.2 From cf0213fdff9c0e9392fcde925113ed965cbd1f7e Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Wed, 1 Feb 2012 12:08:18 +0100 Subject: cpupower tool: remove use of undefined variables from the clean target of the top makefile UTIL_BINS and IDLE_OBJS variables are not defined at all, so there's no need to remove their content from the 'clean' target. Signed-off-by: Franck Bui-Huu Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index f01b3f5e312..eb7f44844d5 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -228,8 +228,6 @@ compile-bench: libcpupower.so.$(LIB_MAJ) clean: -find . \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ | xargs rm -f - -rm -f $(UTIL_BINS) - -rm -f $(IDLE_OBJS) -rm -f cpupower -rm -f libcpupower.so* -rm -rf po/*.gmo po/*.pot -- cgit v1.2.2 From 3827150458cfefe1fa1e536617c906367795add2 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Wed, 1 Feb 2012 12:08:19 +0100 Subject: cpupower tool: makefile: simplify the recipe used to generate cpupower.pot target Use the '-p' and '-o' switches to specify the pathname of the output file to xgettext(1). This avoids to move manually the output file if xgettext(1) succeeds. 
Signed-off-by: Franck Bui-Huu Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index eb7f44844d5..19526054d72 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -199,9 +199,7 @@ cpupower: $(UTIL_OBJS) libcpupower.so.$(LIB_MAJ) po/$(PACKAGE).pot: $(UTIL_SRC) $(ECHO) " GETTEXT " $@ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \ - --keyword=_ --keyword=N_ $(UTIL_SRC) && \ - test -f $(PACKAGE).po && \ - mv -f $(PACKAGE).po po/$(PACKAGE).pot + --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F) po/%.gmo: po/%.po $(ECHO) " MSGFMT " $@ -- cgit v1.2.2 From 68bb2c3a145c9ccac9e695baffefde0c3b303cba Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Wed, 1 Feb 2012 12:08:20 +0100 Subject: cpupower tool: allow to build in a separate directory This patch allows cpupower tool to generate its output files in a separate directory. This is now possible by passing the 'O=' to the command line. This can be useful for a normal user if the kernel source code is located in a read only location. This patch stole some bits of the perf makefile. 
[linux@dominikbrodowski.net: fix commit message] Signed-off-by: Franck Bui-Huu Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 87 ++++++++++++++++++++++++------------- tools/power/cpupower/bench/Makefile | 23 ++++++---- 2 files changed, 71 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 19526054d72..a93e06cfcc2 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -19,6 +19,16 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif # --- CONFIGURATION BEGIN --- @@ -87,6 +97,7 @@ AR = $(CROSS)ar STRIP = $(CROSS)strip RANLIB = $(CROSS)ranlib HOSTCC = gcc +MKDIR = mkdir # Now we set up the build system @@ -95,7 +106,7 @@ HOSTCC = gcc # set up PWD so that older versions of make will work with our build. 
PWD = $(shell pwd) -GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo po/$$HLANG.gmo; done;} +GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo; done;} export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS @@ -122,15 +133,18 @@ UTIL_OBJS = utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \ utils/cpupower.o utils/cpufreq-info.o utils/cpufreq-set.o \ utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o +UTIL_SRC := $(UTIL_OBJS:.o=.c) + +UTIL_OBJS := $(addprefix $(OUTPUT),$(UTIL_OBJS)) + UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \ utils/helpers/bitmask.h \ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def -UTIL_SRC := $(UTIL_OBJS:.o=.c) - LIB_HEADERS = lib/cpufreq.h lib/sysfs.h LIB_SRC = lib/cpufreq.c lib/sysfs.c LIB_OBJS = lib/cpufreq.o lib/sysfs.o +LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) CFLAGS += -pipe @@ -168,79 +182,90 @@ endif # the actual make rules -all: libcpupower cpupower $(COMPILE_NLS) $(COMPILE_BENCH) +all: libcpupower $(OUTPUT)cpupower $(COMPILE_NLS) $(COMPILE_BENCH) -lib/%.o: $(LIB_SRC) $(LIB_HEADERS) +$(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS) $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c -libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) +$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS) $(ECHO) " LD " $@ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \ -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS) - @ln -sf $@ libcpupower.so - @ln -sf $@ libcpupower.so.$(LIB_MIN) + @ln -sf $(@F) $(OUTPUT)libcpupower.so + @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN) -libcpupower: libcpupower.so.$(LIB_MAJ) +libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ) # Let all .o files depend on its .c file and all headers # Might be worth to put this into utils/Makefile at some point of time $(UTIL_OBJS): $(UTIL_HEADERS) -.c.o: +$(OUTPUT)%.o: %.c $(ECHO) " CC " $@ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c -cpupower: 
$(UTIL_OBJS) libcpupower.so.$(LIB_MAJ) +$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) $(ECHO) " CC " $@ - $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L. -o $@ + $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@ $(QUIET) $(STRIPCMD) $@ -po/$(PACKAGE).pot: $(UTIL_SRC) +$(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) $(ECHO) " GETTEXT " $@ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \ --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F) -po/%.gmo: po/%.po +$(OUTPUT)po/%.gmo: po/%.po $(ECHO) " MSGFMT " $@ $(QUIET) msgfmt -o $@ po/$*.po create-gmo: ${GMO_FILES} -update-po: po/$(PACKAGE).pot +update-po: $(OUTPUT)po/$(PACKAGE).pot $(ECHO) " MSGMRG " $@ $(QUIET) @for HLANG in $(LANGUAGES); do \ echo -n "Updating $$HLANG "; \ - if msgmerge po/$$HLANG.po po/$(PACKAGE).pot -o \ - po/$$HLANG.new.po; then \ - mv -f po/$$HLANG.new.po po/$$HLANG.po; \ + if msgmerge po/$$HLANG.po $< -o \ + $(OUTPUT)po/$$HLANG.new.po; then \ + mv -f $(OUTPUT)po/$$HLANG.new.po $(OUTPUT)po/$$HLANG.po; \ else \ echo "msgmerge for $$HLANG failed!"; \ - rm -f po/$$HLANG.new.po; \ + rm -f $(OUTPUT)po/$$HLANG.new.po; \ fi; \ done; -compile-bench: libcpupower.so.$(LIB_MAJ) - @V=$(V) confdir=$(confdir) $(MAKE) -C bench +compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) + @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) + +# we compile into subdirectories. if the target directory is not the +# source directory, they might not exists. So we depend the various +# files onto their directories. +DIRECTORY_DEPS = $(LIB_OBJS) $(UTIL_OBJS) $(GMO_FILES) +$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) + +# In the second step, we make a rule to actually create these directories +$(sort $(dir $(DIRECTORY_DEPS))): + $(ECHO) " MKDIR " $@ + $(QUIET) $(MKDIR) -p $@ 2>/dev/null clean: - -find . 
\( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ + -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \ | xargs rm -f - -rm -f cpupower - -rm -f libcpupower.so* - -rm -rf po/*.gmo po/*.pot - $(MAKE) -C bench clean + -rm -f $(OUTPUT)cpupower + -rm -f $(OUTPUT)libcpupower.so* + -rm -rf $(OUTPUT)po/*.{gmo,pot} + $(MAKE) -C bench O=$(OUTPUT) clean install-lib: $(INSTALL) -d $(DESTDIR)${libdir} - $(CP) libcpupower.so* $(DESTDIR)${libdir}/ + $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/ $(INSTALL) -d $(DESTDIR)${includedir} $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h install-tools: $(INSTALL) -d $(DESTDIR)${bindir} - $(INSTALL_PROGRAM) cpupower $(DESTDIR)${bindir} + $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir} install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -253,13 +278,13 @@ install-man: install-gmo: $(INSTALL) -d $(DESTDIR)${localedir} for HLANG in $(LANGUAGES); do \ - echo '$(INSTALL_DATA) -D po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ - $(INSTALL_DATA) -D po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ + echo '$(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \ + $(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; install-bench: @#DESTDIR must be set from outside to survive - @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench install + @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH) diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index 2b67606fc3e..7ec7021a29c 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile 
@@ -1,29 +1,36 @@ -LIBS = -L../ -lm -lcpupower +OUTPUT := ./ +ifeq ("$(origin O)", "command line") +ifneq ($(O),) + OUTPUT := $(O)/ +endif +endif -OBJS = main.o parse.o system.o benchmark.o +LIBS = -L../ -L$(OUTPUT) -lm -lcpupower + +OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\" -%.o : %.c +$(OUTPUT)%.o : %.c $(ECHO) " CC " $@ $(QUIET) $(CC) -c $(CFLAGS) $< -o $@ -cpufreq-bench: $(OBJS) +$(OUTPUT)cpufreq-bench: $(OBJS) $(ECHO) " CC " $@ $(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS) -all: cpufreq-bench +all: $(OUTPUT)cpufreq-bench install: mkdir -p $(DESTDIR)/$(sbindir) mkdir -p $(DESTDIR)/$(bindir) mkdir -p $(DESTDIR)/$(docdir) mkdir -p $(DESTDIR)/$(confdir) - install -m 755 cpufreq-bench $(DESTDIR)/$(sbindir)/cpufreq-bench + install -m 755 $(OUTPUT)cpufreq-bench $(DESTDIR)/$(sbindir)/cpufreq-bench install -m 755 cpufreq-bench_plot.sh $(DESTDIR)/$(bindir)/cpufreq-bench_plot.sh install -m 644 README-BENCH $(DESTDIR)/$(docdir)/README-BENCH install -m 755 cpufreq-bench_script.sh $(DESTDIR)/$(docdir)/cpufreq-bench_script.sh install -m 644 example.cfg $(DESTDIR)/$(confdir)/cpufreq-bench.conf clean: - rm -f *.o - rm -f cpufreq-bench + rm -f $(OUTPUT)*.o + rm -f $(OUTPUT)cpufreq-bench -- cgit v1.2.2 From 62d5a67d65d69d1c8ba6d2638193bc2864c14fdf Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Wed, 8 Feb 2012 02:05:10 +0100 Subject: cpupower: Fix broken mask values Signed-off-by: Thomas Renninger Tested-by: Dave Jones Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/helpers/helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 6280daa4e24..2eb584cf2f5 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -66,8 +66,8 @@ enum 
cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL, #define CPUPOWER_CAP_AMD_CBP 0x00000004 #define CPUPOWER_CAP_PERF_BIAS 0x00000008 #define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010 -#define CPUPOWER_CAP_IS_SNB 0x00000011 -#define CPUPOWER_CAP_INTEL_IDA 0x00000012 +#define CPUPOWER_CAP_IS_SNB 0x00000020 +#define CPUPOWER_CAP_INTEL_IDA 0x00000040 #define MAX_HW_PSTATES 10 -- cgit v1.2.2 From 7490ca1ea5af18bd0ceb3b96ddb2f10b09e08b5d Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Tue, 7 Feb 2012 17:15:56 +0100 Subject: cpupower tools: allow to build debug tools in a separate directory too Signed-off-by: Franck Bui-Huu Signed-off-by: Dominik Brodowski --- tools/power/cpupower/debug/i386/Makefile | 26 ++++++++++++++++---------- tools/power/cpupower/debug/x86_64/Makefile | 14 ++++++++++---- 2 files changed, 26 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile index d08cc1ead9b..626478b8a55 100644 --- a/tools/power/cpupower/debug/i386/Makefile +++ b/tools/power/cpupower/debug/i386/Makefile @@ -1,20 +1,26 @@ +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + + default: all -centrino-decode: centrino-decode.c - $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c +$(OUTPUT)centrino-decode: centrino-decode.c + $(CC) $(CFLAGS) -o $@ centrino-decode.c -dump_psb: dump_psb.c - $(CC) $(CFLAGS) -o dump_psb dump_psb.c +$(OUTPUT)dump_psb: dump_psb.c + $(CC) $(CFLAGS) -o $@ dump_psb.c -intel_gsic: intel_gsic.c - $(CC) $(CFLAGS) -o intel_gsic -llrmi intel_gsic.c +$(OUTPUT)intel_gsic: intel_gsic.c + $(CC) $(CFLAGS) -o $@ -llrmi intel_gsic.c -powernow-k8-decode: powernow-k8-decode.c - $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c +$(OUTPUT)powernow-k8-decode: powernow-k8-decode.c + $(CC) $(CFLAGS) -o $@ powernow-k8-decode.c -all: centrino-decode dump_psb intel_gsic powernow-k8-decode +all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb 
$(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode clean: - rm -rf centrino-decode dump_psb intel_gsic powernow-k8-decode + rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode} .PHONY: all default clean diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index 3326217dd31..8e26d67525c 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -1,14 +1,20 @@ +OUTPUT=./ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/ +endif + + default: all -centrino-decode: ../i386/centrino-decode.c +$(OUTPUT)centrino-decode: ../i386/centrino-decode.c $(CC) $(CFLAGS) -o $@ $< -powernow-k8-decode: ../i386/powernow-k8-decode.c +$(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c $(CC) $(CFLAGS) -o $@ $< -all: centrino-decode powernow-k8-decode +all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode clean: - rm -rf centrino-decode powernow-k8-decode + rm -rf $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode .PHONY: all default clean -- cgit v1.2.2 From f16603386b38c28979f4df1cafdc2fe73fa87d37 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Tue, 7 Feb 2012 17:15:57 +0100 Subject: cpupower tools: add install target to the debug tools' makefiles Signed-off-by: Franck Bui-Huu Signed-off-by: Dominik Brodowski --- tools/power/cpupower/debug/i386/Makefile | 14 +++++++++++++- tools/power/cpupower/debug/x86_64/Makefile | 12 +++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile index 626478b8a55..3ba158f0e28 100644 --- a/tools/power/cpupower/debug/i386/Makefile +++ b/tools/power/cpupower/debug/i386/Makefile @@ -3,6 +3,11 @@ ifeq ("$(origin O)", "command line") OUTPUT := $(O)/ endif +DESTDIR = +bindir = /usr/bin + +INSTALL = /usr/bin/install + default: all @@ -23,4 +28,11 @@ all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb 
$(OUTPUT)intel_gsic $(OUTPUT)pow clean: rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode} -.PHONY: all default clean +install: + $(INSTALL) -d $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)dump_psb $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)intel_gsic $(DESTDIR)${bindir} + +.PHONY: all default clean install diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index 8e26d67525c..1c521452671 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -3,6 +3,11 @@ ifeq ("$(origin O)", "command line") OUTPUT := $(O)/ endif +DESTDIR = +bindir = /usr/bin + +INSTALL = /usr/bin/install + default: all @@ -17,4 +22,9 @@ all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode clean: rm -rf $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode -.PHONY: all default clean +install: + $(INSTALL) -d $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir} + $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir} + +.PHONY: all default clean install -- cgit v1.2.2 From f467f7140339355978994ffcc23d569e7b4cea4d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 28 Mar 2012 14:42:54 -0700 Subject: selftests: launch individual selftests from the main Makefile Remove the run_tests script and launch the selftests by calling "make run_tests" from the selftests top directory instead. This delegates to the Makefile in each selftest directory, where it is decided how to launch the local test. This removes the need to add each selftest directory to the now removed "run_tests" top script. 
Signed-off-by: Frederic Weisbecker Cc: Dave Young Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 5 +++++ tools/testing/selftests/breakpoints/Makefile | 7 +++++-- tools/testing/selftests/run_tests | 8 -------- 3 files changed, 10 insertions(+), 10 deletions(-) delete mode 100644 tools/testing/selftests/run_tests (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4ec84018cc1..b1119f0db51 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -5,6 +5,11 @@ all: make -C $$TARGET; \ done; +run_tests: + for TARGET in $(TARGETS); do \ + make -C $$TARGET run_tests; \ + done; + clean: for TARGET in $(TARGETS); do \ make -C $$TARGET clean; \ diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index f362722cdce..931278035f5 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -11,10 +11,13 @@ endif all: ifeq ($(ARCH),x86) - gcc breakpoint_test.c -o run_test + gcc breakpoint_test.c -o breakpoint_test else echo "Not an x86 target, can't build breakpoints selftests" endif +run_tests: + ./breakpoint_test + clean: - rm -fr run_test + rm -fr breakpoint_test diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests deleted file mode 100644 index 320718a4e6b..00000000000 --- a/tools/testing/selftests/run_tests +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -TARGETS=breakpoints - -for TARGET in $TARGETS -do - $TARGET/run_test -done -- cgit v1.2.2 From cab6b0560080c6da5107c5d7dbba6372f7b288ab Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 28 Mar 2012 14:42:54 -0700 Subject: selftests/Makefile: make `run_tests' depend on `all' So a "make run_tests" will build the tests before trying to run them. 
Acked-by: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b1119f0db51..9203cd77fc3 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -5,7 +5,7 @@ all: make -C $$TARGET; \ done; -run_tests: +run_tests: all for TARGET in $(TARGETS); do \ make -C $$TARGET run_tests; \ done; -- cgit v1.2.2 From c6dd897f3bfc54a44942d742d6dfa842e33d88e0 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 28 Mar 2012 14:42:55 -0700 Subject: mm: move page-types.c from Documentation to tools/vm tools/ is the better place for vm tools which are used by many people. Moving them to tools also makes them open to more users instead of hiding them in the Documentation folder. This patch moves page-types.c to tools/vm/page-types.c. Also add a Makefile in tools/vm and fix two coding style problems: a) change const array to 'const char * const', b) change a space to tab for indent.
Signed-off-by: Dave Young Acked-by: Wu Fengguang Cc: Christoph Lameter Cc: Pekka Enberg Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/Makefile | 11 + tools/vm/page-types.c | 1102 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1113 insertions(+) create mode 100644 tools/vm/Makefile create mode 100644 tools/vm/page-types.c (limited to 'tools') diff --git a/tools/vm/Makefile b/tools/vm/Makefile new file mode 100644 index 00000000000..3823d4b1fa7 --- /dev/null +++ b/tools/vm/Makefile @@ -0,0 +1,11 @@ +# Makefile for vm tools + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -Wextra + +all: page-types +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +clean: + $(RM) page-types diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c new file mode 100644 index 00000000000..7dab7b25b5c --- /dev/null +++ b/tools/vm/page-types.c @@ -0,0 +1,1102 @@ +/* + * page-types: Tool for querying page flags + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should find a copy of v2 of the GNU General Public License somewhere on + * your Linux system; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Copyright (C) 2009 Intel corporation + * + * Authors: Wu Fengguang + */ + +#define _LARGEFILE64_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../include/linux/magic.h" + + +#ifndef MAX_PATH +# define MAX_PATH 256 +#endif + +#ifndef STR +# define _STR(x) #x +# define STR(x) _STR(x) +#endif + +/* + * pagemap kernel ABI bits + */ + +#define PM_ENTRY_BYTES sizeof(uint64_t) +#define PM_STATUS_BITS 3 +#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) +#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) +#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK) +#define PM_PSHIFT_BITS 6 +#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS) +#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET) +#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK) +#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1) +#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK) + +#define PM_PRESENT PM_STATUS(4LL) +#define PM_SWAP PM_STATUS(2LL) + + +/* + * kernel page flags + */ + +#define KPF_BYTES 8 +#define PROC_KPAGEFLAGS "/proc/kpageflags" + +/* copied from kpageflags_read() */ +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +/* [11-20] new additions in 2.6.31 */ +#define KPF_MMAP 11 +#define KPF_ANON 12 +#define KPF_SWAPCACHE 13 +#define KPF_SWAPBACKED 14 +#define KPF_COMPOUND_HEAD 15 +#define KPF_COMPOUND_TAIL 16 +#define KPF_HUGE 17 +#define KPF_UNEVICTABLE 18 +#define KPF_HWPOISON 19 +#define KPF_NOPAGE 20 +#define KPF_KSM 21 +#define KPF_THP 22 + +/* [32-] kernel hacking assistances */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define 
KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 + +/* [48-] take some arbitrary free slots for expanding overloaded flags + * not part of kernel API + */ +#define KPF_READAHEAD 48 +#define KPF_SLOB_FREE 49 +#define KPF_SLUB_FROZEN 50 +#define KPF_SLUB_DEBUG 51 + +#define KPF_ALL_BITS ((uint64_t)~0ULL) +#define KPF_HACKERS_BITS (0xffffULL << 32) +#define KPF_OVERLOADED_BITS (0xffffULL << 48) +#define BIT(name) (1ULL << KPF_##name) +#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL)) + +static const char * const page_flag_names[] = { + [KPF_LOCKED] = "L:locked", + [KPF_ERROR] = "E:error", + [KPF_REFERENCED] = "R:referenced", + [KPF_UPTODATE] = "U:uptodate", + [KPF_DIRTY] = "D:dirty", + [KPF_LRU] = "l:lru", + [KPF_ACTIVE] = "A:active", + [KPF_SLAB] = "S:slab", + [KPF_WRITEBACK] = "W:writeback", + [KPF_RECLAIM] = "I:reclaim", + [KPF_BUDDY] = "B:buddy", + + [KPF_MMAP] = "M:mmap", + [KPF_ANON] = "a:anonymous", + [KPF_SWAPCACHE] = "s:swapcache", + [KPF_SWAPBACKED] = "b:swapbacked", + [KPF_COMPOUND_HEAD] = "H:compound_head", + [KPF_COMPOUND_TAIL] = "T:compound_tail", + [KPF_HUGE] = "G:huge", + [KPF_UNEVICTABLE] = "u:unevictable", + [KPF_HWPOISON] = "X:hwpoison", + [KPF_NOPAGE] = "n:nopage", + [KPF_KSM] = "x:ksm", + [KPF_THP] = "t:thp", + + [KPF_RESERVED] = "r:reserved", + [KPF_MLOCKED] = "m:mlocked", + [KPF_MAPPEDTODISK] = "d:mappedtodisk", + [KPF_PRIVATE] = "P:private", + [KPF_PRIVATE_2] = "p:private_2", + [KPF_OWNER_PRIVATE] = "O:owner_private", + [KPF_ARCH] = "h:arch", + [KPF_UNCACHED] = "c:uncached", + + [KPF_READAHEAD] = "I:readahead", + [KPF_SLOB_FREE] = "P:slob_free", + [KPF_SLUB_FROZEN] = "A:slub_frozen", + [KPF_SLUB_DEBUG] = "E:slub_debug", +}; + + +static const char * const debugfs_known_mountpoints[] = { + "/sys/kernel/debug", + "/debug", + 0, +}; + +/* + * data structures + */ + +static int opt_raw; /* for kernel developers */ +static int opt_list; /* list pages (in ranges) */ +static int opt_no_summary; 
/* don't show summary */ +static pid_t opt_pid; /* process to walk */ + +#define MAX_ADDR_RANGES 1024 +static int nr_addr_ranges; +static unsigned long opt_offset[MAX_ADDR_RANGES]; +static unsigned long opt_size[MAX_ADDR_RANGES]; + +#define MAX_VMAS 10240 +static int nr_vmas; +static unsigned long pg_start[MAX_VMAS]; +static unsigned long pg_end[MAX_VMAS]; + +#define MAX_BIT_FILTERS 64 +static int nr_bit_filters; +static uint64_t opt_mask[MAX_BIT_FILTERS]; +static uint64_t opt_bits[MAX_BIT_FILTERS]; + +static int page_size; + +static int pagemap_fd; +static int kpageflags_fd; + +static int opt_hwpoison; +static int opt_unpoison; + +static char hwpoison_debug_fs[MAX_PATH+1]; +static int hwpoison_inject_fd; +static int hwpoison_forget_fd; + +#define HASH_SHIFT 13 +#define HASH_SIZE (1 << HASH_SHIFT) +#define HASH_MASK (HASH_SIZE - 1) +#define HASH_KEY(flags) (flags & HASH_MASK) + +static unsigned long total_pages; +static unsigned long nr_pages[HASH_SIZE]; +static uint64_t page_flags[HASH_SIZE]; + + +/* + * helper functions + */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1 : __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1 : __max2; }) + +static unsigned long pages2mb(unsigned long pages) +{ + return (pages * page_size) >> 20; +} + +static void fatal(const char *x, ...) 
+{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static int checked_open(const char *pathname, int flags) +{ + int fd = open(pathname, flags); + + if (fd < 0) { + perror(pathname); + exit(EXIT_FAILURE); + } + + return fd; +} + +/* + * pagemap/kpageflags routines + */ + +static unsigned long do_u64_read(int fd, char *name, + uint64_t *buf, + unsigned long index, + unsigned long count) +{ + long bytes; + + if (index > ULONG_MAX / 8) + fatal("index overflow: %lu\n", index); + + if (lseek(fd, index * 8, SEEK_SET) < 0) { + perror(name); + exit(EXIT_FAILURE); + } + + bytes = read(fd, buf, count * 8); + if (bytes < 0) { + perror(name); + exit(EXIT_FAILURE); + } + if (bytes % 8) + fatal("partial read: %lu bytes\n", bytes); + + return bytes / 8; +} + +static unsigned long kpageflags_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages); +} + +static unsigned long pagemap_read(uint64_t *buf, + unsigned long index, + unsigned long pages) +{ + return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages); +} + +static unsigned long pagemap_pfn(uint64_t val) +{ + unsigned long pfn; + + if (val & PM_PRESENT) + pfn = PM_PFRAME(val); + else + pfn = 0; + + return pfn; +} + + +/* + * page flag names + */ + +static char *page_flag_name(uint64_t flags) +{ + static char buf[65]; + int present; + int i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + present = (flags >> i) & 1; + if (!page_flag_names[i]) { + if (present) + fatal("unknown flag bit %d\n", i); + continue; + } + buf[j++] = present ? 
page_flag_names[i][0] : '_'; + } + + return buf; +} + +static char *page_flag_longname(uint64_t flags) +{ + static char buf[1024]; + int i, n; + + for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if ((flags >> i) & 1) + n += snprintf(buf + n, sizeof(buf) - n, "%s,", + page_flag_names[i] + 2); + } + if (n) + n--; + buf[n] = '\0'; + + return buf; +} + + +/* + * page list and summary + */ + +static void show_page_range(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + static uint64_t flags0; + static unsigned long voff; + static unsigned long index; + static unsigned long count; + + if (flags == flags0 && offset == index + count && + (!opt_pid || voffset == voff + count)) { + count++; + return; + } + + if (count) { + if (opt_pid) + printf("%lx\t", voff); + printf("%lx\t%lx\t%s\n", + index, count, page_flag_name(flags0)); + } + + flags0 = flags; + index = offset; + voff = voffset; + count = 1; +} + +static void show_page(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + if (opt_pid) + printf("%lx\t", voffset); + printf("%lx\t%s\n", offset, page_flag_name(flags)); +} + +static void show_summary(void) +{ + int i; + + printf(" flags\tpage-count MB" + " symbolic-flags\t\t\tlong-symbolic-flags\n"); + + for (i = 0; i < ARRAY_SIZE(nr_pages); i++) { + if (nr_pages[i]) + printf("0x%016llx\t%10lu %8lu %s\t%s\n", + (unsigned long long)page_flags[i], + nr_pages[i], + pages2mb(nr_pages[i]), + page_flag_name(page_flags[i]), + page_flag_longname(page_flags[i])); + } + + printf(" total\t%10lu %8lu\n", + total_pages, pages2mb(total_pages)); +} + + +/* + * page flag filters + */ + +static int bit_mask_ok(uint64_t flags) +{ + int i; + + for (i = 0; i < nr_bit_filters; i++) { + if (opt_bits[i] == KPF_ALL_BITS) { + if ((flags & opt_mask[i]) == 0) + return 0; + } else { + if ((flags & opt_mask[i]) != opt_bits[i]) + return 0; + } + } + + return 1; +} + +static uint64_t expand_overloaded_flags(uint64_t 
flags) +{ + /* SLOB/SLUB overload several page flags */ + if (flags & BIT(SLAB)) { + if (flags & BIT(PRIVATE)) + flags ^= BIT(PRIVATE) | BIT(SLOB_FREE); + if (flags & BIT(ACTIVE)) + flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN); + if (flags & BIT(ERROR)) + flags ^= BIT(ERROR) | BIT(SLUB_DEBUG); + } + + /* PG_reclaim is overloaded as PG_readahead in the read path */ + if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM)) + flags ^= BIT(RECLAIM) | BIT(READAHEAD); + + return flags; +} + +static uint64_t well_known_flags(uint64_t flags) +{ + /* hide flags intended only for kernel hacker */ + flags &= ~KPF_HACKERS_BITS; + + /* hide non-hugeTLB compound pages */ + if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE))) + flags &= ~BITS_COMPOUND; + + return flags; +} + +static uint64_t kpageflags_flags(uint64_t flags) +{ + flags = expand_overloaded_flags(flags); + + if (!opt_raw) + flags = well_known_flags(flags); + + return flags; +} + +/* verify that a mountpoint is actually a debugfs instance */ +static int debugfs_valid_mountpoint(const char *debugfs) +{ + struct statfs st_fs; + + if (statfs(debugfs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) + return -ENOENT; + + return 0; +} + +/* find the path to the mounted debugfs */ +static const char *debugfs_find_mountpoint(void) +{ + const char **ptr; + char type[100]; + FILE *fp; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (debugfs_valid_mountpoint(*ptr) == 0) { + strcpy(hwpoison_debug_fs, *ptr); + return hwpoison_debug_fs; + } + ptr++; + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + perror("Can't open /proc/mounts for read"); + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + hwpoison_debug_fs, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) + return NULL; + + return hwpoison_debug_fs; +} + +/* mount the debugfs somewhere if it's not 
mounted */ + +static void debugfs_mount(void) +{ + const char **ptr; + + /* see if it's already mounted */ + if (debugfs_find_mountpoint()) + return; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (mount(NULL, *ptr, "debugfs", 0, NULL) == 0) { + /* save the mountpoint */ + strcpy(hwpoison_debug_fs, *ptr); + break; + } + ptr++; + } + + if (*ptr == NULL) { + perror("mount debugfs"); + exit(EXIT_FAILURE); + } +} + +/* + * page actions + */ + +static void prepare_hwpoison_fd(void) +{ + char buf[MAX_PATH + 1]; + + debugfs_mount(); + + if (opt_hwpoison && !hwpoison_inject_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn", + hwpoison_debug_fs); + hwpoison_inject_fd = checked_open(buf, O_WRONLY); + } + + if (opt_unpoison && !hwpoison_forget_fd) { + snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn", + hwpoison_debug_fs); + hwpoison_forget_fd = checked_open(buf, O_WRONLY); + } +} + +static int hwpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_inject_fd, buf, len); + if (len < 0) { + perror("hwpoison inject"); + return len; + } + return 0; +} + +static int unpoison_page(unsigned long offset) +{ + char buf[100]; + int len; + + len = sprintf(buf, "0x%lx\n", offset); + len = write(hwpoison_forget_fd, buf, len); + if (len < 0) { + perror("hwpoison forget"); + return len; + } + return 0; +} + +/* + * page frame walker + */ + +static int hash_slot(uint64_t flags) +{ + int k = HASH_KEY(flags); + int i; + + /* Explicitly reserve slot 0 for flags 0: the following logic + * cannot distinguish an unoccupied slot from slot (flags==0). 
+ */ + if (flags == 0) + return 0; + + /* search through the remaining (HASH_SIZE-1) slots */ + for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) { + if (!k || k >= ARRAY_SIZE(page_flags)) + k = 1; + if (page_flags[k] == 0) { + page_flags[k] = flags; + return k; + } + if (page_flags[k] == flags) + return k; + } + + fatal("hash table full: bump up HASH_SHIFT?\n"); + exit(EXIT_FAILURE); +} + +static void add_page(unsigned long voffset, + unsigned long offset, uint64_t flags) +{ + flags = kpageflags_flags(flags); + + if (!bit_mask_ok(flags)) + return; + + if (opt_hwpoison) + hwpoison_page(offset); + if (opt_unpoison) + unpoison_page(offset); + + if (opt_list == 1) + show_page_range(voffset, offset, flags); + else if (opt_list == 2) + show_page(voffset, offset, flags); + + nr_pages[hash_slot(flags)]++; + total_pages++; +} + +#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */ +static void walk_pfn(unsigned long voffset, + unsigned long index, + unsigned long count) +{ + uint64_t buf[KPAGEFLAGS_BATCH]; + unsigned long batch; + long pages; + unsigned long i; + + while (count) { + batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH); + pages = kpageflags_read(buf, index, batch); + if (pages == 0) + break; + + for (i = 0; i < pages; i++) + add_page(voffset + i, index + i, buf[i]); + + index += pages; + count -= pages; + } +} + +#define PAGEMAP_BATCH (64 << 10) +static void walk_vma(unsigned long index, unsigned long count) +{ + uint64_t buf[PAGEMAP_BATCH]; + unsigned long batch; + unsigned long pages; + unsigned long pfn; + unsigned long i; + + while (count) { + batch = min_t(unsigned long, count, PAGEMAP_BATCH); + pages = pagemap_read(buf, index, batch); + if (pages == 0) + break; + + for (i = 0; i < pages; i++) { + pfn = pagemap_pfn(buf[i]); + if (pfn) + walk_pfn(index + i, pfn, 1); + } + + index += pages; + count -= pages; + } +} + +static void walk_task(unsigned long index, unsigned long count) +{ + const unsigned long end = index + count; + unsigned long start; + int 
i = 0; + + while (index < end) { + + while (pg_end[i] <= index) + if (++i >= nr_vmas) + return; + if (pg_start[i] >= end) + return; + + start = max_t(unsigned long, pg_start[i], index); + index = min_t(unsigned long, pg_end[i], end); + + assert(start < index); + walk_vma(start, index - start); + } +} + +static void add_addr_range(unsigned long offset, unsigned long size) +{ + if (nr_addr_ranges >= MAX_ADDR_RANGES) + fatal("too many addr ranges\n"); + + opt_offset[nr_addr_ranges] = offset; + opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset); + nr_addr_ranges++; +} + +static void walk_addr_ranges(void) +{ + int i; + + kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY); + + if (!nr_addr_ranges) + add_addr_range(0, ULONG_MAX); + + for (i = 0; i < nr_addr_ranges; i++) + if (!opt_pid) + walk_pfn(0, opt_offset[i], opt_size[i]); + else + walk_task(opt_offset[i], opt_size[i]); + + close(kpageflags_fd); +} + + +/* + * user interface + */ + +static const char *page_flag_type(uint64_t flag) +{ + if (flag & KPF_HACKERS_BITS) + return "(r)"; + if (flag & KPF_OVERLOADED_BITS) + return "(o)"; + return " "; +} + +static void usage(void) +{ + int i, j; + + printf( +"page-types [options]\n" +" -r|--raw Raw mode, for kernel developers\n" +" -d|--describe flags Describe flags\n" +" -a|--addr addr-spec Walk a range of pages\n" +" -b|--bits bits-spec Walk pages with specified bits\n" +" -p|--pid pid Walk process address space\n" +#if 0 /* planned features */ +" -f|--file filename Walk file address space\n" +#endif +" -l|--list Show page details in ranges\n" +" -L|--list-each Show page details one by one\n" +" -N|--no-summary Don't show summary info\n" +" -X|--hwpoison hwpoison pages\n" +" -x|--unpoison unpoison pages\n" +" -h|--help Show this usage message\n" +"flags:\n" +" 0x10 bitfield format, e.g.\n" +" anon bit-name, e.g.\n" +" 0x10,anon comma-separated list, e.g.\n" +"addr-spec:\n" +" N one page at offset N (unit: pages)\n" +" N+M pages range from N to 
N+M-1\n" +" N,M pages range from N to M-1\n" +" N, pages range from N to end\n" +" ,M pages range from 0 to M-1\n" +"bits-spec:\n" +" bit1,bit2 (flags & (bit1|bit2)) != 0\n" +" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n" +" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n" +" =bit1,bit2 flags == (bit1|bit2)\n" +"bit-names:\n" + ); + + for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + printf("%16s%s", page_flag_names[i] + 2, + page_flag_type(1ULL << i)); + if (++j > 3) { + j = 0; + putchar('\n'); + } + } + printf("\n " + "(r) raw mode bits (o) overloaded bits\n"); +} + +static unsigned long long parse_number(const char *str) +{ + unsigned long long n; + + n = strtoll(str, NULL, 0); + + if (n == 0 && str[0] != '0') + fatal("invalid name or number: %s\n", str); + + return n; +} + +static void parse_pid(const char *str) +{ + FILE *file; + char buf[5000]; + + opt_pid = parse_number(str); + + sprintf(buf, "/proc/%d/pagemap", opt_pid); + pagemap_fd = checked_open(buf, O_RDONLY); + + sprintf(buf, "/proc/%d/maps", opt_pid); + file = fopen(buf, "r"); + if (!file) { + perror(buf); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + unsigned long vm_start; + unsigned long vm_end; + unsigned long long pgoff; + int major, minor; + char r, w, x, s; + unsigned long ino; + int n; + + n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu", + &vm_start, + &vm_end, + &r, &w, &x, &s, + &pgoff, + &major, &minor, + &ino); + if (n < 10) { + fprintf(stderr, "unexpected line: %s\n", buf); + continue; + } + pg_start[nr_vmas] = vm_start / page_size; + pg_end[nr_vmas] = vm_end / page_size; + if (++nr_vmas >= MAX_VMAS) { + fprintf(stderr, "too many VMAs\n"); + break; + } + } + fclose(file); +} + +static void parse_file(const char *name) +{ +} + +static void parse_addr_range(const char *optarg) +{ + unsigned long offset; + unsigned long size; + char *p; + + p = strchr(optarg, ','); + if (!p) + p = strchr(optarg, '+'); + + if 
(p == optarg) { + offset = 0; + size = parse_number(p + 1); + } else if (p) { + offset = parse_number(optarg); + if (p[1] == '\0') + size = ULONG_MAX; + else { + size = parse_number(p + 1); + if (*p == ',') { + if (size < offset) + fatal("invalid range: %lu,%lu\n", + offset, size); + size -= offset; + } + } + } else { + offset = parse_number(optarg); + size = 1; + } + + add_addr_range(offset, size); +} + +static void add_bits_filter(uint64_t mask, uint64_t bits) +{ + if (nr_bit_filters >= MAX_BIT_FILTERS) + fatal("too much bit filters\n"); + + opt_mask[nr_bit_filters] = mask; + opt_bits[nr_bit_filters] = bits; + nr_bit_filters++; +} + +static uint64_t parse_flag_name(const char *str, int len) +{ + int i; + + if (!*str || !len) + return 0; + + if (len <= 8 && !strncmp(str, "compound", len)) + return BITS_COMPOUND; + + for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) { + if (!page_flag_names[i]) + continue; + if (!strncmp(str, page_flag_names[i] + 2, len)) + return 1ULL << i; + } + + return parse_number(str); +} + +static uint64_t parse_flag_names(const char *str, int all) +{ + const char *p = str; + uint64_t flags = 0; + + while (1) { + if (*p == ',' || *p == '=' || *p == '\0') { + if ((*str != '~') || (*str == '~' && all && *++str)) + flags |= parse_flag_name(str, p - str); + if (*p != ',') + break; + str = p + 1; + } + p++; + } + + return flags; +} + +static void parse_bits_mask(const char *optarg) +{ + uint64_t mask; + uint64_t bits; + const char *p; + + p = strchr(optarg, '='); + if (p == optarg) { + mask = KPF_ALL_BITS; + bits = parse_flag_names(p + 1, 0); + } else if (p) { + mask = parse_flag_names(optarg, 0); + bits = parse_flag_names(p + 1, 0); + } else if (strchr(optarg, '~')) { + mask = parse_flag_names(optarg, 1); + bits = parse_flag_names(optarg, 0); + } else { + mask = parse_flag_names(optarg, 0); + bits = KPF_ALL_BITS; + } + + add_bits_filter(mask, bits); +} + +static void describe_flags(const char *optarg) +{ + uint64_t flags = 
parse_flag_names(optarg, 0); + + printf("0x%016llx\t%s\t%s\n", + (unsigned long long)flags, + page_flag_name(flags), + page_flag_longname(flags)); +} + +static const struct option opts[] = { + { "raw" , 0, NULL, 'r' }, + { "pid" , 1, NULL, 'p' }, + { "file" , 1, NULL, 'f' }, + { "addr" , 1, NULL, 'a' }, + { "bits" , 1, NULL, 'b' }, + { "describe" , 1, NULL, 'd' }, + { "list" , 0, NULL, 'l' }, + { "list-each" , 0, NULL, 'L' }, + { "no-summary", 0, NULL, 'N' }, + { "hwpoison" , 0, NULL, 'X' }, + { "unpoison" , 0, NULL, 'x' }, + { "help" , 0, NULL, 'h' }, + { NULL , 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, + "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) { + switch (c) { + case 'r': + opt_raw = 1; + break; + case 'p': + parse_pid(optarg); + break; + case 'f': + parse_file(optarg); + break; + case 'a': + parse_addr_range(optarg); + break; + case 'b': + parse_bits_mask(optarg); + break; + case 'd': + describe_flags(optarg); + exit(0); + case 'l': + opt_list = 1; + break; + case 'L': + opt_list = 2; + break; + case 'N': + opt_no_summary = 1; + break; + case 'X': + opt_hwpoison = 1; + prepare_hwpoison_fd(); + break; + case 'x': + opt_unpoison = 1; + prepare_hwpoison_fd(); + break; + case 'h': + usage(); + exit(0); + default: + usage(); + exit(1); + } + } + + if (opt_list && opt_pid) + printf("voffset\t"); + if (opt_list == 1) + printf("offset\tlen\tflags\n"); + if (opt_list == 2) + printf("offset\tflags\n"); + + walk_addr_ranges(); + + if (opt_list == 1) + show_page_range(0, 0, 0); /* drain the buffer */ + + if (opt_no_summary) + return 0; + + if (opt_list) + printf("\n\n"); + + show_summary(); + + return 0; +} -- cgit v1.2.2 From 63e315535abe0d820d0e3db4c06bc5de74aeefc8 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 28 Mar 2012 14:42:55 -0700 Subject: mm: move slabinfo.c to tools/vm We have tools/vm/ folder for vm tools, so move slabinfo.c from tools/slub/ to tools/vm/ 
Signed-off-by: Dave Young Cc: Wu Fengguang Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/slub/slabinfo.c | 1393 ------------------------------------------------- tools/vm/Makefile | 4 +- tools/vm/slabinfo.c | 1393 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1395 insertions(+), 1395 deletions(-) delete mode 100644 tools/slub/slabinfo.c create mode 100644 tools/vm/slabinfo.c (limited to 'tools') diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c deleted file mode 100644 index 164cbcf6110..00000000000 --- a/tools/slub/slabinfo.c +++ /dev/null @@ -1,1393 +0,0 @@ -/* - * Slabinfo: Tool to get reports about slabs - * - * (C) 2007 sgi, Christoph Lameter - * (C) 2011 Linux Foundation, Christoph Lameter - * - * Compile with: - * - * gcc -o slabinfo slabinfo.c - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_SLABS 500 -#define MAX_ALIASES 500 -#define MAX_NODES 1024 - -struct slabinfo { - char *name; - int alias; - int refs; - int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - int hwcache_align, object_size, objs_per_slab; - int sanity_checks, slab_size, store_user, trace; - int order, poison, reclaim_account, red_zone; - unsigned long partial, objects, slabs, objects_partial, objects_total; - unsigned long alloc_fastpath, alloc_slowpath; - unsigned long free_fastpath, free_slowpath; - unsigned long free_frozen, free_add_partial, free_remove_partial; - unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; - unsigned long cpuslab_flush, deactivate_full, deactivate_empty; - unsigned long deactivate_to_head, deactivate_to_tail; - unsigned long deactivate_remote_frees, order_fallback; - unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; - unsigned long alloc_node_mismatch, deactivate_bypass; - unsigned long cpu_partial_alloc, cpu_partial_free; 
- int numa[MAX_NODES]; - int numa_partial[MAX_NODES]; -} slabinfo[MAX_SLABS]; - -struct aliasinfo { - char *name; - char *ref; - struct slabinfo *slab; -} aliasinfo[MAX_ALIASES]; - -int slabs = 0; -int actual_slabs = 0; -int aliases = 0; -int alias_targets = 0; -int highest_node = 0; - -char buffer[4096]; - -int show_empty = 0; -int show_report = 0; -int show_alias = 0; -int show_slab = 0; -int skip_zero = 1; -int show_numa = 0; -int show_track = 0; -int show_first_alias = 0; -int validate = 0; -int shrink = 0; -int show_inverted = 0; -int show_single_ref = 0; -int show_totals = 0; -int sort_size = 0; -int sort_active = 0; -int set_debug = 0; -int show_ops = 0; -int show_activity = 0; - -/* Debug options */ -int sanity = 0; -int redzone = 0; -int poison = 0; -int tracking = 0; -int tracing = 0; - -int page_size; - -regex_t pattern; - -static void fatal(const char *x, ...) -{ - va_list ap; - - va_start(ap, x); - vfprintf(stderr, x, ap); - va_end(ap); - exit(EXIT_FAILURE); -} - -static void usage(void) -{ - printf("slabinfo 4/15/2011. 
(c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" - "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" - "-a|--aliases Show aliases\n" - "-A|--activity Most active slabs first\n" - "-d|--debug= Set/Clear Debug options\n" - "-D|--display-active Switch line format to activity\n" - "-e|--empty Show empty slabs\n" - "-f|--first-alias Show first alias\n" - "-h|--help Show usage information\n" - "-i|--inverted Inverted list\n" - "-l|--slabs Show slabs\n" - "-n|--numa Show NUMA information\n" - "-o|--ops Show kmem_cache_ops\n" - "-s|--shrink Shrink slabs\n" - "-r|--report Detailed report on single slabs\n" - "-S|--Size Sort by size\n" - "-t|--tracking Show alloc/free information\n" - "-T|--Totals Show summary information\n" - "-v|--validate Validate slabs\n" - "-z|--zero Include empty slabs\n" - "-1|--1ref Single reference\n" - "\nValid debug options (FZPUT may be combined)\n" - "a / A Switch on all debug options (=FZUP)\n" - "- Switch off all debug options\n" - "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" - "z / Z Redzoning\n" - "p / P Poisoning\n" - "u / U Tracking\n" - "t / T Tracing\n" - ); -} - -static unsigned long read_obj(const char *name) -{ - FILE *f = fopen(name, "r"); - - if (!f) - buffer[0] = 0; - else { - if (!fgets(buffer, sizeof(buffer), f)) - buffer[0] = 0; - fclose(f); - if (buffer[strlen(buffer)] == '\n') - buffer[strlen(buffer)] = 0; - } - return strlen(buffer); -} - - -/* - * Get the contents of an attribute - */ -static unsigned long get_obj(const char *name) -{ - if (!read_obj(name)) - return 0; - - return atol(buffer); -} - -static unsigned long get_obj_and_str(const char *name, char **x) -{ - unsigned long result = 0; - char *p; - - *x = NULL; - - if (!read_obj(name)) { - x = NULL; - return 0; - } - result = strtoul(buffer, &p, 10); - while (*p == ' ') - p++; - if (*p) - *x = strdup(p); - return result; -} - -static void set_obj(struct slabinfo *s, const char *name, int n) -{ - char x[100]; - FILE *f; - - snprintf(x, 100, "%s/%s", s->name, name); - f = 
fopen(x, "w"); - if (!f) - fatal("Cannot write to %s\n", x); - - fprintf(f, "%d\n", n); - fclose(f); -} - -static unsigned long read_slab_obj(struct slabinfo *s, const char *name) -{ - char x[100]; - FILE *f; - size_t l; - - snprintf(x, 100, "%s/%s", s->name, name); - f = fopen(x, "r"); - if (!f) { - buffer[0] = 0; - l = 0; - } else { - l = fread(buffer, 1, sizeof(buffer), f); - buffer[l] = 0; - fclose(f); - } - return l; -} - - -/* - * Put a size string together - */ -static int store_size(char *buffer, unsigned long value) -{ - unsigned long divisor = 1; - char trailer = 0; - int n; - - if (value > 1000000000UL) { - divisor = 100000000UL; - trailer = 'G'; - } else if (value > 1000000UL) { - divisor = 100000UL; - trailer = 'M'; - } else if (value > 1000UL) { - divisor = 100; - trailer = 'K'; - } - - value /= divisor; - n = sprintf(buffer, "%ld",value); - if (trailer) { - buffer[n] = trailer; - n++; - buffer[n] = 0; - } - if (divisor != 1) { - memmove(buffer + n - 2, buffer + n - 3, 4); - buffer[n-2] = '.'; - n++; - } - return n; -} - -static void decode_numa_list(int *numa, char *t) -{ - int node; - int nr; - - memset(numa, 0, MAX_NODES * sizeof(int)); - - if (!t) - return; - - while (*t == 'N') { - t++; - node = strtoul(t, &t, 10); - if (*t == '=') { - t++; - nr = strtoul(t, &t, 10); - numa[node] = nr; - if (node > highest_node) - highest_node = node; - } - while (*t == ' ') - t++; - } -} - -static void slab_validate(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - set_obj(s, "validate", 1); -} - -static void slab_shrink(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - set_obj(s, "shrink", 1); -} - -int line = 0; - -static void first_line(void) -{ - if (show_activity) - printf("Name Objects Alloc Free %%Fast Fallb O CmpX UL\n"); - else - printf("Name Objects Objsize Space " - "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); -} - -/* - * Find the shortest alias of a slab - */ -static struct aliasinfo *find_one_alias(struct 
slabinfo *find) -{ - struct aliasinfo *a; - struct aliasinfo *best = NULL; - - for(a = aliasinfo;a < aliasinfo + aliases; a++) { - if (a->slab == find && - (!best || strlen(best->name) < strlen(a->name))) { - best = a; - if (strncmp(a->name,"kmall", 5) == 0) - return best; - } - } - return best; -} - -static unsigned long slab_size(struct slabinfo *s) -{ - return s->slabs * (page_size << s->order); -} - -static unsigned long slab_activity(struct slabinfo *s) -{ - return s->alloc_fastpath + s->free_fastpath + - s->alloc_slowpath + s->free_slowpath; -} - -static void slab_numa(struct slabinfo *s, int mode) -{ - int node; - - if (strcmp(s->name, "*") == 0) - return; - - if (!highest_node) { - printf("\n%s: No NUMA information available.\n", s->name); - return; - } - - if (skip_zero && !s->slabs) - return; - - if (!line) { - printf("\n%-21s:", mode ? "NUMA nodes" : "Slab"); - for(node = 0; node <= highest_node; node++) - printf(" %4d", node); - printf("\n----------------------"); - for(node = 0; node <= highest_node; node++) - printf("-----"); - printf("\n"); - } - printf("%-21s ", mode ? 
"All slabs" : s->name); - for(node = 0; node <= highest_node; node++) { - char b[20]; - - store_size(b, s->numa[node]); - printf(" %4s", b); - } - printf("\n"); - if (mode) { - printf("%-21s ", "Partial slabs"); - for(node = 0; node <= highest_node; node++) { - char b[20]; - - store_size(b, s->numa_partial[node]); - printf(" %4s", b); - } - printf("\n"); - } - line++; -} - -static void show_tracking(struct slabinfo *s) -{ - printf("\n%s: Kernel object allocation\n", s->name); - printf("-----------------------------------------------------------------------\n"); - if (read_slab_obj(s, "alloc_calls")) - printf("%s", buffer); - else - printf("No Data\n"); - - printf("\n%s: Kernel object freeing\n", s->name); - printf("------------------------------------------------------------------------\n"); - if (read_slab_obj(s, "free_calls")) - printf("%s", buffer); - else - printf("No Data\n"); - -} - -static void ops(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - if (read_slab_obj(s, "ops")) { - printf("\n%s: kmem_cache operations\n", s->name); - printf("--------------------------------------------\n"); - printf("%s", buffer); - } else - printf("\n%s has no kmem_cache operations\n", s->name); -} - -static const char *onoff(int x) -{ - if (x) - return "On "; - return "Off"; -} - -static void slab_stats(struct slabinfo *s) -{ - unsigned long total_alloc; - unsigned long total_free; - unsigned long total; - - if (!s->alloc_slab) - return; - - total_alloc = s->alloc_fastpath + s->alloc_slowpath; - total_free = s->free_fastpath + s->free_slowpath; - - if (!total_alloc) - return; - - printf("\n"); - printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); - printf("--------------------------------------------------\n"); - printf("Fastpath %8lu %8lu %3lu %3lu\n", - s->alloc_fastpath, s->free_fastpath, - s->alloc_fastpath * 100 / total_alloc, - s->free_fastpath * 100 / total_free); - printf("Slowpath %8lu %8lu %3lu %3lu\n", - total_alloc - s->alloc_fastpath, 
s->free_slowpath, - (total_alloc - s->alloc_fastpath) * 100 / total_alloc, - s->free_slowpath * 100 / total_free); - printf("Page Alloc %8lu %8lu %3lu %3lu\n", - s->alloc_slab, s->free_slab, - s->alloc_slab * 100 / total_alloc, - s->free_slab * 100 / total_free); - printf("Add partial %8lu %8lu %3lu %3lu\n", - s->deactivate_to_head + s->deactivate_to_tail, - s->free_add_partial, - (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, - s->free_add_partial * 100 / total_free); - printf("Remove partial %8lu %8lu %3lu %3lu\n", - s->alloc_from_partial, s->free_remove_partial, - s->alloc_from_partial * 100 / total_alloc, - s->free_remove_partial * 100 / total_free); - - printf("Cpu partial list %8lu %8lu %3lu %3lu\n", - s->cpu_partial_alloc, s->cpu_partial_free, - s->cpu_partial_alloc * 100 / total_alloc, - s->cpu_partial_free * 100 / total_free); - - printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", - s->deactivate_remote_frees, s->free_frozen, - s->deactivate_remote_frees * 100 / total_alloc, - s->free_frozen * 100 / total_free); - - printf("Total %8lu %8lu\n\n", total_alloc, total_free); - - if (s->cpuslab_flush) - printf("Flushes %8lu\n", s->cpuslab_flush); - - total = s->deactivate_full + s->deactivate_empty + - s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; - - if (total) { - printf("\nSlab Deactivation Ocurrences %%\n"); - printf("-------------------------------------------------\n"); - printf("Slab full %7lu %3lu%%\n", - s->deactivate_full, (s->deactivate_full * 100) / total); - printf("Slab empty %7lu %3lu%%\n", - s->deactivate_empty, (s->deactivate_empty * 100) / total); - printf("Moved to head of partial list %7lu %3lu%%\n", - s->deactivate_to_head, (s->deactivate_to_head * 100) / total); - printf("Moved to tail of partial list %7lu %3lu%%\n", - s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); - printf("Deactivation bypass %7lu %3lu%%\n", - s->deactivate_bypass, (s->deactivate_bypass * 100) / total); - 
printf("Refilled from foreign frees %7lu %3lu%%\n", - s->alloc_refill, (s->alloc_refill * 100) / total); - printf("Node mismatch %7lu %3lu%%\n", - s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); - } - - if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) - printf("\nCmpxchg_double Looping\n------------------------\n"); - printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", - s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); -} - -static void report(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n", - s->name, s->aliases, s->order, s->objects); - if (s->hwcache_align) - printf("** Hardware cacheline aligned\n"); - if (s->cache_dma) - printf("** Memory is allocated in a special DMA zone\n"); - if (s->destroy_by_rcu) - printf("** Slabs are destroyed via RCU\n"); - if (s->reclaim_account) - printf("** Reclaim accounting active\n"); - - printf("\nSizes (bytes) Slabs Debug Memory\n"); - printf("------------------------------------------------------------------------\n"); - printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", - s->object_size, s->slabs, onoff(s->sanity_checks), - s->slabs * (page_size << s->order)); - printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", - s->slab_size, s->slabs - s->partial - s->cpu_slabs, - onoff(s->red_zone), s->objects * s->object_size); - printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", - page_size << s->order, s->partial, onoff(s->poison), - s->slabs * (page_size << s->order) - s->objects * s->object_size); - printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", - s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user), - (s->slab_size - s->object_size) * s->objects); - printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", - s->align, s->objs_per_slab, onoff(s->trace), - ((page_size << s->order) - s->objs_per_slab * 
s->slab_size) * - s->slabs); - - ops(s); - show_tracking(s); - slab_numa(s, 1); - slab_stats(s); -} - -static void slabcache(struct slabinfo *s) -{ - char size_str[20]; - char dist_str[40]; - char flags[20]; - char *p = flags; - - if (strcmp(s->name, "*") == 0) - return; - - if (actual_slabs == 1) { - report(s); - return; - } - - if (skip_zero && !show_empty && !s->slabs) - return; - - if (show_empty && s->slabs) - return; - - store_size(size_str, slab_size(s)); - snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, - s->partial, s->cpu_slabs); - - if (!line++) - first_line(); - - if (s->aliases) - *p++ = '*'; - if (s->cache_dma) - *p++ = 'd'; - if (s->hwcache_align) - *p++ = 'A'; - if (s->poison) - *p++ = 'P'; - if (s->reclaim_account) - *p++ = 'a'; - if (s->red_zone) - *p++ = 'Z'; - if (s->sanity_checks) - *p++ = 'F'; - if (s->store_user) - *p++ = 'U'; - if (s->trace) - *p++ = 'T'; - - *p = 0; - if (show_activity) { - unsigned long total_alloc; - unsigned long total_free; - - total_alloc = s->alloc_fastpath + s->alloc_slowpath; - total_free = s->free_fastpath + s->free_slowpath; - - printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n", - s->name, s->objects, - total_alloc, total_free, - total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, - total_free ? (s->free_fastpath * 100 / total_free) : 0, - s->order_fallback, s->order, s->cmpxchg_double_fail, - s->cmpxchg_double_cpu_fail); - } - else - printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", - s->name, s->objects, s->object_size, size_str, dist_str, - s->objs_per_slab, s->order, - s->slabs ? (s->partial * 100) / s->slabs : 100, - s->slabs ? (s->objects * s->object_size * 100) / - (s->slabs * (page_size << s->order)) : 100, - flags); -} - -/* - * Analyze debug options. Return false if something is amiss. 
- */ -static int debug_opt_scan(char *opt) -{ - if (!opt || !opt[0] || strcmp(opt, "-") == 0) - return 1; - - if (strcasecmp(opt, "a") == 0) { - sanity = 1; - poison = 1; - redzone = 1; - tracking = 1; - return 1; - } - - for ( ; *opt; opt++) - switch (*opt) { - case 'F' : case 'f': - if (sanity) - return 0; - sanity = 1; - break; - case 'P' : case 'p': - if (poison) - return 0; - poison = 1; - break; - - case 'Z' : case 'z': - if (redzone) - return 0; - redzone = 1; - break; - - case 'U' : case 'u': - if (tracking) - return 0; - tracking = 1; - break; - - case 'T' : case 't': - if (tracing) - return 0; - tracing = 1; - break; - default: - return 0; - } - return 1; -} - -static int slab_empty(struct slabinfo *s) -{ - if (s->objects > 0) - return 0; - - /* - * We may still have slabs even if there are no objects. Shrinking will - * remove them. - */ - if (s->slabs != 0) - set_obj(s, "shrink", 1); - - return 1; -} - -static void slab_debug(struct slabinfo *s) -{ - if (strcmp(s->name, "*") == 0) - return; - - if (sanity && !s->sanity_checks) { - set_obj(s, "sanity", 1); - } - if (!sanity && s->sanity_checks) { - if (slab_empty(s)) - set_obj(s, "sanity", 0); - else - fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); - } - if (redzone && !s->red_zone) { - if (slab_empty(s)) - set_obj(s, "red_zone", 1); - else - fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); - } - if (!redzone && s->red_zone) { - if (slab_empty(s)) - set_obj(s, "red_zone", 0); - else - fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); - } - if (poison && !s->poison) { - if (slab_empty(s)) - set_obj(s, "poison", 1); - else - fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); - } - if (!poison && s->poison) { - if (slab_empty(s)) - set_obj(s, "poison", 0); - else - fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); - } - if (tracking && !s->store_user) { - if (slab_empty(s)) - set_obj(s, "store_user", 1); - 
else - fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); - } - if (!tracking && s->store_user) { - if (slab_empty(s)) - set_obj(s, "store_user", 0); - else - fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); - } - if (tracing && !s->trace) { - if (slabs == 1) - set_obj(s, "trace", 1); - else - fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); - } - if (!tracing && s->trace) - set_obj(s, "trace", 1); -} - -static void totals(void) -{ - struct slabinfo *s; - - int used_slabs = 0; - char b1[20], b2[20], b3[20], b4[20]; - unsigned long long max = 1ULL << 63; - - /* Object size */ - unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; - - /* Number of partial slabs in a slabcache */ - unsigned long long min_partial = max, max_partial = 0, - avg_partial, total_partial = 0; - - /* Number of slabs in a slab cache */ - unsigned long long min_slabs = max, max_slabs = 0, - avg_slabs, total_slabs = 0; - - /* Size of the whole slab */ - unsigned long long min_size = max, max_size = 0, - avg_size, total_size = 0; - - /* Bytes used for object storage in a slab */ - unsigned long long min_used = max, max_used = 0, - avg_used, total_used = 0; - - /* Waste: Bytes used for alignment and padding */ - unsigned long long min_waste = max, max_waste = 0, - avg_waste, total_waste = 0; - /* Number of objects in a slab */ - unsigned long long min_objects = max, max_objects = 0, - avg_objects, total_objects = 0; - /* Waste per object */ - unsigned long long min_objwaste = max, - max_objwaste = 0, avg_objwaste, - total_objwaste = 0; - - /* Memory per object */ - unsigned long long min_memobj = max, - max_memobj = 0, avg_memobj, - total_objsize = 0; - - /* Percentage of partial slabs per slab */ - unsigned long min_ppart = 100, max_ppart = 0, - avg_ppart, total_ppart = 0; - - /* Number of objects in partial slabs */ - unsigned long min_partobj = max, max_partobj = 0, - avg_partobj, total_partobj = 0; - - /* 
Percentage of partial objects of all objects in a slab */ - unsigned long min_ppartobj = 100, max_ppartobj = 0, - avg_ppartobj, total_ppartobj = 0; - - - for (s = slabinfo; s < slabinfo + slabs; s++) { - unsigned long long size; - unsigned long used; - unsigned long long wasted; - unsigned long long objwaste; - unsigned long percentage_partial_slabs; - unsigned long percentage_partial_objs; - - if (!s->slabs || !s->objects) - continue; - - used_slabs++; - - size = slab_size(s); - used = s->objects * s->object_size; - wasted = size - used; - objwaste = s->slab_size - s->object_size; - - percentage_partial_slabs = s->partial * 100 / s->slabs; - if (percentage_partial_slabs > 100) - percentage_partial_slabs = 100; - - percentage_partial_objs = s->objects_partial * 100 - / s->objects; - - if (percentage_partial_objs > 100) - percentage_partial_objs = 100; - - if (s->object_size < min_objsize) - min_objsize = s->object_size; - if (s->partial < min_partial) - min_partial = s->partial; - if (s->slabs < min_slabs) - min_slabs = s->slabs; - if (size < min_size) - min_size = size; - if (wasted < min_waste) - min_waste = wasted; - if (objwaste < min_objwaste) - min_objwaste = objwaste; - if (s->objects < min_objects) - min_objects = s->objects; - if (used < min_used) - min_used = used; - if (s->objects_partial < min_partobj) - min_partobj = s->objects_partial; - if (percentage_partial_slabs < min_ppart) - min_ppart = percentage_partial_slabs; - if (percentage_partial_objs < min_ppartobj) - min_ppartobj = percentage_partial_objs; - if (s->slab_size < min_memobj) - min_memobj = s->slab_size; - - if (s->object_size > max_objsize) - max_objsize = s->object_size; - if (s->partial > max_partial) - max_partial = s->partial; - if (s->slabs > max_slabs) - max_slabs = s->slabs; - if (size > max_size) - max_size = size; - if (wasted > max_waste) - max_waste = wasted; - if (objwaste > max_objwaste) - max_objwaste = objwaste; - if (s->objects > max_objects) - max_objects = s->objects; - 
if (used > max_used) - max_used = used; - if (s->objects_partial > max_partobj) - max_partobj = s->objects_partial; - if (percentage_partial_slabs > max_ppart) - max_ppart = percentage_partial_slabs; - if (percentage_partial_objs > max_ppartobj) - max_ppartobj = percentage_partial_objs; - if (s->slab_size > max_memobj) - max_memobj = s->slab_size; - - total_partial += s->partial; - total_slabs += s->slabs; - total_size += size; - total_waste += wasted; - - total_objects += s->objects; - total_used += used; - total_partobj += s->objects_partial; - total_ppart += percentage_partial_slabs; - total_ppartobj += percentage_partial_objs; - - total_objwaste += s->objects * objwaste; - total_objsize += s->objects * s->slab_size; - } - - if (!total_objects) { - printf("No objects\n"); - return; - } - if (!used_slabs) { - printf("No slabs\n"); - return; - } - - /* Per slab averages */ - avg_partial = total_partial / used_slabs; - avg_slabs = total_slabs / used_slabs; - avg_size = total_size / used_slabs; - avg_waste = total_waste / used_slabs; - - avg_objects = total_objects / used_slabs; - avg_used = total_used / used_slabs; - avg_partobj = total_partobj / used_slabs; - avg_ppart = total_ppart / used_slabs; - avg_ppartobj = total_ppartobj / used_slabs; - - /* Per object object sizes */ - avg_objsize = total_used / total_objects; - avg_objwaste = total_objwaste / total_objects; - avg_partobj = total_partobj * 100 / total_objects; - avg_memobj = total_objsize / total_objects; - - printf("Slabcache Totals\n"); - printf("----------------\n"); - printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n", - slabs, aliases, alias_targets, used_slabs); - - store_size(b1, total_size);store_size(b2, total_waste); - store_size(b3, total_waste * 100 / total_used); - printf("Memory used: %6s # Loss : %6s MRatio:%6s%%\n", b1, b2, b3); - - store_size(b1, total_objects);store_size(b2, total_partobj); - store_size(b3, total_partobj * 100 / total_objects); - printf("# Objects : %6s # 
PartObj: %6s ORatio:%6s%%\n", b1, b2, b3); - - printf("\n"); - printf("Per Cache Average Min Max Total\n"); - printf("---------------------------------------------------------\n"); - - store_size(b1, avg_objects);store_size(b2, min_objects); - store_size(b3, max_objects);store_size(b4, total_objects); - printf("#Objects %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - store_size(b1, avg_slabs);store_size(b2, min_slabs); - store_size(b3, max_slabs);store_size(b4, total_slabs); - printf("#Slabs %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - store_size(b1, avg_partial);store_size(b2, min_partial); - store_size(b3, max_partial);store_size(b4, total_partial); - printf("#PartSlab %10s %10s %10s %10s\n", - b1, b2, b3, b4); - store_size(b1, avg_ppart);store_size(b2, min_ppart); - store_size(b3, max_ppart); - store_size(b4, total_partial * 100 / total_slabs); - printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n", - b1, b2, b3, b4); - - store_size(b1, avg_partobj);store_size(b2, min_partobj); - store_size(b3, max_partobj); - store_size(b4, total_partobj); - printf("PartObjs %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); - store_size(b3, max_ppartobj); - store_size(b4, total_partobj * 100 / total_objects); - printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n", - b1, b2, b3, b4); - - store_size(b1, avg_size);store_size(b2, min_size); - store_size(b3, max_size);store_size(b4, total_size); - printf("Memory %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - store_size(b1, avg_used);store_size(b2, min_used); - store_size(b3, max_used);store_size(b4, total_used); - printf("Used %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - store_size(b1, avg_waste);store_size(b2, min_waste); - store_size(b3, max_waste);store_size(b4, total_waste); - printf("Loss %10s %10s %10s %10s\n", - b1, b2, b3, b4); - - printf("\n"); - printf("Per Object Average Min Max\n"); - printf("---------------------------------------------\n"); - - store_size(b1, 
avg_memobj);store_size(b2, min_memobj); - store_size(b3, max_memobj); - printf("Memory %10s %10s %10s\n", - b1, b2, b3); - store_size(b1, avg_objsize);store_size(b2, min_objsize); - store_size(b3, max_objsize); - printf("User %10s %10s %10s\n", - b1, b2, b3); - - store_size(b1, avg_objwaste);store_size(b2, min_objwaste); - store_size(b3, max_objwaste); - printf("Loss %10s %10s %10s\n", - b1, b2, b3); -} - -static void sort_slabs(void) -{ - struct slabinfo *s1,*s2; - - for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { - for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { - int result; - - if (sort_size) - result = slab_size(s1) < slab_size(s2); - else if (sort_active) - result = slab_activity(s1) < slab_activity(s2); - else - result = strcasecmp(s1->name, s2->name); - - if (show_inverted) - result = -result; - - if (result > 0) { - struct slabinfo t; - - memcpy(&t, s1, sizeof(struct slabinfo)); - memcpy(s1, s2, sizeof(struct slabinfo)); - memcpy(s2, &t, sizeof(struct slabinfo)); - } - } - } -} - -static void sort_aliases(void) -{ - struct aliasinfo *a1,*a2; - - for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { - for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { - char *n1, *n2; - - n1 = a1->name; - n2 = a2->name; - if (show_alias && !show_inverted) { - n1 = a1->ref; - n2 = a2->ref; - } - if (strcasecmp(n1, n2) > 0) { - struct aliasinfo t; - - memcpy(&t, a1, sizeof(struct aliasinfo)); - memcpy(a1, a2, sizeof(struct aliasinfo)); - memcpy(a2, &t, sizeof(struct aliasinfo)); - } - } - } -} - -static void link_slabs(void) -{ - struct aliasinfo *a; - struct slabinfo *s; - - for (a = aliasinfo; a < aliasinfo + aliases; a++) { - - for (s = slabinfo; s < slabinfo + slabs; s++) - if (strcmp(a->ref, s->name) == 0) { - a->slab = s; - s->refs++; - break; - } - if (s == slabinfo + slabs) - fatal("Unresolved alias %s\n", a->ref); - } -} - -static void alias(void) -{ - struct aliasinfo *a; - char *active = NULL; - - sort_aliases(); - link_slabs(); - - for(a = aliasinfo; a < 
aliasinfo + aliases; a++) { - - if (!show_single_ref && a->slab->refs == 1) - continue; - - if (!show_inverted) { - if (active) { - if (strcmp(a->slab->name, active) == 0) { - printf(" %s", a->name); - continue; - } - } - printf("\n%-12s <- %s", a->slab->name, a->name); - active = a->slab->name; - } - else - printf("%-20s -> %s\n", a->name, a->slab->name); - } - if (active) - printf("\n"); -} - - -static void rename_slabs(void) -{ - struct slabinfo *s; - struct aliasinfo *a; - - for (s = slabinfo; s < slabinfo + slabs; s++) { - if (*s->name != ':') - continue; - - if (s->refs > 1 && !show_first_alias) - continue; - - a = find_one_alias(s); - - if (a) - s->name = a->name; - else { - s->name = "*"; - actual_slabs--; - } - } -} - -static int slab_mismatch(char *slab) -{ - return regexec(&pattern, slab, 0, NULL, 0); -} - -static void read_slab_dir(void) -{ - DIR *dir; - struct dirent *de; - struct slabinfo *slab = slabinfo; - struct aliasinfo *alias = aliasinfo; - char *p; - char *t; - int count; - - if (chdir("/sys/kernel/slab") && chdir("/sys/slab")) - fatal("SYSFS support for SLUB not active\n"); - - dir = opendir("."); - while ((de = readdir(dir))) { - if (de->d_name[0] == '.' 
|| - (de->d_name[0] != ':' && slab_mismatch(de->d_name))) - continue; - switch (de->d_type) { - case DT_LNK: - alias->name = strdup(de->d_name); - count = readlink(de->d_name, buffer, sizeof(buffer)-1); - - if (count < 0) - fatal("Cannot read symlink %s\n", de->d_name); - - buffer[count] = 0; - p = buffer + count; - while (p > buffer && p[-1] != '/') - p--; - alias->ref = strdup(p); - alias++; - break; - case DT_DIR: - if (chdir(de->d_name)) - fatal("Unable to access slab %s\n", slab->name); - slab->name = strdup(de->d_name); - slab->alias = 0; - slab->refs = 0; - slab->aliases = get_obj("aliases"); - slab->align = get_obj("align"); - slab->cache_dma = get_obj("cache_dma"); - slab->cpu_slabs = get_obj("cpu_slabs"); - slab->destroy_by_rcu = get_obj("destroy_by_rcu"); - slab->hwcache_align = get_obj("hwcache_align"); - slab->object_size = get_obj("object_size"); - slab->objects = get_obj("objects"); - slab->objects_partial = get_obj("objects_partial"); - slab->objects_total = get_obj("objects_total"); - slab->objs_per_slab = get_obj("objs_per_slab"); - slab->order = get_obj("order"); - slab->partial = get_obj("partial"); - slab->partial = get_obj_and_str("partial", &t); - decode_numa_list(slab->numa_partial, t); - free(t); - slab->poison = get_obj("poison"); - slab->reclaim_account = get_obj("reclaim_account"); - slab->red_zone = get_obj("red_zone"); - slab->sanity_checks = get_obj("sanity_checks"); - slab->slab_size = get_obj("slab_size"); - slab->slabs = get_obj_and_str("slabs", &t); - decode_numa_list(slab->numa, t); - free(t); - slab->store_user = get_obj("store_user"); - slab->trace = get_obj("trace"); - slab->alloc_fastpath = get_obj("alloc_fastpath"); - slab->alloc_slowpath = get_obj("alloc_slowpath"); - slab->free_fastpath = get_obj("free_fastpath"); - slab->free_slowpath = get_obj("free_slowpath"); - slab->free_frozen= get_obj("free_frozen"); - slab->free_add_partial = get_obj("free_add_partial"); - slab->free_remove_partial = get_obj("free_remove_partial"); 
- slab->alloc_from_partial = get_obj("alloc_from_partial"); - slab->alloc_slab = get_obj("alloc_slab"); - slab->alloc_refill = get_obj("alloc_refill"); - slab->free_slab = get_obj("free_slab"); - slab->cpuslab_flush = get_obj("cpuslab_flush"); - slab->deactivate_full = get_obj("deactivate_full"); - slab->deactivate_empty = get_obj("deactivate_empty"); - slab->deactivate_to_head = get_obj("deactivate_to_head"); - slab->deactivate_to_tail = get_obj("deactivate_to_tail"); - slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); - slab->order_fallback = get_obj("order_fallback"); - slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); - slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); - slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); - slab->cpu_partial_free = get_obj("cpu_partial_free"); - slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); - slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); - if (slab->name[0] == ':') - alias_targets++; - slab++; - break; - default : - fatal("Unknown file type %lx\n", de->d_type); - } - } - closedir(dir); - slabs = slab - slabinfo; - actual_slabs = slabs; - aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); -} - -static void output_slabs(void) -{ - struct slabinfo *slab; - - for (slab = slabinfo; slab < slabinfo + slabs; slab++) { - - if (slab->alias) - continue; - - - if (show_numa) - slab_numa(slab, 0); - else if (show_track) - show_tracking(slab); - else if (validate) - slab_validate(slab); - else if (shrink) - slab_shrink(slab); - else if (set_debug) - slab_debug(slab); - else if (show_ops) - ops(slab); - else if (show_slab) - slabcache(slab); - else if (show_report) - report(slab); - } -} - -struct option opts[] = { - { "aliases", 0, NULL, 'a' }, - { "activity", 0, NULL, 'A' }, - { "debug", 2, NULL, 'd' }, - { "display-activity", 0, NULL, 'D' }, - { "empty", 0, 
NULL, 'e' }, - { "first-alias", 0, NULL, 'f' }, - { "help", 0, NULL, 'h' }, - { "inverted", 0, NULL, 'i'}, - { "numa", 0, NULL, 'n' }, - { "ops", 0, NULL, 'o' }, - { "report", 0, NULL, 'r' }, - { "shrink", 0, NULL, 's' }, - { "slabs", 0, NULL, 'l' }, - { "track", 0, NULL, 't'}, - { "validate", 0, NULL, 'v' }, - { "zero", 0, NULL, 'z' }, - { "1ref", 0, NULL, '1'}, - { NULL, 0, NULL, 0 } -}; - -int main(int argc, char *argv[]) -{ - int c; - int err; - char *pattern_source; - - page_size = getpagesize(); - - while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", - opts, NULL)) != -1) - switch (c) { - case '1': - show_single_ref = 1; - break; - case 'a': - show_alias = 1; - break; - case 'A': - sort_active = 1; - break; - case 'd': - set_debug = 1; - if (!debug_opt_scan(optarg)) - fatal("Invalid debug option '%s'\n", optarg); - break; - case 'D': - show_activity = 1; - break; - case 'e': - show_empty = 1; - break; - case 'f': - show_first_alias = 1; - break; - case 'h': - usage(); - return 0; - case 'i': - show_inverted = 1; - break; - case 'n': - show_numa = 1; - break; - case 'o': - show_ops = 1; - break; - case 'r': - show_report = 1; - break; - case 's': - shrink = 1; - break; - case 'l': - show_slab = 1; - break; - case 't': - show_track = 1; - break; - case 'v': - validate = 1; - break; - case 'z': - skip_zero = 0; - break; - case 'T': - show_totals = 1; - break; - case 'S': - sort_size = 1; - break; - - default: - fatal("%s: Invalid option '%c'\n", argv[0], optopt); - - } - - if (!show_slab && !show_alias && !show_track && !show_report - && !validate && !shrink && !set_debug && !show_ops) - show_slab = 1; - - if (argc > optind) - pattern_source = argv[optind]; - else - pattern_source = ".*"; - - err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB); - if (err) - fatal("%s: Invalid pattern '%s' code %d\n", - argv[0], pattern_source, err); - read_slab_dir(); - if (show_alias) - alias(); - else - if (show_totals) - totals(); - else { - link_slabs(); - 
rename_slabs(); - sort_slabs(); - output_slabs(); - } - return 0; -} diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 3823d4b1fa7..8e30e5c40f8 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -3,9 +3,9 @@ CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -all: page-types +all: page-types slabinfo %: %.c $(CC) $(CFLAGS) -o $@ $^ clean: - $(RM) page-types + $(RM) page-types slabinfo diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c new file mode 100644 index 00000000000..164cbcf6110 --- /dev/null +++ b/tools/vm/slabinfo.c @@ -0,0 +1,1393 @@ +/* + * Slabinfo: Tool to get reports about slabs + * + * (C) 2007 sgi, Christoph Lameter + * (C) 2011 Linux Foundation, Christoph Lameter + * + * Compile with: + * + * gcc -o slabinfo slabinfo.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_SLABS 500 +#define MAX_ALIASES 500 +#define MAX_NODES 1024 + +struct slabinfo { + char *name; + int alias; + int refs; + int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int hwcache_align, object_size, objs_per_slab; + int sanity_checks, slab_size, store_user, trace; + int order, poison, reclaim_account, red_zone; + unsigned long partial, objects, slabs, objects_partial, objects_total; + unsigned long alloc_fastpath, alloc_slowpath; + unsigned long free_fastpath, free_slowpath; + unsigned long free_frozen, free_add_partial, free_remove_partial; + unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; + unsigned long cpuslab_flush, deactivate_full, deactivate_empty; + unsigned long deactivate_to_head, deactivate_to_tail; + unsigned long deactivate_remote_frees, order_fallback; + unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; + unsigned long alloc_node_mismatch, deactivate_bypass; + unsigned long cpu_partial_alloc, cpu_partial_free; + int numa[MAX_NODES]; + int numa_partial[MAX_NODES]; +} slabinfo[MAX_SLABS]; + +struct aliasinfo { + char *name; + char 
*ref; + struct slabinfo *slab; +} aliasinfo[MAX_ALIASES]; + +int slabs = 0; +int actual_slabs = 0; +int aliases = 0; +int alias_targets = 0; +int highest_node = 0; + +char buffer[4096]; + +int show_empty = 0; +int show_report = 0; +int show_alias = 0; +int show_slab = 0; +int skip_zero = 1; +int show_numa = 0; +int show_track = 0; +int show_first_alias = 0; +int validate = 0; +int shrink = 0; +int show_inverted = 0; +int show_single_ref = 0; +int show_totals = 0; +int sort_size = 0; +int sort_active = 0; +int set_debug = 0; +int show_ops = 0; +int show_activity = 0; + +/* Debug options */ +int sanity = 0; +int redzone = 0; +int poison = 0; +int tracking = 0; +int tracing = 0; + +int page_size; + +regex_t pattern; + +static void fatal(const char *x, ...) +{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" + "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" + "-a|--aliases Show aliases\n" + "-A|--activity Most active slabs first\n" + "-d|--debug= Set/Clear Debug options\n" + "-D|--display-active Switch line format to activity\n" + "-e|--empty Show empty slabs\n" + "-f|--first-alias Show first alias\n" + "-h|--help Show usage information\n" + "-i|--inverted Inverted list\n" + "-l|--slabs Show slabs\n" + "-n|--numa Show NUMA information\n" + "-o|--ops Show kmem_cache_ops\n" + "-s|--shrink Shrink slabs\n" + "-r|--report Detailed report on single slabs\n" + "-S|--Size Sort by size\n" + "-t|--tracking Show alloc/free information\n" + "-T|--Totals Show summary information\n" + "-v|--validate Validate slabs\n" + "-z|--zero Include empty slabs\n" + "-1|--1ref Single reference\n" + "\nValid debug options (FZPUT may be combined)\n" + "a / A Switch on all debug options (=FZUP)\n" + "- Switch off all debug options\n" + "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" + "z / Z Redzoning\n" + "p / P Poisoning\n" + "u / U 
Tracking\n" + "t / T Tracing\n" + ); +} + +static unsigned long read_obj(const char *name) +{ + FILE *f = fopen(name, "r"); + + if (!f) + buffer[0] = 0; + else { + if (!fgets(buffer, sizeof(buffer), f)) + buffer[0] = 0; + fclose(f); + if (buffer[strlen(buffer)] == '\n') + buffer[strlen(buffer)] = 0; + } + return strlen(buffer); +} + + +/* + * Get the contents of an attribute + */ +static unsigned long get_obj(const char *name) +{ + if (!read_obj(name)) + return 0; + + return atol(buffer); +} + +static unsigned long get_obj_and_str(const char *name, char **x) +{ + unsigned long result = 0; + char *p; + + *x = NULL; + + if (!read_obj(name)) { + x = NULL; + return 0; + } + result = strtoul(buffer, &p, 10); + while (*p == ' ') + p++; + if (*p) + *x = strdup(p); + return result; +} + +static void set_obj(struct slabinfo *s, const char *name, int n) +{ + char x[100]; + FILE *f; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "w"); + if (!f) + fatal("Cannot write to %s\n", x); + + fprintf(f, "%d\n", n); + fclose(f); +} + +static unsigned long read_slab_obj(struct slabinfo *s, const char *name) +{ + char x[100]; + FILE *f; + size_t l; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} + + +/* + * Put a size string together + */ +static int store_size(char *buffer, unsigned long value) +{ + unsigned long divisor = 1; + char trailer = 0; + int n; + + if (value > 1000000000UL) { + divisor = 100000000UL; + trailer = 'G'; + } else if (value > 1000000UL) { + divisor = 100000UL; + trailer = 'M'; + } else if (value > 1000UL) { + divisor = 100; + trailer = 'K'; + } + + value /= divisor; + n = sprintf(buffer, "%ld",value); + if (trailer) { + buffer[n] = trailer; + n++; + buffer[n] = 0; + } + if (divisor != 1) { + memmove(buffer + n - 2, buffer + n - 3, 4); + buffer[n-2] = '.'; + n++; + } + return n; +} + +static 
void decode_numa_list(int *numa, char *t) +{ + int node; + int nr; + + memset(numa, 0, MAX_NODES * sizeof(int)); + + if (!t) + return; + + while (*t == 'N') { + t++; + node = strtoul(t, &t, 10); + if (*t == '=') { + t++; + nr = strtoul(t, &t, 10); + numa[node] = nr; + if (node > highest_node) + highest_node = node; + } + while (*t == ' ') + t++; + } +} + +static void slab_validate(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "validate", 1); +} + +static void slab_shrink(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "shrink", 1); +} + +int line = 0; + +static void first_line(void) +{ + if (show_activity) + printf("Name Objects Alloc Free %%Fast Fallb O CmpX UL\n"); + else + printf("Name Objects Objsize Space " + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); +} + +/* + * Find the shortest alias of a slab + */ +static struct aliasinfo *find_one_alias(struct slabinfo *find) +{ + struct aliasinfo *a; + struct aliasinfo *best = NULL; + + for(a = aliasinfo;a < aliasinfo + aliases; a++) { + if (a->slab == find && + (!best || strlen(best->name) < strlen(a->name))) { + best = a; + if (strncmp(a->name,"kmall", 5) == 0) + return best; + } + } + return best; +} + +static unsigned long slab_size(struct slabinfo *s) +{ + return s->slabs * (page_size << s->order); +} + +static unsigned long slab_activity(struct slabinfo *s) +{ + return s->alloc_fastpath + s->free_fastpath + + s->alloc_slowpath + s->free_slowpath; +} + +static void slab_numa(struct slabinfo *s, int mode) +{ + int node; + + if (strcmp(s->name, "*") == 0) + return; + + if (!highest_node) { + printf("\n%s: No NUMA information available.\n", s->name); + return; + } + + if (skip_zero && !s->slabs) + return; + + if (!line) { + printf("\n%-21s:", mode ? 
"NUMA nodes" : "Slab"); + for(node = 0; node <= highest_node; node++) + printf(" %4d", node); + printf("\n----------------------"); + for(node = 0; node <= highest_node; node++) + printf("-----"); + printf("\n"); + } + printf("%-21s ", mode ? "All slabs" : s->name); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa[node]); + printf(" %4s", b); + } + printf("\n"); + if (mode) { + printf("%-21s ", "Partial slabs"); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa_partial[node]); + printf(" %4s", b); + } + printf("\n"); + } + line++; +} + +static void show_tracking(struct slabinfo *s) +{ + printf("\n%s: Kernel object allocation\n", s->name); + printf("-----------------------------------------------------------------------\n"); + if (read_slab_obj(s, "alloc_calls")) + printf("%s", buffer); + else + printf("No Data\n"); + + printf("\n%s: Kernel object freeing\n", s->name); + printf("------------------------------------------------------------------------\n"); + if (read_slab_obj(s, "free_calls")) + printf("%s", buffer); + else + printf("No Data\n"); + +} + +static void ops(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (read_slab_obj(s, "ops")) { + printf("\n%s: kmem_cache operations\n", s->name); + printf("--------------------------------------------\n"); + printf("%s", buffer); + } else + printf("\n%s has no kmem_cache operations\n", s->name); +} + +static const char *onoff(int x) +{ + if (x) + return "On "; + return "Off"; +} + +static void slab_stats(struct slabinfo *s) +{ + unsigned long total_alloc; + unsigned long total_free; + unsigned long total; + + if (!s->alloc_slab) + return; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + if (!total_alloc) + return; + + printf("\n"); + printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); + printf("--------------------------------------------------\n"); + 
printf("Fastpath %8lu %8lu %3lu %3lu\n", + s->alloc_fastpath, s->free_fastpath, + s->alloc_fastpath * 100 / total_alloc, + s->free_fastpath * 100 / total_free); + printf("Slowpath %8lu %8lu %3lu %3lu\n", + total_alloc - s->alloc_fastpath, s->free_slowpath, + (total_alloc - s->alloc_fastpath) * 100 / total_alloc, + s->free_slowpath * 100 / total_free); + printf("Page Alloc %8lu %8lu %3lu %3lu\n", + s->alloc_slab, s->free_slab, + s->alloc_slab * 100 / total_alloc, + s->free_slab * 100 / total_free); + printf("Add partial %8lu %8lu %3lu %3lu\n", + s->deactivate_to_head + s->deactivate_to_tail, + s->free_add_partial, + (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, + s->free_add_partial * 100 / total_free); + printf("Remove partial %8lu %8lu %3lu %3lu\n", + s->alloc_from_partial, s->free_remove_partial, + s->alloc_from_partial * 100 / total_alloc, + s->free_remove_partial * 100 / total_free); + + printf("Cpu partial list %8lu %8lu %3lu %3lu\n", + s->cpu_partial_alloc, s->cpu_partial_free, + s->cpu_partial_alloc * 100 / total_alloc, + s->cpu_partial_free * 100 / total_free); + + printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", + s->deactivate_remote_frees, s->free_frozen, + s->deactivate_remote_frees * 100 / total_alloc, + s->free_frozen * 100 / total_free); + + printf("Total %8lu %8lu\n\n", total_alloc, total_free); + + if (s->cpuslab_flush) + printf("Flushes %8lu\n", s->cpuslab_flush); + + total = s->deactivate_full + s->deactivate_empty + + s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; + + if (total) { + printf("\nSlab Deactivation Ocurrences %%\n"); + printf("-------------------------------------------------\n"); + printf("Slab full %7lu %3lu%%\n", + s->deactivate_full, (s->deactivate_full * 100) / total); + printf("Slab empty %7lu %3lu%%\n", + s->deactivate_empty, (s->deactivate_empty * 100) / total); + printf("Moved to head of partial list %7lu %3lu%%\n", + s->deactivate_to_head, (s->deactivate_to_head * 100) / 
total); + printf("Moved to tail of partial list %7lu %3lu%%\n", + s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); + printf("Deactivation bypass %7lu %3lu%%\n", + s->deactivate_bypass, (s->deactivate_bypass * 100) / total); + printf("Refilled from foreign frees %7lu %3lu%%\n", + s->alloc_refill, (s->alloc_refill * 100) / total); + printf("Node mismatch %7lu %3lu%%\n", + s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); + } + + if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) + printf("\nCmpxchg_double Looping\n------------------------\n"); + printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", + s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); +} + +static void report(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n", + s->name, s->aliases, s->order, s->objects); + if (s->hwcache_align) + printf("** Hardware cacheline aligned\n"); + if (s->cache_dma) + printf("** Memory is allocated in a special DMA zone\n"); + if (s->destroy_by_rcu) + printf("** Slabs are destroyed via RCU\n"); + if (s->reclaim_account) + printf("** Reclaim accounting active\n"); + + printf("\nSizes (bytes) Slabs Debug Memory\n"); + printf("------------------------------------------------------------------------\n"); + printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", + s->object_size, s->slabs, onoff(s->sanity_checks), + s->slabs * (page_size << s->order)); + printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", + s->slab_size, s->slabs - s->partial - s->cpu_slabs, + onoff(s->red_zone), s->objects * s->object_size); + printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", + page_size << s->order, s->partial, onoff(s->poison), + s->slabs * (page_size << s->order) - s->objects * s->object_size); + printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", + s->slab_size - s->object_size, s->cpu_slabs, 
onoff(s->store_user), + (s->slab_size - s->object_size) * s->objects); + printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", + s->align, s->objs_per_slab, onoff(s->trace), + ((page_size << s->order) - s->objs_per_slab * s->slab_size) * + s->slabs); + + ops(s); + show_tracking(s); + slab_numa(s, 1); + slab_stats(s); +} + +static void slabcache(struct slabinfo *s) +{ + char size_str[20]; + char dist_str[40]; + char flags[20]; + char *p = flags; + + if (strcmp(s->name, "*") == 0) + return; + + if (actual_slabs == 1) { + report(s); + return; + } + + if (skip_zero && !show_empty && !s->slabs) + return; + + if (show_empty && s->slabs) + return; + + store_size(size_str, slab_size(s)); + snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, + s->partial, s->cpu_slabs); + + if (!line++) + first_line(); + + if (s->aliases) + *p++ = '*'; + if (s->cache_dma) + *p++ = 'd'; + if (s->hwcache_align) + *p++ = 'A'; + if (s->poison) + *p++ = 'P'; + if (s->reclaim_account) + *p++ = 'a'; + if (s->red_zone) + *p++ = 'Z'; + if (s->sanity_checks) + *p++ = 'F'; + if (s->store_user) + *p++ = 'U'; + if (s->trace) + *p++ = 'T'; + + *p = 0; + if (show_activity) { + unsigned long total_alloc; + unsigned long total_free; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n", + s->name, s->objects, + total_alloc, total_free, + total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, + total_free ? (s->free_fastpath * 100 / total_free) : 0, + s->order_fallback, s->order, s->cmpxchg_double_fail, + s->cmpxchg_double_cpu_fail); + } + else + printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? 
(s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); +} + +/* + * Analyze debug options. Return false if something is amiss. + */ +static int debug_opt_scan(char *opt) +{ + if (!opt || !opt[0] || strcmp(opt, "-") == 0) + return 1; + + if (strcasecmp(opt, "a") == 0) { + sanity = 1; + poison = 1; + redzone = 1; + tracking = 1; + return 1; + } + + for ( ; *opt; opt++) + switch (*opt) { + case 'F' : case 'f': + if (sanity) + return 0; + sanity = 1; + break; + case 'P' : case 'p': + if (poison) + return 0; + poison = 1; + break; + + case 'Z' : case 'z': + if (redzone) + return 0; + redzone = 1; + break; + + case 'U' : case 'u': + if (tracking) + return 0; + tracking = 1; + break; + + case 'T' : case 't': + if (tracing) + return 0; + tracing = 1; + break; + default: + return 0; + } + return 1; +} + +static int slab_empty(struct slabinfo *s) +{ + if (s->objects > 0) + return 0; + + /* + * We may still have slabs even if there are no objects. Shrinking will + * remove them. 
+ */ + if (s->slabs != 0) + set_obj(s, "shrink", 1); + + return 1; +} + +static void slab_debug(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (sanity && !s->sanity_checks) { + set_obj(s, "sanity", 1); + } + if (!sanity && s->sanity_checks) { + if (slab_empty(s)) + set_obj(s, "sanity", 0); + else + fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); + } + if (redzone && !s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 1); + else + fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); + } + if (!redzone && s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 0); + else + fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); + } + if (poison && !s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 1); + else + fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); + } + if (!poison && s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 0); + else + fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); + } + if (tracking && !s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 1); + else + fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); + } + if (!tracking && s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 0); + else + fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); + } + if (tracing && !s->trace) { + if (slabs == 1) + set_obj(s, "trace", 1); + else + fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); + } + if (!tracing && s->trace) + set_obj(s, "trace", 1); +} + +static void totals(void) +{ + struct slabinfo *s; + + int used_slabs = 0; + char b1[20], b2[20], b3[20], b4[20]; + unsigned long long max = 1ULL << 63; + + /* Object size */ + unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; + + /* Number of partial slabs in a slabcache */ + unsigned long long min_partial = max, max_partial = 0, + avg_partial, total_partial = 0; + 
+ /* Number of slabs in a slab cache */ + unsigned long long min_slabs = max, max_slabs = 0, + avg_slabs, total_slabs = 0; + + /* Size of the whole slab */ + unsigned long long min_size = max, max_size = 0, + avg_size, total_size = 0; + + /* Bytes used for object storage in a slab */ + unsigned long long min_used = max, max_used = 0, + avg_used, total_used = 0; + + /* Waste: Bytes used for alignment and padding */ + unsigned long long min_waste = max, max_waste = 0, + avg_waste, total_waste = 0; + /* Number of objects in a slab */ + unsigned long long min_objects = max, max_objects = 0, + avg_objects, total_objects = 0; + /* Waste per object */ + unsigned long long min_objwaste = max, + max_objwaste = 0, avg_objwaste, + total_objwaste = 0; + + /* Memory per object */ + unsigned long long min_memobj = max, + max_memobj = 0, avg_memobj, + total_objsize = 0; + + /* Percentage of partial slabs per slab */ + unsigned long min_ppart = 100, max_ppart = 0, + avg_ppart, total_ppart = 0; + + /* Number of objects in partial slabs */ + unsigned long min_partobj = max, max_partobj = 0, + avg_partobj, total_partobj = 0; + + /* Percentage of partial objects of all objects in a slab */ + unsigned long min_ppartobj = 100, max_ppartobj = 0, + avg_ppartobj, total_ppartobj = 0; + + + for (s = slabinfo; s < slabinfo + slabs; s++) { + unsigned long long size; + unsigned long used; + unsigned long long wasted; + unsigned long long objwaste; + unsigned long percentage_partial_slabs; + unsigned long percentage_partial_objs; + + if (!s->slabs || !s->objects) + continue; + + used_slabs++; + + size = slab_size(s); + used = s->objects * s->object_size; + wasted = size - used; + objwaste = s->slab_size - s->object_size; + + percentage_partial_slabs = s->partial * 100 / s->slabs; + if (percentage_partial_slabs > 100) + percentage_partial_slabs = 100; + + percentage_partial_objs = s->objects_partial * 100 + / s->objects; + + if (percentage_partial_objs > 100) + percentage_partial_objs = 100; + + 
if (s->object_size < min_objsize) + min_objsize = s->object_size; + if (s->partial < min_partial) + min_partial = s->partial; + if (s->slabs < min_slabs) + min_slabs = s->slabs; + if (size < min_size) + min_size = size; + if (wasted < min_waste) + min_waste = wasted; + if (objwaste < min_objwaste) + min_objwaste = objwaste; + if (s->objects < min_objects) + min_objects = s->objects; + if (used < min_used) + min_used = used; + if (s->objects_partial < min_partobj) + min_partobj = s->objects_partial; + if (percentage_partial_slabs < min_ppart) + min_ppart = percentage_partial_slabs; + if (percentage_partial_objs < min_ppartobj) + min_ppartobj = percentage_partial_objs; + if (s->slab_size < min_memobj) + min_memobj = s->slab_size; + + if (s->object_size > max_objsize) + max_objsize = s->object_size; + if (s->partial > max_partial) + max_partial = s->partial; + if (s->slabs > max_slabs) + max_slabs = s->slabs; + if (size > max_size) + max_size = size; + if (wasted > max_waste) + max_waste = wasted; + if (objwaste > max_objwaste) + max_objwaste = objwaste; + if (s->objects > max_objects) + max_objects = s->objects; + if (used > max_used) + max_used = used; + if (s->objects_partial > max_partobj) + max_partobj = s->objects_partial; + if (percentage_partial_slabs > max_ppart) + max_ppart = percentage_partial_slabs; + if (percentage_partial_objs > max_ppartobj) + max_ppartobj = percentage_partial_objs; + if (s->slab_size > max_memobj) + max_memobj = s->slab_size; + + total_partial += s->partial; + total_slabs += s->slabs; + total_size += size; + total_waste += wasted; + + total_objects += s->objects; + total_used += used; + total_partobj += s->objects_partial; + total_ppart += percentage_partial_slabs; + total_ppartobj += percentage_partial_objs; + + total_objwaste += s->objects * objwaste; + total_objsize += s->objects * s->slab_size; + } + + if (!total_objects) { + printf("No objects\n"); + return; + } + if (!used_slabs) { + printf("No slabs\n"); + return; + } + + /* Per 
slab averages */ + avg_partial = total_partial / used_slabs; + avg_slabs = total_slabs / used_slabs; + avg_size = total_size / used_slabs; + avg_waste = total_waste / used_slabs; + + avg_objects = total_objects / used_slabs; + avg_used = total_used / used_slabs; + avg_partobj = total_partobj / used_slabs; + avg_ppart = total_ppart / used_slabs; + avg_ppartobj = total_ppartobj / used_slabs; + + /* Per object object sizes */ + avg_objsize = total_used / total_objects; + avg_objwaste = total_objwaste / total_objects; + avg_partobj = total_partobj * 100 / total_objects; + avg_memobj = total_objsize / total_objects; + + printf("Slabcache Totals\n"); + printf("----------------\n"); + printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n", + slabs, aliases, alias_targets, used_slabs); + + store_size(b1, total_size);store_size(b2, total_waste); + store_size(b3, total_waste * 100 / total_used); + printf("Memory used: %6s # Loss : %6s MRatio:%6s%%\n", b1, b2, b3); + + store_size(b1, total_objects);store_size(b2, total_partobj); + store_size(b3, total_partobj * 100 / total_objects); + printf("# Objects : %6s # PartObj: %6s ORatio:%6s%%\n", b1, b2, b3); + + printf("\n"); + printf("Per Cache Average Min Max Total\n"); + printf("---------------------------------------------------------\n"); + + store_size(b1, avg_objects);store_size(b2, min_objects); + store_size(b3, max_objects);store_size(b4, total_objects); + printf("#Objects %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + printf("#Slabs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_partial);store_size(b2, min_partial); + store_size(b3, max_partial);store_size(b4, total_partial); + printf("#PartSlab %10s %10s %10s %10s\n", + b1, b2, b3, b4); + store_size(b1, avg_ppart);store_size(b2, min_ppart); + store_size(b3, max_ppart); + store_size(b4, total_partial * 100 / total_slabs); + 
printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_partobj);store_size(b2, min_partobj); + store_size(b3, max_partobj); + store_size(b4, total_partobj); + printf("PartObjs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); + store_size(b3, max_ppartobj); + store_size(b4, total_partobj * 100 / total_objects); + printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_size);store_size(b2, min_size); + store_size(b3, max_size);store_size(b4, total_size); + printf("Memory %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_used);store_size(b2, min_used); + store_size(b3, max_used);store_size(b4, total_used); + printf("Used %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_waste);store_size(b2, min_waste); + store_size(b3, max_waste);store_size(b4, total_waste); + printf("Loss %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + printf("\n"); + printf("Per Object Average Min Max\n"); + printf("---------------------------------------------\n"); + + store_size(b1, avg_memobj);store_size(b2, min_memobj); + store_size(b3, max_memobj); + printf("Memory %10s %10s %10s\n", + b1, b2, b3); + store_size(b1, avg_objsize);store_size(b2, min_objsize); + store_size(b3, max_objsize); + printf("User %10s %10s %10s\n", + b1, b2, b3); + + store_size(b1, avg_objwaste);store_size(b2, min_objwaste); + store_size(b3, max_objwaste); + printf("Loss %10s %10s %10s\n", + b1, b2, b3); +} + +static void sort_slabs(void) +{ + struct slabinfo *s1,*s2; + + for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { + for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { + int result; + + if (sort_size) + result = slab_size(s1) < slab_size(s2); + else if (sort_active) + result = slab_activity(s1) < slab_activity(s2); + else + result = strcasecmp(s1->name, s2->name); + + if (show_inverted) + result = -result; + + if (result > 0) { + struct slabinfo t; + + 
memcpy(&t, s1, sizeof(struct slabinfo)); + memcpy(s1, s2, sizeof(struct slabinfo)); + memcpy(s2, &t, sizeof(struct slabinfo)); + } + } + } +} + +static void sort_aliases(void) +{ + struct aliasinfo *a1,*a2; + + for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { + for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { + char *n1, *n2; + + n1 = a1->name; + n2 = a2->name; + if (show_alias && !show_inverted) { + n1 = a1->ref; + n2 = a2->ref; + } + if (strcasecmp(n1, n2) > 0) { + struct aliasinfo t; + + memcpy(&t, a1, sizeof(struct aliasinfo)); + memcpy(a1, a2, sizeof(struct aliasinfo)); + memcpy(a2, &t, sizeof(struct aliasinfo)); + } + } + } +} + +static void link_slabs(void) +{ + struct aliasinfo *a; + struct slabinfo *s; + + for (a = aliasinfo; a < aliasinfo + aliases; a++) { + + for (s = slabinfo; s < slabinfo + slabs; s++) + if (strcmp(a->ref, s->name) == 0) { + a->slab = s; + s->refs++; + break; + } + if (s == slabinfo + slabs) + fatal("Unresolved alias %s\n", a->ref); + } +} + +static void alias(void) +{ + struct aliasinfo *a; + char *active = NULL; + + sort_aliases(); + link_slabs(); + + for(a = aliasinfo; a < aliasinfo + aliases; a++) { + + if (!show_single_ref && a->slab->refs == 1) + continue; + + if (!show_inverted) { + if (active) { + if (strcmp(a->slab->name, active) == 0) { + printf(" %s", a->name); + continue; + } + } + printf("\n%-12s <- %s", a->slab->name, a->name); + active = a->slab->name; + } + else + printf("%-20s -> %s\n", a->name, a->slab->name); + } + if (active) + printf("\n"); +} + + +static void rename_slabs(void) +{ + struct slabinfo *s; + struct aliasinfo *a; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + if (*s->name != ':') + continue; + + if (s->refs > 1 && !show_first_alias) + continue; + + a = find_one_alias(s); + + if (a) + s->name = a->name; + else { + s->name = "*"; + actual_slabs--; + } + } +} + +static int slab_mismatch(char *slab) +{ + return regexec(&pattern, slab, 0, NULL, 0); +} + +static void read_slab_dir(void) +{ + 
DIR *dir; + struct dirent *de; + struct slabinfo *slab = slabinfo; + struct aliasinfo *alias = aliasinfo; + char *p; + char *t; + int count; + + if (chdir("/sys/kernel/slab") && chdir("/sys/slab")) + fatal("SYSFS support for SLUB not active\n"); + + dir = opendir("."); + while ((de = readdir(dir))) { + if (de->d_name[0] == '.' || + (de->d_name[0] != ':' && slab_mismatch(de->d_name))) + continue; + switch (de->d_type) { + case DT_LNK: + alias->name = strdup(de->d_name); + count = readlink(de->d_name, buffer, sizeof(buffer)-1); + + if (count < 0) + fatal("Cannot read symlink %s\n", de->d_name); + + buffer[count] = 0; + p = buffer + count; + while (p > buffer && p[-1] != '/') + p--; + alias->ref = strdup(p); + alias++; + break; + case DT_DIR: + if (chdir(de->d_name)) + fatal("Unable to access slab %s\n", slab->name); + slab->name = strdup(de->d_name); + slab->alias = 0; + slab->refs = 0; + slab->aliases = get_obj("aliases"); + slab->align = get_obj("align"); + slab->cache_dma = get_obj("cache_dma"); + slab->cpu_slabs = get_obj("cpu_slabs"); + slab->destroy_by_rcu = get_obj("destroy_by_rcu"); + slab->hwcache_align = get_obj("hwcache_align"); + slab->object_size = get_obj("object_size"); + slab->objects = get_obj("objects"); + slab->objects_partial = get_obj("objects_partial"); + slab->objects_total = get_obj("objects_total"); + slab->objs_per_slab = get_obj("objs_per_slab"); + slab->order = get_obj("order"); + slab->partial = get_obj("partial"); + slab->partial = get_obj_and_str("partial", &t); + decode_numa_list(slab->numa_partial, t); + free(t); + slab->poison = get_obj("poison"); + slab->reclaim_account = get_obj("reclaim_account"); + slab->red_zone = get_obj("red_zone"); + slab->sanity_checks = get_obj("sanity_checks"); + slab->slab_size = get_obj("slab_size"); + slab->slabs = get_obj_and_str("slabs", &t); + decode_numa_list(slab->numa, t); + free(t); + slab->store_user = get_obj("store_user"); + slab->trace = get_obj("trace"); + slab->alloc_fastpath = 
get_obj("alloc_fastpath"); + slab->alloc_slowpath = get_obj("alloc_slowpath"); + slab->free_fastpath = get_obj("free_fastpath"); + slab->free_slowpath = get_obj("free_slowpath"); + slab->free_frozen= get_obj("free_frozen"); + slab->free_add_partial = get_obj("free_add_partial"); + slab->free_remove_partial = get_obj("free_remove_partial"); + slab->alloc_from_partial = get_obj("alloc_from_partial"); + slab->alloc_slab = get_obj("alloc_slab"); + slab->alloc_refill = get_obj("alloc_refill"); + slab->free_slab = get_obj("free_slab"); + slab->cpuslab_flush = get_obj("cpuslab_flush"); + slab->deactivate_full = get_obj("deactivate_full"); + slab->deactivate_empty = get_obj("deactivate_empty"); + slab->deactivate_to_head = get_obj("deactivate_to_head"); + slab->deactivate_to_tail = get_obj("deactivate_to_tail"); + slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); + slab->order_fallback = get_obj("order_fallback"); + slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); + slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); + slab->cpu_partial_alloc = get_obj("cpu_partial_alloc"); + slab->cpu_partial_free = get_obj("cpu_partial_free"); + slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); + slab->deactivate_bypass = get_obj("deactivate_bypass"); + chdir(".."); + if (slab->name[0] == ':') + alias_targets++; + slab++; + break; + default : + fatal("Unknown file type %lx\n", de->d_type); + } + } + closedir(dir); + slabs = slab - slabinfo; + actual_slabs = slabs; + aliases = alias - aliasinfo; + if (slabs > MAX_SLABS) + fatal("Too many slabs\n"); + if (aliases > MAX_ALIASES) + fatal("Too many aliases\n"); +} + +static void output_slabs(void) +{ + struct slabinfo *slab; + + for (slab = slabinfo; slab < slabinfo + slabs; slab++) { + + if (slab->alias) + continue; + + + if (show_numa) + slab_numa(slab, 0); + else if (show_track) + show_tracking(slab); + else if (validate) + slab_validate(slab); + else if (shrink) + slab_shrink(slab); + 
else if (set_debug) + slab_debug(slab); + else if (show_ops) + ops(slab); + else if (show_slab) + slabcache(slab); + else if (show_report) + report(slab); + } +} + +struct option opts[] = { + { "aliases", 0, NULL, 'a' }, + { "activity", 0, NULL, 'A' }, + { "debug", 2, NULL, 'd' }, + { "display-activity", 0, NULL, 'D' }, + { "empty", 0, NULL, 'e' }, + { "first-alias", 0, NULL, 'f' }, + { "help", 0, NULL, 'h' }, + { "inverted", 0, NULL, 'i'}, + { "numa", 0, NULL, 'n' }, + { "ops", 0, NULL, 'o' }, + { "report", 0, NULL, 'r' }, + { "shrink", 0, NULL, 's' }, + { "slabs", 0, NULL, 'l' }, + { "track", 0, NULL, 't'}, + { "validate", 0, NULL, 'v' }, + { "zero", 0, NULL, 'z' }, + { "1ref", 0, NULL, '1'}, + { NULL, 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + int err; + char *pattern_source; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", + opts, NULL)) != -1) + switch (c) { + case '1': + show_single_ref = 1; + break; + case 'a': + show_alias = 1; + break; + case 'A': + sort_active = 1; + break; + case 'd': + set_debug = 1; + if (!debug_opt_scan(optarg)) + fatal("Invalid debug option '%s'\n", optarg); + break; + case 'D': + show_activity = 1; + break; + case 'e': + show_empty = 1; + break; + case 'f': + show_first_alias = 1; + break; + case 'h': + usage(); + return 0; + case 'i': + show_inverted = 1; + break; + case 'n': + show_numa = 1; + break; + case 'o': + show_ops = 1; + break; + case 'r': + show_report = 1; + break; + case 's': + shrink = 1; + break; + case 'l': + show_slab = 1; + break; + case 't': + show_track = 1; + break; + case 'v': + validate = 1; + break; + case 'z': + skip_zero = 0; + break; + case 'T': + show_totals = 1; + break; + case 'S': + sort_size = 1; + break; + + default: + fatal("%s: Invalid option '%c'\n", argv[0], optopt); + + } + + if (!show_slab && !show_alias && !show_track && !show_report + && !validate && !shrink && !set_debug && !show_ops) + show_slab = 1; + + if (argc > 
optind) + pattern_source = argv[optind]; + else + pattern_source = ".*"; + + err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB); + if (err) + fatal("%s: Invalid pattern '%s' code %d\n", + argv[0], pattern_source, err); + read_slab_dir(); + if (show_alias) + alias(); + else + if (show_totals) + totals(); + else { + link_slabs(); + rename_slabs(); + sort_slabs(); + output_slabs(); + } + return 0; +} -- cgit v1.2.2 From f0f57b2b1488251970c25deea0ea150a8d0911ed Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 28 Mar 2012 14:42:56 -0700 Subject: mm: move hugepage test examples to tools/testing/selftests/vm hugepage-mmap.c, hugepage-shm.c and map_hugetlb.c in Documentation/vm are simple pass/fail tests. It's better to promote them to tools/testing/selftests. Thanks to Andrew Morton for the suggestion. They all first need nr_hugepages to be set up properly, and hugepage-mmap needs hugetlbfs to be mounted. So I add a shell script run_vmtests to do such work, which will call the three test programs and check their return values. Changes to the original code include: a. add run_vmtests script b. return error when read_bytes mismatches the written bytes. c. coding style fixes: do not use assignment in if condition [akpm@linux-foundation.org: build the targets before trying to execute them] [akpm@linux-foundation.org: Documentation/vm/ no longer has a Makefile. 
Fixes "make clean"] Signed-off-by: Dave Young Cc: Wu Fengguang Cc: Christoph Lameter Cc: Pekka Enberg Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/vm/Makefile | 14 ++++ tools/testing/selftests/vm/hugepage-mmap.c | 92 ++++++++++++++++++++++++++ tools/testing/selftests/vm/hugepage-shm.c | 100 +++++++++++++++++++++++++++++ tools/testing/selftests/vm/map_hugetlb.c | 79 +++++++++++++++++++++++ tools/testing/selftests/vm/run_vmtests | 77 ++++++++++++++++++++++ 6 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/vm/Makefile create mode 100644 tools/testing/selftests/vm/hugepage-mmap.c create mode 100644 tools/testing/selftests/vm/hugepage-shm.c create mode 100644 tools/testing/selftests/vm/map_hugetlb.c create mode 100644 tools/testing/selftests/vm/run_vmtests (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9203cd77fc3..28bc57ee757 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints +TARGETS = breakpoints vm all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile new file mode 100644 index 00000000000..b336b24aa6c --- /dev/null +++ b/tools/testing/selftests/vm/Makefile @@ -0,0 +1,14 @@ +# Makefile for vm selftests + +CC = $(CROSS_COMPILE)gcc +CFLAGS = -Wall -Wextra + +all: hugepage-mmap hugepage-shm map_hugetlb +%: %.c + $(CC) $(CFLAGS) -o $@ $^ + +run_tests: all + /bin/sh ./run_vmtests + +clean: + $(RM) hugepage-mmap hugepage-shm map_hugetlb diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c new file mode 100644 index 00000000000..a10f310d236 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-mmap.c @@ -0,0 +1,92 @@ +/* + * hugepage-mmap: + * + * Example of using huge page 
memory in a user application using the mmap + * system call. Before running this application, make sure that the + * administrator has mounted the hugetlbfs filesystem (on some directory + * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this + * example, the app is requesting memory of size 256MB that is backed by + * huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + */ + +#include +#include +#include +#include +#include + +#define FILE_NAME "huge/hugepagefile" +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_SHARED | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_SHARED) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int fd, ret; + + fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + perror("Open failed"); + exit(1); + } + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + unlink(FILE_NAME); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, 
LENGTH); + close(fd); + unlink(FILE_NAME); + + return ret; +} diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c new file mode 100644 index 00000000000..0d0ef4fc0c0 --- /dev/null +++ b/tools/testing/selftests/vm/hugepage-shm.c @@ -0,0 +1,100 @@ +/* + * hugepage-shm: + * + * Example of using huge page memory in a user application using Sys V shared + * memory system calls. In this example the app is requesting 256MB of + * memory that is backed by huge pages. The application uses the flag + * SHM_HUGETLB in the shmget system call to inform the kernel that it is + * requesting huge pages. + * + * For the ia64 architecture, the Linux kernel reserves Region number 4 for + * huge pages. That means that if one requires a fixed address, a huge page + * aligned address starting with 0x800000... will be required. If a fixed + * address is not required, the kernel will select an address in the proper + * range. + * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. + * + * Note: The default shared memory limit is quite low on many kernels, + * you may need to increase it via: + * + * echo 268435456 > /proc/sys/kernel/shmmax + * + * This will increase the maximum size per shared memory segment to 256MB. + * The other limit that you will hit eventually is shmall which is the + * total amount of shared memory in pages. 
To set it to 16GB on a system + * with a 4kB pagesize do: + * + * echo 4194304 > /proc/sys/kernel/shmall + */ + +#include +#include +#include +#include +#include +#include + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + +#define LENGTH (256UL*1024*1024) + +#define dprintf(x) printf(x) + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define SHMAT_FLAGS (SHM_RND) +#else +#define ADDR (void *)(0x0UL) +#define SHMAT_FLAGS (0) +#endif + +int main(void) +{ + int shmid; + unsigned long i; + char *shmaddr; + + shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) { + perror("shmget"); + exit(1); + } + printf("shmid: 0x%x\n", shmid); + + shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + if (shmaddr == (char *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(2); + } + printf("shmaddr: %p\n", shmaddr); + + dprintf("Starting the writes:\n"); + for (i = 0; i < LENGTH; i++) { + shmaddr[i] = (char)(i); + if (!(i % (1024 * 1024))) + dprintf("."); + } + dprintf("\n"); + + dprintf("Starting the Check..."); + for (i = 0; i < LENGTH; i++) + if (shmaddr[i] != (char)i) { + printf("\nIndex %lu mismatched\n", i); + exit(3); + } + dprintf("Done.\n"); + + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + + shmctl(shmid, IPC_RMID, NULL); + + return 0; +} diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c new file mode 100644 index 00000000000..ac56639dd4a --- /dev/null +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -0,0 +1,79 @@ +/* + * Example of using hugepage memory in a user application using the mmap + * system call with MAP_HUGETLB flag. Before running this program make + * sure the administrator has allocated enough default sized huge pages + * to cover the 256 MB allocation. 
+ * + * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. + * That means the addresses starting with 0x800000... will need to be + * specified. Specifying a fixed address is not required on ppc64, i386 + * or x86_64. + */ +#include +#include +#include +#include +#include + +#define LENGTH (256UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + +/* Only ia64 requires this */ +#ifdef __ia64__ +#define ADDR (void *)(0x8000000000000000UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) +#else +#define ADDR (void *)(0x0UL) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +#endif + +static void check_bytes(char *addr) +{ + printf("First hex is %x\n", *((unsigned int *)addr)); +} + +static void write_bytes(char *addr) +{ + unsigned long i; + + for (i = 0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int read_bytes(char *addr) +{ + unsigned long i; + + check_bytes(addr); + for (i = 0; i < LENGTH; i++) + if (*(addr + i) != (char)i) { + printf("Mismatch at %lu\n", i); + return 1; + } + return 0; +} + +int main(void) +{ + void *addr; + int ret; + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); + if (addr == MAP_FAILED) { + perror("mmap"); + exit(1); + } + + printf("Returned address is %p\n", addr); + check_bytes(addr); + write_bytes(addr); + ret = read_bytes(addr); + + munmap(addr, LENGTH); + + return ret; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests new file mode 100644 index 00000000000..8b40bd5e5cc --- /dev/null +++ b/tools/testing/selftests/vm/run_vmtests @@ -0,0 +1,77 @@ +#!/bin/bash +#please run as root + +#we need 256M, below is the size in kB +needmem=262144 +mnt=./huge + +#get pagesize and freepages from /proc/meminfo +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + if [ "$name" = "Hugepagesize:" ]; then + 
pgsize=$size + fi +done < /proc/meminfo + +#set proper nr_hugepages +if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + needpgs=`expr $needmem / $pgsize` + if [ $freepgs -lt $needpgs ]; then + lackpgs=$(( $needpgs - $freepgs )) + echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit 1 + fi + fi +else + echo "no hugetlbfs support in kernel?" + exit 1 +fi + +mkdir $mnt +mount -t hugetlbfs none $mnt + +echo "--------------------" +echo "runing hugepage-mmap" +echo "--------------------" +./hugepage-mmap +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +shmmax=`cat /proc/sys/kernel/shmmax` +shmall=`cat /proc/sys/kernel/shmall` +echo 268435456 > /proc/sys/kernel/shmmax +echo 4194304 > /proc/sys/kernel/shmall +echo "--------------------" +echo "runing hugepage-shm" +echo "--------------------" +./hugepage-shm +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi +echo $shmmax > /proc/sys/kernel/shmmax +echo $shmall > /proc/sys/kernel/shmall + +echo "--------------------" +echo "runing map_hugetlb" +echo "--------------------" +./map_hugetlb +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi + +#cleanup +umount $mnt +rm -rf $mnt +echo $nr_hugepgs > /proc/sys/vm/nr_hugepages -- cgit v1.2.2 From e23da0370f80834e971142e50253f5b79be83631 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 6 Feb 2012 18:37:16 -0500 Subject: tools turbostat: add summary option turbostat -s cuts down on the amount of output, per user request. Also tweak some output whitespace and the man page. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 99 ++++++++++++++++++++--------------- tools/power/x86/turbostat/turbostat.c | 90 +++++++++++++++++++++---------- 2 files changed, 120 insertions(+), 69 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 555c69a5592..adf175f6149 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,11 +4,13 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB command .br .B turbostat +.RB [ "\-s" ] .RB [ "\-v" ] .RB [ "\-M MSR#" ] .RB [ "\-i interval_sec" ] @@ -25,6 +27,8 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options +The \fB-s\fP option prints only a 1-line summary for each sample interval. +.PP The \fB-v\fP option increases verbosity. .PP The \fB-M MSR#\fP option dumps the specified MSR, @@ -39,13 +43,14 @@ displays the statistics gathered since it was forked. .SH FIELD DESCRIPTIONS .nf \fBpk\fP processor package number. -\fBcr\fP processor core number. +\fBcor\fP processor core number. \fBCPU\fP Linux CPU (logical processor) number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. \fB%c0\fP percent of the interval that the CPU retired instructions. \fBGHz\fP average clock rate while the CPU was in c0 state. \fBTSC\fP average GHz that the TSC ran during the entire interval. -\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. -\fB%pc3, %pc6\fP percentage residency in hardware package idle states. +\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. +\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. 
.fi .PP .SH EXAMPLE @@ -53,25 +58,37 @@ Without any parameters, turbostat prints out counters ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). -The first row of statistics reflect the average for the entire system. +The first row of statistics is a summary for the entire system. +Note that the summary is a weighted average. Subsequent rows show per-CPU statistics. .nf [root@x980]# ./turbostat -cr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 - 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 - 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 - 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 - 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 - 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 - 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 - 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 - 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 - 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 - 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 - 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.60 1.63 3.38 2.91 0.00 96.49 0.00 76.64 + 0 0 0.59 1.62 3.38 4.51 0.00 94.90 0.00 76.64 + 0 6 1.13 1.64 3.38 3.97 0.00 94.90 0.00 76.64 + 1 2 0.08 1.62 3.38 0.07 0.00 99.85 0.00 76.64 + 1 8 0.03 1.62 3.38 0.12 0.00 99.85 0.00 76.64 + 2 4 0.01 1.62 3.38 0.06 0.00 99.93 0.00 76.64 + 2 10 0.04 1.62 3.38 0.02 0.00 99.93 0.00 76.64 + 8 1 2.85 1.62 3.38 11.71 0.00 85.44 0.00 76.64 + 8 7 1.98 1.62 3.38 12.58 0.00 85.44 0.00 76.64 + 9 3 0.36 1.62 3.38 0.71 0.00 98.93 0.00 76.64 + 9 9 0.09 1.62 3.38 0.98 0.00 98.93 0.00 76.64 + 10 5 0.03 1.62 3.38 0.09 0.00 99.87 0.00 76.64 + 10 11 0.07 1.62 3.38 0.06 0.00 99.87 0.00 76.64 +.fi +.SH SUMMARY EXAMPLE +The "-s" option prints the column headers just once, +and then the one line system summary for each sample interval. 
+ +.nf +[root@x980]# ./turbostat -s + %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.61 1.89 3.38 5.95 0.00 93.44 0.00 66.33 + 0.52 1.62 3.38 6.83 0.00 92.65 0.00 61.11 + 0.62 1.92 3.38 5.47 0.00 93.91 0.00 67.31 .fi .SH VERBOSE EXAMPLE The "-v" option adds verbosity to the output: @@ -101,33 +118,33 @@ until ^C while the other CPUs are mostly idle: .nf [root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null - -^Ccr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 - 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 - 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 - 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 - 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 - 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 - 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 - 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 - 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 - 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 - 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 - 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 - 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 - 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 -6.950866 sec +^C +cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 8.63 3.64 3.38 14.46 0.49 76.42 0.00 0.00 + 0 0 0.34 3.36 3.38 99.66 0.00 0.00 0.00 0.00 + 0 6 99.96 3.64 3.38 0.04 0.00 0.00 0.00 0.00 + 1 2 0.14 3.50 3.38 1.75 2.04 96.07 0.00 0.00 + 1 8 0.38 3.57 3.38 1.51 2.04 96.07 0.00 0.00 + 2 4 0.01 2.65 3.38 0.06 0.00 99.93 0.00 0.00 + 2 10 0.03 2.12 3.38 0.04 0.00 99.93 0.00 0.00 + 8 1 0.91 3.59 3.38 35.27 0.92 62.90 0.00 0.00 + 8 7 1.61 3.63 3.38 34.57 0.92 62.90 0.00 0.00 + 9 3 0.04 3.38 3.38 0.20 0.00 99.76 0.00 0.00 + 9 9 0.04 3.29 3.38 0.20 0.00 99.76 0.00 0.00 + 10 5 0.03 3.08 3.38 0.12 0.00 99.85 0.00 0.00 + 10 11 0.05 3.07 3.38 0.10 0.00 99.85 0.00 0.00 +4.907015 sec .fi -Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit +Above the cycle soaker drives cpu6 up 3.6 Ghz turbo limit while the other processors are generally in various states of idle. 
-Note that cpu3 is an HT sibling sharing core9 -with cpu9, and thus it is unable to get to an idle state -deeper than c1 while cpu9 is busy. +Note that cpu0 is an HT sibling sharing core0 +with cpu6, and thus it is unable to get to an idle state +deeper than c1 while cpu6 is busy. -Note that turbostat reports average GHz of 3.61, while -the arithmetic average of the GHz column above is 3.24. +Note that turbostat reports average GHz of 3.64, while +the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. .SH NOTES @@ -167,6 +184,6 @@ http://www.intel.com/products/processor/manuals/ .SH "SEE ALSO" msr(4), vmstat(8) .PP -.SH AUTHORS +.SH AUTHOR .nf Written by Len Brown diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 310d3dd5e54..6436d54378c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2,7 +2,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel turbo-capable processors. * - * Copyright (c) 2010, Intel Corporation. + * Copyright (c) 2012 Intel Corporation. 
* Len Brown * * This program is free software; you can redistribute it and/or modify it @@ -49,6 +49,7 @@ char *proc_stat = "/proc/stat"; unsigned int interval_sec = 5; /* set with -i interval_sec */ unsigned int verbose; /* set with -v */ +unsigned int summary_only; /* set with -s */ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; @@ -129,14 +130,18 @@ void print_header(void) { if (show_pkg) fprintf(stderr, "pk"); + if (show_pkg) + fprintf(stderr, " "); if (show_core) - fprintf(stderr, " cr"); + fprintf(stderr, "cor"); if (show_cpu) fprintf(stderr, " CPU"); + if (show_pkg || show_core || show_cpu) + fprintf(stderr, " "); if (do_nhm_cstates) - fprintf(stderr, " %%c0 "); + fprintf(stderr, " %%c0"); if (has_aperf) - fprintf(stderr, " GHz"); + fprintf(stderr, " GHz"); fprintf(stderr, " TSC"); if (do_nhm_cstates) fprintf(stderr, " %%c1"); @@ -147,13 +152,13 @@ void print_header(void) if (do_snb_cstates) fprintf(stderr, " %%c7"); if (do_snb_cstates) - fprintf(stderr, " %%pc2"); + fprintf(stderr, " %%pc2"); if (do_nhm_cstates) - fprintf(stderr, " %%pc3"); + fprintf(stderr, " %%pc3"); if (do_nhm_cstates) - fprintf(stderr, " %%pc6"); + fprintf(stderr, " %%pc6"); if (do_snb_cstates) - fprintf(stderr, " %%pc7"); + fprintf(stderr, " %%pc7"); if (extra_msr_offset) fprintf(stderr, " MSR 0x%x ", extra_msr_offset); @@ -187,6 +192,15 @@ void dump_list(struct counters *cnt) dump_cnt(cnt); } +/* + * column formatting convention & formats + * package: "pk" 2 columns %2d + * core: "cor" 3 columns %3d + * CPU: "CPU" 3 columns %3d + * GHz: "GHz" 3 columns %3.2 + * TSC: "TSC" 3 columns %3.2 + * percentage " %pc3" %6.2 + */ void print_cnt(struct counters *p) { double interval_float; @@ -196,39 +210,45 @@ void print_cnt(struct counters *p) /* topology columns, print blanks on 1st (average) line */ if (p == cnt_average) { if (show_pkg) + fprintf(stderr, " "); + if (show_pkg && show_core) fprintf(stderr, " "); if (show_core) - fprintf(stderr, " "); + fprintf(stderr, 
" "); if (show_cpu) - fprintf(stderr, " "); + fprintf(stderr, " " " "); } else { if (show_pkg) - fprintf(stderr, "%d", p->pkg); + fprintf(stderr, "%2d", p->pkg); + if (show_pkg && show_core) + fprintf(stderr, " "); if (show_core) - fprintf(stderr, "%4d", p->core); + fprintf(stderr, "%3d", p->core); if (show_cpu) - fprintf(stderr, "%4d", p->cpu); + fprintf(stderr, " %3d", p->cpu); } /* %c0 */ if (do_nhm_cstates) { + if (show_pkg || show_core || show_cpu) + fprintf(stderr, " "); if (!skip_c0) - fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); + fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); else - fprintf(stderr, " ****"); + fprintf(stderr, " ****"); } /* GHz */ if (has_aperf) { if (!aperf_mperf_unstable) { - fprintf(stderr, "%5.2f", + fprintf(stderr, " %3.2f", 1.0 * p->tsc / units * p->aperf / p->mperf / interval_float); } else { if (p->aperf > p->tsc || p->mperf > p->tsc) { - fprintf(stderr, " ****"); + fprintf(stderr, " ***"); } else { - fprintf(stderr, "%4.1f*", + fprintf(stderr, "%3.1f*", 1.0 * p->tsc / units * p->aperf / p->mperf / interval_float); @@ -241,7 +261,7 @@ void print_cnt(struct counters *p) if (do_nhm_cstates) { if (!skip_c1) - fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); else fprintf(stderr, " ****"); } @@ -252,13 +272,13 @@ void print_cnt(struct counters *p) if (do_snb_cstates) fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc2/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc3/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); if (do_nhm_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc6/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); if (do_snb_cstates) - fprintf(stderr, " %5.2f", 100.0 * p->pc7/p->tsc); + fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); if (extra_msr_offset) fprintf(stderr, " 0x%016llx", p->extra_msr); putc('\n', 
stderr); @@ -267,12 +287,20 @@ void print_cnt(struct counters *p) void print_counters(struct counters *counters) { struct counters *cnt; + static int printed; - print_header(); + + if (!printed || !summary_only) + print_header(); if (num_cpus > 1) print_cnt(cnt_average); + printed = 1; + + if (summary_only) + return; + for (cnt = counters; cnt != NULL; cnt = cnt->next) print_cnt(cnt); @@ -557,7 +585,8 @@ void insert_counters(struct counters **list, return; } - show_cpu = 1; /* there is more than one CPU */ + if (!summary_only) + show_cpu = 1; /* there is more than one CPU */ /* * insert on front of list. @@ -575,13 +604,15 @@ void insert_counters(struct counters **list, while (prev->next && (prev->next->pkg < new->pkg)) { prev = prev->next; - show_pkg = 1; /* there is more than 1 package */ + if (!summary_only) + show_pkg = 1; /* there is more than 1 package */ } while (prev->next && (prev->next->pkg == new->pkg) && (prev->next->core < new->core)) { prev = prev->next; - show_core = 1; /* there is more than 1 core */ + if (!summary_only) + show_core = 1; /* there is more than 1 core */ } while (prev->next && (prev->next->pkg == new->pkg) @@ -1005,8 +1036,11 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { switch (opt) { + case 's': + summary_only++; + break; case 'v': verbose++; break; -- cgit v1.2.2 From 88c3281f7ba449992f7a33bd2452a8c6fa5503cb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 21:44:40 -0400 Subject: tools turbostat: reduce measurement overhead due to IPIs turbostat uses the /dev/cpu/*/msr interface to read MSRs. For modern systems, it reads 10 MSRs per CPU. This can be observed as 10 "Function Call Interrupts" per CPU per sample added to /proc/interrupts. This overhead is measurable on large idle systems, and as Youquan Song pointed out, it can even trick cpuidle into thinking the system is busy. 
Here turbostat re-schedules itself in-turn to each CPU so that its MSR reads will always be local. This replaces the 10 "Function Call Interrupts" with a single "Rescheduling interrupt" per sample per CPU. On an idle 32-CPU system, this shifts some residency from the shallow c1 state to the deeper c7 state: # ./turbostat.old -s %c0 GHz TSC %c1 %c3 %c6 %c7 %pc2 %pc3 %pc6 %pc7 0.27 1.29 2.29 0.95 0.02 0.00 98.77 20.23 0.00 77.41 0.00 0.25 1.24 2.29 0.98 0.02 0.00 98.75 20.34 0.03 77.74 0.00 0.27 1.22 2.29 0.54 0.00 0.00 99.18 20.64 0.00 77.70 0.00 0.26 1.22 2.29 1.22 0.00 0.00 98.52 20.22 0.00 77.74 0.00 0.26 1.38 2.29 0.78 0.02 0.00 98.95 20.51 0.05 77.56 0.00 ^C i# ./turbostat.new -s %c0 GHz TSC %c1 %c3 %c6 %c7 %pc2 %pc3 %pc6 %pc7 0.27 1.20 2.29 0.24 0.01 0.00 99.49 20.58 0.00 78.20 0.00 0.27 1.22 2.29 0.25 0.00 0.00 99.48 20.79 0.00 77.85 0.00 0.27 1.20 2.29 0.25 0.02 0.00 99.46 20.71 0.03 77.89 0.00 0.28 1.26 2.29 0.25 0.01 0.00 99.46 20.89 0.02 77.67 0.00 0.27 1.20 2.29 0.24 0.01 0.00 99.48 20.65 0.00 78.04 0.00 cc: Youquan Song Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6436d54378c..fa60872b947 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -19,6 +19,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ +#define _GNU_SOURCE #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE @@ -72,6 +74,8 @@ char *progname; int need_reinitialize; int num_cpus; +cpu_set_t *cpu_mask; +size_t cpu_mask_size; struct counters { unsigned long long tsc; /* per thread */ @@ -100,6 +104,40 @@ struct timeval tv_even; struct timeval tv_odd; struct timeval tv_delta; +/* + * cpu_mask_init(ncpus) + * + * allocate and clear cpu_mask + * set cpu_mask_size + */ +void cpu_mask_init(int ncpus) +{ + cpu_mask = CPU_ALLOC(ncpus); + if (cpu_mask == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_mask_size = CPU_ALLOC_SIZE(ncpus); + CPU_ZERO_S(cpu_mask_size, cpu_mask); +} + +void cpu_mask_uninit() +{ + CPU_FREE(cpu_mask); + cpu_mask = NULL; + cpu_mask_size = 0; +} + +int cpu_migrate(int cpu) +{ + CPU_ZERO_S(cpu_mask_size, cpu_mask); + CPU_SET_S(cpu, cpu_mask_size, cpu_mask); + if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) + return -1; + else + return 0; +} + unsigned long long get_msr(int cpu, off_t offset) { ssize_t retval; @@ -471,6 +509,11 @@ void compute_average(struct counters *delta, struct counters *avg) void get_counters(struct counters *cnt) { for ( ; cnt; cnt = cnt->next) { + if (cpu_migrate(cnt->cpu)) { + need_reinitialize = 1; + return; + } + cnt->tsc = get_msr(cnt->cpu, MSR_TSC); if (do_nhm_cstates) cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); @@ -752,6 +795,8 @@ void re_initialize(void) free_all_counters(); num_cpus = for_all_cpus(alloc_new_counters); need_reinitialize = 0; + cpu_mask_uninit(); + cpu_mask_init(num_cpus); printf("num_cpus is now %d\n", num_cpus); } @@ -984,6 +1029,7 @@ void turbostat_init() check_super_user(); num_cpus = for_all_cpus(alloc_new_counters); + cpu_mask_init(num_cpus); if (verbose) print_nehalem_info(); -- cgit v1.2.2 From 15aaa34654831e98dd76f7738b6c7f5d05a66430 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 22:19:58 -0400 Subject: 
tools turbostat: harden against cpu online/offline Sometimes users have turbostat running in interval mode when they take processors offline/online. Previously, turbostat would survive, but not gracefully. Tighten up the error checking so turbostat notices changes sooner, and print just 1 line on change: turbostat: re-initialized with num_cpus %d Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 115 ++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa60872b947..ab2f682fd44 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -71,7 +71,6 @@ unsigned int show_cpu; int aperf_mperf_unstable; int backwards_count; char *progname; -int need_reinitialize; int num_cpus; cpu_set_t *cpu_mask; @@ -138,30 +137,24 @@ int cpu_migrate(int cpu) return 0; } -unsigned long long get_msr(int cpu, off_t offset) +int get_msr(int cpu, off_t offset, unsigned long long *msr) { ssize_t retval; - unsigned long long msr; char pathname[32]; int fd; sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); - if (fd < 0) { - perror(pathname); - need_reinitialize = 1; - return 0; - } - - retval = pread(fd, &msr, sizeof msr, offset); - if (retval != sizeof msr) { - fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", - cpu, offset, retval); - exit(-2); - } + if (fd < 0) + return -1; + retval = pread(fd, msr, sizeof *msr, offset); close(fd); - return msr; + + if (retval != sizeof *msr) + return -1; + + return 0; } void print_header(void) @@ -506,36 +499,51 @@ void compute_average(struct counters *delta, struct counters *avg) free(sum); } -void get_counters(struct counters *cnt) +int get_counters(struct counters *cnt) { for ( ; cnt; cnt = cnt->next) { - if (cpu_migrate(cnt->cpu)) { - need_reinitialize = 1; - return; + + if (cpu_migrate(cnt->cpu)) + return -1; + + if
(get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) + return -1; + + if (has_aperf) { + if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) + return -1; + if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) + return -1; + } + + if (do_nhm_cstates) { + if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) + return -1; + if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) + return -1; } - cnt->tsc = get_msr(cnt->cpu, MSR_TSC); - if (do_nhm_cstates) - cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY); - if (do_snb_cstates) - cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY); - if (has_aperf) - cnt->aperf = get_msr(cnt->cpu, MSR_APERF); - if (has_aperf) - cnt->mperf = get_msr(cnt->cpu, MSR_MPERF); - if (do_snb_cstates) - cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY); - if (do_nhm_cstates) - cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY); - if (do_nhm_cstates) - cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY); if (do_snb_cstates) - cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY); + if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) + return -1; + + if (do_nhm_cstates) { + if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) + return -1; + if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) + return -1; + } + if (do_snb_cstates) { + if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) + return -1; + if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) + return -1; + } if (extra_msr_offset) - cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset); + if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) + return -1; } + return 0; } void print_nehalem_info(void) @@ -546,7 +554,7 @@ void print_nehalem_info(void) if (!do_nehalem_platform_info) return; - msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); + get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", @@ -562,7 +570,7 @@ void print_nehalem_info(void) if 
(!do_nehalem_turbo_ratio_limit) return; - msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); + get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); ratio = (msr >> 24) & 0xFF; if (ratio) @@ -755,7 +763,7 @@ int get_core_id(int cpu) } /* - * run func(index, cpu) on every cpu in /proc/stat + * run func(pkg, core, cpu) on every cpu in /proc/stat */ int for_all_cpus(void (func)(int, int, int)) @@ -791,20 +799,18 @@ int for_all_cpus(void (func)(int, int, int)) void re_initialize(void) { - printf("turbostat: topology changed, re-initializing.\n"); free_all_counters(); num_cpus = for_all_cpus(alloc_new_counters); - need_reinitialize = 0; cpu_mask_uninit(); cpu_mask_init(num_cpus); - printf("num_cpus is now %d\n", num_cpus); + printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); } void dummy(int pkg, int core, int cpu) { return; } /* * check to see if a cpu came on-line */ -void verify_num_cpus(void) +int verify_num_cpus(void) { int new_num_cpus; @@ -814,8 +820,9 @@ void verify_num_cpus(void) if (verbose) printf("num_cpus was %d, is now %d\n", num_cpus, new_num_cpus); - need_reinitialize = 1; + return -1; } + return 0; } void turbostat_loop() @@ -825,25 +832,25 @@ restart: gettimeofday(&tv_even, (struct timezone *)NULL); while (1) { - verify_num_cpus(); - if (need_reinitialize) { + if (verify_num_cpus()) { re_initialize(); goto restart; } sleep(interval_sec); - get_counters(cnt_odd); + if (get_counters(cnt_odd)) { + re_initialize(); + goto restart; + } gettimeofday(&tv_odd, (struct timezone *)NULL); - compute_delta(cnt_odd, cnt_even, cnt_delta); timersub(&tv_odd, &tv_even, &tv_delta); compute_average(cnt_delta, cnt_average); print_counters(cnt_delta); - if (need_reinitialize) { + sleep(interval_sec); + if (get_counters(cnt_even)) { re_initialize(); goto restart; } - sleep(interval_sec); - get_counters(cnt_even); gettimeofday(&tv_even, (struct timezone *)NULL); compute_delta(cnt_even, cnt_odd, cnt_delta); timersub(&tv_even, &tv_odd, &tv_delta); -- cgit v1.2.2