author     Linus Torvalds <torvalds@linux-foundation.org>  2012-03-20 13:29:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-03-20 13:29:15 -0400
commit     9c2b957db1772ebf942ae7a9346b14eba6c8ca66
tree       0dbb83e57260ea7fc0dc421f214d5f1b26262005  /net/core/dev.c
parent     0bbfcaff9b2a69c71a95e6902253487ab30cb498
parent     bea95c152dee1791dd02cbc708afbb115bb00f9a
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events changes for v3.4 from Ingo Molnar:

 - New "hardware based branch profiling" feature, on both the kernel and
   the tooling side, for CPUs that support it (currently modern x86
   Intel CPUs with the 'LBR' hardware feature).

   This new feature is basically a sophisticated 'magnifying glass' for
   branch execution - something that is pretty difficult to extract from
   regular, function-histogram-centric profiles.

   The simplest mode is activated via 'perf record -b', and the result
   looks like this in perf report:

	$ perf record -b any_call,u -e cycles:u branchy

	$ perf report -b --sort=symbol
	    52.34%  [.] main                   [.] f1
	    24.04%  [.] f1                     [.] f3
	    23.60%  [.] f1                     [.] f2
	     0.01%  [k] _IO_new_file_xsputn    [k] _IO_file_overflow
	     0.01%  [k] _IO_vfprintf_internal  [k] _IO_new_file_xsputn
	     0.01%  [k] _IO_vfprintf_internal  [k] strchrnul
	     0.01%  [k] __printf               [k] _IO_vfprintf_internal
	     0.01%  [k] main                   [k] __printf

   This output shows from/to branch columns, listing the highest
   percentage (from, to) jump combinations - i.e. the most frequently
   taken branches in the system. "Branches" can also include function
   calls and any other synchronous or asynchronous transition of the
   instruction pointer that is not 'next instruction' - such as system
   calls, traps, interrupts, etc.

   This feature comes with (hopefully intuitive) flat ASCII and TUI
   support in perf report.

 - Various 'perf annotate' visual improvements for us assembly junkies.
   It will now recognize function calls in the TUI, and by hitting enter
   you can follow the call (recursively) and back, amongst other
   improvements.

 - Multiple threads/processes recording support in perf record, perf
   stat and perf top, activated via a comma-separated list of PIDs:

	perf top -p 21483,21485
	perf stat -p 21483,21485 -ddd
	perf record -p 21483,21485

 - Support for per-UID views, via the --uid parameter to perf top, perf
   report, etc. For example 'perf top --uid mingo' will only show the
   tasks that I am running, excluding other users, root, etc.

 - Jump label restructurings and improvements - this includes the
   factoring out of the (hopefully much clearer)
   include/linux/static_key.h generic facility:

	struct static_key key = STATIC_KEY_INIT_FALSE;

	...

	if (static_key_false(&key))
		do unlikely code
	else
		do likely code

	...
	static_key_slow_inc(&key);
	...
	static_key_slow_dec(&key);
	...

   The static_key_false() branch will be generated into the code with as
   little impact on the likely code path as possible. The
   static_key_slow_*() APIs flip the branch via live kernel code
   patching. (A fuller, self-contained sketch follows this list.)

   This facility can now be used more widely within the kernel to
   micro-optimize hot branches whose likelihood matches the static-key
   usage and fast/slow cost patterns.

 - SW function tracer improvements: perf support and filtering support.

 - Various hardenings of the perf.data ABI, to make older perf.data
   files read smoothly on newer tool versions, to make new features
   integrate more smoothly, and to support cross-endian
   recording/analyzing workflows better.

 - Restructuring of the kprobes code, the splitting out of 'optprobes',
   and a corner case bugfix.

 - Allow the tracing of kernel console output (printk).

 - Improvements/fixes to user-space RDPMC support, allowing user-space
   self-profiling code to extract PMU counts without performing any
   system calls, while playing nice with the kernel side. (See the
   second sketch after this list.)

 - 'perf bench' improvements.

 - ... and lots of internal restructurings, cleanups and fixes that made
   these features possible.

And, as usual, this list is incomplete, as there were also lots of other
improvements.
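To make the new facility concrete before the diff below, here is a minimal
sketch of the usage pattern; the key name and the helper functions are
hypothetical, but the calls follow the include/linux/static_key.h interface
quoted above:

	#include <linux/static_key.h>

	/* Hypothetical feature flag, biased towards the disabled state. */
	static struct static_key my_feature_enabled = STATIC_KEY_INIT_FALSE;

	static void my_feature_bookkeeping(void)
	{
		/* hypothetical rarely-executed work */
	}

	/* Hot path: while the key is false, this costs a single no-op. */
	static inline void my_hot_path(void)
	{
		if (static_key_false(&my_feature_enabled))
			my_feature_bookkeeping();
	}

	/* Control path: flipping the key live-patches the branch sites. */
	void my_feature_enable(void)
	{
		static_key_slow_inc(&my_feature_enabled);
	}

	void my_feature_disable(void)
	{
		static_key_slow_dec(&my_feature_enabled);
	}

The diff to net/core/dev.c below is exactly this conversion: struct
jump_label_key becomes struct static_key, static_branch() becomes
static_key_false(), and jump_label_inc()/jump_label_dec() become
static_key_slow_inc()/static_key_slow_dec().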
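The user-space RDPMC item is worth a sketch as well. The general pattern,
documented in the perf_event mmap-page comments, is to open a counter once
with perf_event_open(), mmap its first page, and then combine the rdpmc
instruction with a seqlock-style retry loop. The program below is an
illustrative, x86-only sketch under those assumptions (it also assumes
RDPMC access is enabled for user space on the running kernel):

	#include <linux/perf_event.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdint.h>
	#include <string.h>
	#include <stdio.h>

	static uint64_t rdpmc(uint32_t counter)
	{
		uint32_t lo, hi;

		__asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
		return ((uint64_t)hi << 32) | lo;
	}

	int main(void)
	{
		struct perf_event_attr attr;
		struct perf_event_mmap_page *pc;
		uint64_t count;
		uint32_t seq, idx;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size   = sizeof(attr);
		attr.type   = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		/* Self-monitoring: this process, any CPU. */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			return 1;

		pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
		if (pc == MAP_FAILED)
			return 1;

		/* Lock-free read: retry if the kernel updated the page meanwhile. */
		do {
			seq = pc->lock;
			__sync_synchronize();
			idx = pc->index;	/* 0: not currently on a PMU counter */
			count = pc->offset;
			if (idx)
				count += rdpmc(idx - 1);
			__sync_synchronize();
		} while (pc->lock != seq);

		printf("cycles so far: %llu\n", (unsigned long long)count);
		return 0;
	}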
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (120 commits)
  perf report: Fix annotate double quit issue in branch view mode
  perf report: Remove duplicate annotate choice in branch view mode
  perf/x86: Prettify pmu config literals
  perf report: Enable TUI in branch view mode
  perf report: Auto-detect branch stack sampling mode
  perf record: Add HEADER_BRANCH_STACK tag
  perf record: Provide default branch stack sampling mode option
  perf tools: Make perf able to read files from older ABIs
  perf tools: Fix ABI compatibility bug in print_event_desc()
  perf tools: Enable reading of perf.data files from different ABI rev
  perf: Add ABI reference sizes
  perf report: Add support for taken branch sampling
  perf record: Add support for sampling taken branch
  perf tools: Add code to support PERF_SAMPLE_BRANCH_STACK
  x86/kprobes: Split out optprobe related code to kprobes-opt.c
  x86/kprobes: Fix a bug which can modify kernel code permanently
  x86/kprobes: Fix instruction recovery on optimized path
  perf: Add callback to flush branch_stack on context switch
  perf: Disable PERF_SAMPLE_BRANCH_* when not supported
  perf/x86: Add LBR software filter support for Intel CPUs
  ...
Diffstat (limited to 'net/core/dev.c')
 net/core/dev.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ca32f6b310..6982bfd6a78 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -134,7 +134,7 @@
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/net_tstamp.h>
-#include <linux/jump_label.h>
+#include <linux/static_key.h>
 #include <net/flow_keys.h>
 
 #include "net-sysfs.h"
@@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
-static struct jump_label_key netstamp_needed __read_mostly;
+static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call jump_label_dec() from irq context
+/* We are not allowed to call static_key_slow_dec() from irq context
  * If net_disable_timestamp() is called from irq context, defer the
- * jump_label_dec() calls.
+ * static_key_slow_dec() calls.
  */
 static atomic_t netstamp_needed_deferred;
 #endif
@@ -1457,12 +1457,12 @@ void net_enable_timestamp(void)
 
 	if (deferred) {
 		while (--deferred)
-			jump_label_dec(&netstamp_needed);
+			static_key_slow_dec(&netstamp_needed);
 		return;
 	}
 #endif
 	WARN_ON(in_interrupt());
-	jump_label_inc(&netstamp_needed);
+	static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
 
@@ -1474,19 +1474,19 @@ void net_disable_timestamp(void)
 		return;
 	}
 #endif
-	jump_label_dec(&netstamp_needed);
+	static_key_slow_dec(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	skb->tstamp.tv64 = 0;
-	if (static_branch(&netstamp_needed))
+	if (static_key_false(&netstamp_needed))
 		__net_timestamp(skb);
 }
 
 #define net_timestamp_check(COND, SKB)			\
-	if (static_branch(&netstamp_needed)) {		\
+	if (static_key_false(&netstamp_needed)) {	\
 		if ((COND) && !(SKB)->tstamp.tv64)	\
 			__net_timestamp(SKB);		\
 	}						\
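A note on the two hunks above: static_key_slow_dec() takes a mutex and
patches live kernel text, which is why it must not run from irq context.
The deferral branch of net_disable_timestamp() falls outside the lines
shown here, so the following is a reconstruction sketch of how both
functions fit together in this version of the tree, not a verbatim quote:

	void net_disable_timestamp(void)
	{
	#ifdef HAVE_JUMP_LABEL
		/* From irq context: record the pending decrement instead. */
		if (in_interrupt()) {
			atomic_inc(&netstamp_needed_deferred);
			return;
		}
	#endif
		static_key_slow_dec(&netstamp_needed);
	}

	void net_enable_timestamp(void)
	{
	#ifdef HAVE_JUMP_LABEL
		int deferred = atomic_xchg(&netstamp_needed_deferred, 0);

		if (deferred) {
			/* N deferred decrements plus this increment net out
			 * to N - 1 decrements, hence the pre-decrement loop.
			 */
			while (--deferred)
				static_key_slow_dec(&netstamp_needed);
			return;
		}
	#endif
		WARN_ON(in_interrupt());
		static_key_slow_inc(&netstamp_needed);
	}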
@@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
-struct jump_label_key rps_needed __read_mostly;
+struct static_key rps_needed __read_mostly;
 
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
@@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-	if (static_branch(&rps_needed)) {
+	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-	if (static_branch(&rps_needed)) {
+	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 