diff options
30 files changed, 8372 insertions, 3268 deletions
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index bdf1963ba6ba..a5ea2cb0082b 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst | |||
@@ -520,1550 +520,4 @@ The following commands are supported: | |||
520 | totals derived from one or more trace event format fields and/or | 520 | totals derived from one or more trace event format fields and/or |
521 | event counts (hitcount). | 521 | event counts (hitcount). |
522 | 522 | ||
523 | The format of a hist trigger is as follows:: | 523 | See Documentation/trace/histogram.txt for details and examples. |
524 | |||
525 | hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>] | ||
526 | [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue] | ||
527 | [:clear][:name=histname1] [if <filter>] | ||
528 | |||
529 | When a matching event is hit, an entry is added to a hash table | ||
530 | using the key(s) and value(s) named. Keys and values correspond to | ||
531 | fields in the event's format description. Values must correspond to | ||
532 | numeric fields - on an event hit, the value(s) will be added to a | ||
533 | sum kept for that field. The special string 'hitcount' can be used | ||
534 | in place of an explicit value field - this is simply a count of | ||
535 | event hits. If 'values' isn't specified, an implicit 'hitcount' | ||
536 | value will be automatically created and used as the only value. | ||
537 | Keys can be any field, or the special string 'stacktrace', which | ||
538 | will use the event's kernel stacktrace as the key. The keywords | ||
539 | 'keys' or 'key' can be used to specify keys, and the keywords | ||
540 | 'values', 'vals', or 'val' can be used to specify values. Compound | ||
541 | keys consisting of up to two fields can be specified by the 'keys' | ||
542 | keyword. Hashing a compound key produces a unique entry in the | ||
543 | table for each unique combination of component keys, and can be | ||
544 | useful for providing more fine-grained summaries of event data. | ||
545 | Additionally, sort keys consisting of up to two fields can be | ||
546 | specified by the 'sort' keyword. If more than one field is | ||
547 | specified, the result will be a 'sort within a sort': the first key | ||
548 | is taken to be the primary sort key and the second the secondary | ||
549 | key. If a hist trigger is given a name using the 'name' parameter, | ||
550 | its histogram data will be shared with other triggers of the same | ||
551 | name, and trigger hits will update this common data. Only triggers | ||
552 | with 'compatible' fields can be combined in this way; triggers are | ||
553 | 'compatible' if the fields named in the trigger share the same | ||
554 | number and type of fields and those fields also have the same names. | ||
555 | Note that any two events always share the compatible 'hitcount' and | ||
556 | 'stacktrace' fields and can therefore be combined using those | ||
557 | fields, however pointless that may be. | ||
558 | |||
559 | 'hist' triggers add a 'hist' file to each event's subdirectory. | ||
560 | Reading the 'hist' file for the event will dump the hash table in | ||
561 | its entirety to stdout. If there are multiple hist triggers | ||
562 | attached to an event, there will be a table for each trigger in the | ||
563 | output. The table displayed for a named trigger will be the same as | ||
564 | any other instance having the same name. Each printed hash table | ||
565 | entry is a simple list of the keys and values comprising the entry; | ||
566 | keys are printed first and are delineated by curly braces, and are | ||
567 | followed by the set of value fields for the entry. By default, | ||
568 | numeric fields are displayed as base-10 integers. This can be | ||
569 | modified by appending any of the following modifiers to the field | ||
570 | name: | ||
571 | |||
572 | - .hex display a number as a hex value | ||
573 | - .sym display an address as a symbol | ||
574 | - .sym-offset display an address as a symbol and offset | ||
575 | - .syscall display a syscall id as a system call name | ||
576 | - .execname display a common_pid as a program name | ||
577 | |||
578 | Note that in general the semantics of a given field aren't | ||
579 | interpreted when applying a modifier to it, but there are some | ||
580 | restrictions to be aware of in this regard: | ||
581 | |||
582 | - only the 'hex' modifier can be used for values (because values | ||
583 | are essentially sums, and the other modifiers don't make sense | ||
584 | in that context). | ||
585 | - the 'execname' modifier can only be used on a 'common_pid'. The | ||
586 | reason for this is that the execname is simply the 'comm' value | ||
587 | saved for the 'current' process when an event was triggered, | ||
588 | which is the same as the common_pid value saved by the event | ||
589 | tracing code. Trying to apply that comm value to other pid | ||
590 | values wouldn't be correct, and typically events that care save | ||
591 | pid-specific comm fields in the event itself. | ||
592 | |||
593 | A typical usage scenario would be the following to enable a hist | ||
594 | trigger, read its current contents, and then turn it off:: | ||
595 | |||
596 | # echo 'hist:keys=skbaddr.hex:vals=len' > \ | ||
597 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
598 | |||
599 | # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
600 | |||
601 | # echo '!hist:keys=skbaddr.hex:vals=len' > \ | ||
602 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
603 | |||
604 | The trigger file itself can be read to show the details of the | ||
605 | currently attached hist trigger. This information is also displayed | ||
606 | at the top of the 'hist' file when read. | ||
607 | |||
608 | By default, the size of the hash table is 2048 entries. The 'size' | ||
609 | parameter can be used to specify more or fewer than that. The units | ||
610 | are in terms of hashtable entries - if a run uses more entries than | ||
611 | specified, the results will show the number of 'drops', the number | ||
612 | of hits that were ignored. The size should be a power of 2 between | ||
613 | 128 and 131072 (any non-power-of-2 number specified will be rounded | ||
614 | up). | ||
615 | |||
616 | The 'sort' parameter can be used to specify a value field to sort | ||
617 | on. The default if unspecified is 'hitcount' and the default sort | ||
618 | order is 'ascending'. To sort in the opposite direction, append | ||
619 | '.descending' to the sort key. | ||
620 | |||
621 | The 'pause' parameter can be used to pause an existing hist trigger | ||
622 | or to start a hist trigger but not log any events until told to do | ||
623 | so. 'continue' or 'cont' can be used to start or restart a paused | ||
624 | hist trigger. | ||
625 | |||
626 | The 'clear' parameter will clear the contents of a running hist | ||
627 | trigger and leave its current paused/active state unchanged. | ||
628 | |||
629 | Note that the 'pause', 'cont', and 'clear' parameters should be | ||
630 | applied using 'append' shell operator ('>>') if applied to an | ||
631 | existing trigger, rather than via the '>' operator, which will cause | ||
632 | the trigger to be removed through truncation. | ||
633 | |||
634 | - enable_hist/disable_hist | ||
635 | |||
636 | The enable_hist and disable_hist triggers can be used to have one | ||
637 | event conditionally start and stop another event's already-attached | ||
638 | hist trigger. Any number of enable_hist and disable_hist triggers | ||
639 | can be attached to a given event, allowing that event to kick off | ||
640 | and stop aggregations on a host of other events. | ||
641 | |||
642 | The format is very similar to the enable/disable_event triggers:: | ||
643 | |||
644 | enable_hist:<system>:<event>[:count] | ||
645 | disable_hist:<system>:<event>[:count] | ||
646 | |||
647 | Instead of enabling or disabling the tracing of the target event | ||
648 | into the trace buffer as the enable/disable_event triggers do, the | ||
649 | enable/disable_hist triggers enable or disable the aggregation of | ||
650 | the target event into a hash table. | ||
651 | |||
652 | A typical usage scenario for the enable_hist/disable_hist triggers | ||
653 | would be to first set up a paused hist trigger on some event, | ||
654 | followed by an enable_hist/disable_hist pair that turns the hist | ||
655 | aggregation on and off when conditions of interest are hit:: | ||
656 | |||
657 | # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \ | ||
658 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
659 | |||
660 | # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
661 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
662 | |||
663 | # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | ||
664 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
665 | |||
666 | The above sets up an initially paused hist trigger which is unpaused | ||
667 | and starts aggregating events when a given program is executed, and | ||
668 | which stops aggregating when the process exits and the hist trigger | ||
669 | is paused again. | ||
670 | |||
671 | The examples below provide a more concrete illustration of the | ||
672 | concepts and typical usage patterns discussed above. | ||
673 | |||
674 | |||
675 | 6.2 'hist' trigger examples | ||
676 | --------------------------- | ||
677 | |||
678 | The first set of examples creates aggregations using the kmalloc | ||
679 | event. The fields that can be used for the hist trigger are listed | ||
680 | in the kmalloc event's format file:: | ||
681 | |||
682 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format | ||
683 | name: kmalloc | ||
684 | ID: 374 | ||
685 | format: | ||
686 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
687 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
688 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
689 | field:int common_pid; offset:4; size:4; signed:1; | ||
690 | |||
691 | field:unsigned long call_site; offset:8; size:8; signed:0; | ||
692 | field:const void * ptr; offset:16; size:8; signed:0; | ||
693 | field:size_t bytes_req; offset:24; size:8; signed:0; | ||
694 | field:size_t bytes_alloc; offset:32; size:8; signed:0; | ||
695 | field:gfp_t gfp_flags; offset:40; size:4; signed:0; | ||
696 | |||
697 | We'll start by creating a hist trigger that generates a simple table | ||
698 | that lists the total number of bytes requested for each function in | ||
699 | the kernel that made one or more calls to kmalloc:: | ||
700 | |||
701 | # echo 'hist:key=call_site:val=bytes_req' > \ | ||
702 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
703 | |||
704 | This tells the tracing system to create a 'hist' trigger using the | ||
705 | call_site field of the kmalloc event as the key for the table, which | ||
706 | just means that each unique call_site address will have an entry | ||
707 | created for it in the table. The 'val=bytes_req' parameter tells | ||
708 | the hist trigger that for each unique entry (call_site) in the | ||
709 | table, it should keep a running total of the number of bytes | ||
710 | requested by that call_site. | ||
711 | |||
712 | We'll let it run for awhile and then dump the contents of the 'hist' | ||
713 | file in the kmalloc event's subdirectory (for readability, a number | ||
714 | of entries have been omitted):: | ||
715 | |||
716 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
717 | # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
718 | |||
719 | { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176 | ||
720 | { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024 | ||
721 | { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384 | ||
722 | { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24 | ||
723 | { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8 | ||
724 | { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152 | ||
725 | { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144 | ||
726 | { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144 | ||
727 | { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560 | ||
728 | { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736 | ||
729 | . | ||
730 | . | ||
731 | . | ||
732 | { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576 | ||
733 | { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336 | ||
734 | { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504 | ||
735 | { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584 | ||
736 | { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448 | ||
737 | { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720 | ||
738 | { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088 | ||
739 | { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920 | ||
740 | { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716 | ||
741 | { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712 | ||
742 | { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160 | ||
743 | { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520 | ||
744 | |||
745 | Totals: | ||
746 | Hits: 4610 | ||
747 | Entries: 45 | ||
748 | Dropped: 0 | ||
749 | |||
750 | The output displays a line for each entry, beginning with the key | ||
751 | specified in the trigger, followed by the value(s) also specified in | ||
752 | the trigger. At the beginning of the output is a line that displays | ||
753 | the trigger info, which can also be displayed by reading the | ||
754 | 'trigger' file:: | ||
755 | |||
756 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
757 | hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
758 | |||
759 | At the end of the output are a few lines that display the overall | ||
760 | totals for the run. The 'Hits' field shows the total number of | ||
761 | times the event trigger was hit, the 'Entries' field shows the total | ||
762 | number of used entries in the hash table, and the 'Dropped' field | ||
763 | shows the number of hits that were dropped because the number of | ||
764 | used entries for the run exceeded the maximum number of entries | ||
765 | allowed for the table (normally 0, but if not a hint that you may | ||
766 | want to increase the size of the table using the 'size' parameter). | ||
767 | |||
768 | Notice in the above output that there's an extra field, 'hitcount', | ||
769 | which wasn't specified in the trigger. Also notice that in the | ||
770 | trigger info output, there's a parameter, 'sort=hitcount', which | ||
771 | wasn't specified in the trigger either. The reason for that is that | ||
772 | every trigger implicitly keeps a count of the total number of hits | ||
773 | attributed to a given entry, called the 'hitcount'. That hitcount | ||
774 | information is explicitly displayed in the output, and in the | ||
775 | absence of a user-specified sort parameter, is used as the default | ||
776 | sort field. | ||
777 | |||
778 | The value 'hitcount' can be used in place of an explicit value in | ||
779 | the 'values' parameter if you don't really need to have any | ||
780 | particular field summed and are mainly interested in hit | ||
781 | frequencies. | ||
782 | |||
783 | To turn the hist trigger off, simply call up the trigger in the | ||
784 | command history and re-execute it with a '!' prepended:: | ||
785 | |||
786 | # echo '!hist:key=call_site:val=bytes_req' > \ | ||
787 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
788 | |||
789 | Finally, notice that the call_site as displayed in the output above | ||
790 | isn't really very useful. It's an address, but normally addresses | ||
791 | are displayed in hex. To have a numeric field displayed as a hex | ||
792 | value, simply append '.hex' to the field name in the trigger:: | ||
793 | |||
794 | # echo 'hist:key=call_site.hex:val=bytes_req' > \ | ||
795 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
796 | |||
797 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
798 | # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
799 | |||
800 | { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 | ||
801 | { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 | ||
802 | { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 | ||
803 | { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 | ||
804 | { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 | ||
805 | { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 | ||
806 | { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 | ||
807 | { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 | ||
808 | { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 | ||
809 | { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 | ||
810 | { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 | ||
811 | { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 | ||
812 | . | ||
813 | . | ||
814 | . | ||
815 | { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 | ||
816 | { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 | ||
817 | { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 | ||
818 | { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 | ||
819 | { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 | ||
820 | { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 | ||
821 | { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 | ||
822 | { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 | ||
823 | { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 | ||
824 | { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 | ||
825 | { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 | ||
826 | |||
827 | Totals: | ||
828 | Hits: 4775 | ||
829 | Entries: 46 | ||
830 | Dropped: 0 | ||
831 | |||
832 | Even that's only marginally more useful - while hex values do look | ||
833 | more like addresses, what users are typically more interested in | ||
834 | when looking at text addresses are the corresponding symbols | ||
835 | instead. To have an address displayed as a symbolic value instead, | ||
836 | simply append '.sym' or '.sym-offset' to the field name in the | ||
837 | trigger:: | ||
838 | |||
839 | # echo 'hist:key=call_site.sym:val=bytes_req' > \ | ||
840 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
841 | |||
842 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
843 | # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
844 | |||
845 | { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024 | ||
846 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | ||
847 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | ||
848 | { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192 | ||
849 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | ||
850 | { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | ||
851 | { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | ||
852 | { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528 | ||
853 | { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624 | ||
854 | { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96 | ||
855 | { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464 | ||
856 | { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304 | ||
857 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | ||
858 | { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424 | ||
859 | . | ||
860 | . | ||
861 | . | ||
862 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240 | ||
863 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280 | ||
864 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672 | ||
865 | { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208 | ||
866 | { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840 | ||
867 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312 | ||
868 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152 | ||
869 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576 | ||
870 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248 | ||
871 | { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384 | ||
872 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584 | ||
873 | { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176 | ||
874 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265 | ||
875 | |||
876 | Totals: | ||
877 | Hits: 109928 | ||
878 | Entries: 71 | ||
879 | Dropped: 0 | ||
880 | |||
881 | Because the default sort key above is 'hitcount', the above shows | ||
882 | the list of call_sites by increasing hitcount, so that at the bottom | ||
883 | we see the functions that made the most kmalloc calls during the | ||
884 | run. If instead we wanted to see the top kmalloc callers in | ||
885 | terms of the number of bytes requested rather than the number of | ||
886 | calls, and we wanted the top caller to appear at the top, we can use | ||
887 | the 'sort' parameter, along with the 'descending' modifier:: | ||
888 | |||
889 | # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \ | ||
890 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
891 | |||
892 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
893 | # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | ||
894 | |||
895 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464 | ||
896 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176 | ||
897 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135 | ||
898 | { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128 | ||
899 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784 | ||
900 | { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992 | ||
901 | { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072 | ||
902 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824 | ||
903 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704 | ||
904 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088 | ||
905 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536 | ||
906 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664 | ||
907 | { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632 | ||
908 | . | ||
909 | . | ||
910 | . | ||
911 | { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | ||
912 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | ||
913 | { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 | ||
914 | { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 | ||
915 | { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 | ||
916 | { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | ||
917 | { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 | ||
918 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | ||
919 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | ||
920 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | ||
921 | |||
922 | Totals: | ||
923 | Hits: 32133 | ||
924 | Entries: 81 | ||
925 | Dropped: 0 | ||
926 | |||
927 | To display the offset and size information in addition to the symbol | ||
928 | name, just use 'sym-offset' instead:: | ||
929 | |||
930 | # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ | ||
931 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
932 | |||
933 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
934 | # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | ||
935 | |||
936 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 | ||
937 | { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 | ||
938 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 | ||
939 | { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 | ||
940 | { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 | ||
941 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 | ||
942 | { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 | ||
943 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 | ||
944 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 | ||
945 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 | ||
946 | { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 | ||
947 | { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 | ||
948 | . | ||
949 | . | ||
950 | . | ||
951 | { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 | ||
952 | { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 | ||
953 | { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 | ||
954 | { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 | ||
955 | { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 | ||
956 | { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 | ||
957 | { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 | ||
958 | |||
959 | Totals: | ||
960 | Hits: 26098 | ||
961 | Entries: 64 | ||
962 | Dropped: 0 | ||
963 | |||
964 | We can also add multiple fields to the 'values' parameter. For | ||
965 | example, we might want to see the total number of bytes allocated | ||
966 | alongside bytes requested, and display the result sorted by bytes | ||
967 | allocated in a descending order:: | ||
968 | |||
969 | # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \ | ||
970 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
971 | |||
972 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
973 | # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active] | ||
974 | |||
975 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016 | ||
976 | { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224 | ||
977 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568 | ||
978 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760 | ||
979 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744 | ||
980 | { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400 | ||
981 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496 | ||
982 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304 | ||
983 | { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640 | ||
984 | { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760 | ||
985 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312 | ||
986 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432 | ||
987 | . | ||
988 | . | ||
989 | . | ||
990 | { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192 | ||
991 | { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
992 | { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
993 | { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
994 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
995 | { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96 | ||
996 | { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64 | ||
997 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | ||
998 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8 | ||
999 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | ||
1000 | |||
1001 | Totals: | ||
1002 | Hits: 66598 | ||
1003 | Entries: 65 | ||
1004 | Dropped: 0 | ||
1005 | |||
1006 | Finally, to finish off our kmalloc example, instead of simply having | ||
1007 | the hist trigger display symbolic call_sites, we can have the hist | ||
1008 | trigger additionally display the complete set of kernel stack traces | ||
1009 | that led to each call_site. To do that, we simply use the special | ||
1010 | value 'stacktrace' for the key parameter:: | ||
1011 | |||
1012 | # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ | ||
1013 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
1014 | |||
1015 | The above trigger will use the kernel stack trace in effect when an | ||
1016 | event is triggered as the key for the hash table. This allows the | ||
1017 | enumeration of every kernel callpath that led up to a particular | ||
1018 | event, along with a running total of any of the event fields for | ||
1019 | that event. Here we tally bytes requested and bytes allocated for | ||
1020 | every callpath in the system that led up to a kmalloc (in this case | ||
1021 | every callpath to a kmalloc for a kernel compile):: | ||
1022 | |||
1023 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
1024 | # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] | ||
1025 | |||
1026 | { stacktrace: | ||
1027 | __kmalloc_track_caller+0x10b/0x1a0 | ||
1028 | kmemdup+0x20/0x50 | ||
1029 | hidraw_report_event+0x8a/0x120 [hid] | ||
1030 | hid_report_raw_event+0x3ea/0x440 [hid] | ||
1031 | hid_input_report+0x112/0x190 [hid] | ||
1032 | hid_irq_in+0xc2/0x260 [usbhid] | ||
1033 | __usb_hcd_giveback_urb+0x72/0x120 | ||
1034 | usb_giveback_urb_bh+0x9e/0xe0 | ||
1035 | tasklet_hi_action+0xf8/0x100 | ||
1036 | __do_softirq+0x114/0x2c0 | ||
1037 | irq_exit+0xa5/0xb0 | ||
1038 | do_IRQ+0x5a/0xf0 | ||
1039 | ret_from_intr+0x0/0x30 | ||
1040 | cpuidle_enter+0x17/0x20 | ||
1041 | cpu_startup_entry+0x315/0x3e0 | ||
1042 | rest_init+0x7c/0x80 | ||
1043 | } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | ||
1044 | { stacktrace: | ||
1045 | __kmalloc_track_caller+0x10b/0x1a0 | ||
1046 | kmemdup+0x20/0x50 | ||
1047 | hidraw_report_event+0x8a/0x120 [hid] | ||
1048 | hid_report_raw_event+0x3ea/0x440 [hid] | ||
1049 | hid_input_report+0x112/0x190 [hid] | ||
1050 | hid_irq_in+0xc2/0x260 [usbhid] | ||
1051 | __usb_hcd_giveback_urb+0x72/0x120 | ||
1052 | usb_giveback_urb_bh+0x9e/0xe0 | ||
1053 | tasklet_hi_action+0xf8/0x100 | ||
1054 | __do_softirq+0x114/0x2c0 | ||
1055 | irq_exit+0xa5/0xb0 | ||
1056 | do_IRQ+0x5a/0xf0 | ||
1057 | ret_from_intr+0x0/0x30 | ||
1058 | } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | ||
1059 | { stacktrace: | ||
1060 | kmem_cache_alloc_trace+0xeb/0x150 | ||
1061 | aa_alloc_task_context+0x27/0x40 | ||
1062 | apparmor_cred_prepare+0x1f/0x50 | ||
1063 | security_prepare_creds+0x16/0x20 | ||
1064 | prepare_creds+0xdf/0x1a0 | ||
1065 | SyS_capset+0xb5/0x200 | ||
1066 | system_call_fastpath+0x12/0x6a | ||
1067 | } hitcount: 1 bytes_req: 32 bytes_alloc: 32 | ||
1068 | . | ||
1069 | . | ||
1070 | . | ||
1071 | { stacktrace: | ||
1072 | __kmalloc+0x11b/0x1b0 | ||
1073 | i915_gem_execbuffer2+0x6c/0x2c0 [i915] | ||
1074 | drm_ioctl+0x349/0x670 [drm] | ||
1075 | do_vfs_ioctl+0x2f0/0x4f0 | ||
1076 | SyS_ioctl+0x81/0xa0 | ||
1077 | system_call_fastpath+0x12/0x6a | ||
1078 | } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808 | ||
1079 | { stacktrace: | ||
1080 | __kmalloc+0x11b/0x1b0 | ||
1081 | load_elf_phdrs+0x76/0xa0 | ||
1082 | load_elf_binary+0x102/0x1650 | ||
1083 | search_binary_handler+0x97/0x1d0 | ||
1084 | do_execveat_common.isra.34+0x551/0x6e0 | ||
1085 | SyS_execve+0x3a/0x50 | ||
1086 | return_from_execve+0x0/0x23 | ||
1087 | } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048 | ||
1088 | { stacktrace: | ||
1089 | kmem_cache_alloc_trace+0xeb/0x150 | ||
1090 | apparmor_file_alloc_security+0x27/0x40 | ||
1091 | security_file_alloc+0x16/0x20 | ||
1092 | get_empty_filp+0x93/0x1c0 | ||
1093 | path_openat+0x31/0x5f0 | ||
1094 | do_filp_open+0x3a/0x90 | ||
1095 | do_sys_open+0x128/0x220 | ||
1096 | SyS_open+0x1e/0x20 | ||
1097 | system_call_fastpath+0x12/0x6a | ||
1098 | } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376 | ||
1099 | { stacktrace: | ||
1100 | __kmalloc+0x11b/0x1b0 | ||
1101 | seq_buf_alloc+0x1b/0x50 | ||
1102 | seq_read+0x2cc/0x370 | ||
1103 | proc_reg_read+0x3d/0x80 | ||
1104 | __vfs_read+0x28/0xe0 | ||
1105 | vfs_read+0x86/0x140 | ||
1106 | SyS_read+0x46/0xb0 | ||
1107 | system_call_fastpath+0x12/0x6a | ||
1108 | } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768 | ||
1109 | |||
1110 | Totals: | ||
1111 | Hits: 6085872 | ||
1112 | Entries: 253 | ||
1113 | Dropped: 0 | ||
1114 | |||
1115 | If you key a hist trigger on common_pid, in order for example to | ||
1116 | gather and display sorted totals for each process, you can use the | ||
1117 | special .execname modifier to display the executable names for the | ||
1118 | processes in the table rather than raw pids. The example below | ||
1119 | keeps a per-process sum of total bytes read:: | ||
1120 | |||
1121 | # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \ | ||
1122 | /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger | ||
1123 | |||
1124 | # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist | ||
1125 | # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active] | ||
1126 | |||
1127 | { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512 | ||
1128 | { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640 | ||
1129 | { common_pid: compiz [ 2889] } hitcount: 59 count: 254400 | ||
1130 | { common_pid: bash [ 8710] } hitcount: 3 count: 66369 | ||
1131 | { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739 | ||
1132 | { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648 | ||
1133 | { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216 | ||
1134 | { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396 | ||
1135 | { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264 | ||
1136 | { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424 | ||
1137 | { common_pid: gmain [ 1315] } hitcount: 18 count: 6336 | ||
1138 | . | ||
1139 | . | ||
1140 | . | ||
1141 | { common_pid: postgres [ 1892] } hitcount: 2 count: 32 | ||
1142 | { common_pid: postgres [ 1891] } hitcount: 2 count: 32 | ||
1143 | { common_pid: gmain [ 8704] } hitcount: 2 count: 32 | ||
1144 | { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21 | ||
1145 | { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16 | ||
1146 | { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16 | ||
1147 | { common_pid: gdbus [ 2998] } hitcount: 1 count: 16 | ||
1148 | { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8 | ||
1149 | { common_pid: init [ 1] } hitcount: 2 count: 2 | ||
1150 | |||
1151 | Totals: | ||
1152 | Hits: 2116 | ||
1153 | Entries: 51 | ||
1154 | Dropped: 0 | ||
1155 | |||
1156 | Similarly, if you key a hist trigger on syscall id, for example to | ||
1157 | gather and display a list of systemwide syscall hits, you can use | ||
1158 | the special .syscall modifier to display the syscall names rather | ||
1159 | than raw ids. The example below keeps a running total of syscall | ||
1160 | counts for the system during the run:: | ||
1161 | |||
1162 | # echo 'hist:key=id.syscall:val=hitcount' > \ | ||
1163 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
1164 | |||
1165 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
1166 | # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] | ||
1167 | |||
1168 | { id: sys_fsync [ 74] } hitcount: 1 | ||
1169 | { id: sys_newuname [ 63] } hitcount: 1 | ||
1170 | { id: sys_prctl [157] } hitcount: 1 | ||
1171 | { id: sys_statfs [137] } hitcount: 1 | ||
1172 | { id: sys_symlink [ 88] } hitcount: 1 | ||
1173 | { id: sys_sendmmsg [307] } hitcount: 1 | ||
1174 | { id: sys_semctl [ 66] } hitcount: 1 | ||
1175 | { id: sys_readlink [ 89] } hitcount: 3 | ||
1176 | { id: sys_bind [ 49] } hitcount: 3 | ||
1177 | { id: sys_getsockname [ 51] } hitcount: 3 | ||
1178 | { id: sys_unlink [ 87] } hitcount: 3 | ||
1179 | { id: sys_rename [ 82] } hitcount: 4 | ||
1180 | { id: unknown_syscall [ 58] } hitcount: 4 | ||
1181 | { id: sys_connect [ 42] } hitcount: 4 | ||
1182 | { id: sys_getpid [ 39] } hitcount: 4 | ||
1183 | . | ||
1184 | . | ||
1185 | . | ||
1186 | { id: sys_rt_sigprocmask [ 14] } hitcount: 952 | ||
1187 | { id: sys_futex [202] } hitcount: 1534 | ||
1188 | { id: sys_write [ 1] } hitcount: 2689 | ||
1189 | { id: sys_setitimer [ 38] } hitcount: 2797 | ||
1190 | { id: sys_read [ 0] } hitcount: 3202 | ||
1191 | { id: sys_select [ 23] } hitcount: 3773 | ||
1192 | { id: sys_writev [ 20] } hitcount: 4531 | ||
1193 | { id: sys_poll [ 7] } hitcount: 8314 | ||
1194 | { id: sys_recvmsg [ 47] } hitcount: 13738 | ||
1195 | { id: sys_ioctl [ 16] } hitcount: 21843 | ||
1196 | |||
1197 | Totals: | ||
1198 | Hits: 67612 | ||
1199 | Entries: 72 | ||
1200 | Dropped: 0 | ||
1201 | |||
1202 | The syscall counts above provide a rough overall picture of system | ||
1203 | call activity on the system; we can see for example that the most | ||
1204 | popular system call on this system was the 'sys_ioctl' system call. | ||
1205 | |||
1206 | We can use 'compound' keys to refine that number and provide some | ||
1207 | further insight as to which processes exactly contribute to the | ||
1208 | overall ioctl count. | ||
1209 | |||
1210 | The command below keeps a hitcount for every unique combination of | ||
1211 | system call id and pid - the end result is essentially a table | ||
1212 | that keeps a per-pid sum of system call hits. The results are | ||
1213 | sorted using the system call id as the primary key, and the | ||
1214 | hitcount sum as the secondary key:: | ||
1215 | |||
1216 | # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ | ||
1217 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
1218 | |||
1219 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
1220 | # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] | ||
1221 | |||
1222 | { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 | ||
1223 | { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 | ||
1224 | { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 | ||
1225 | { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 | ||
1226 | { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 | ||
1227 | { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 | ||
1228 | { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 | ||
1229 | { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 | ||
1230 | { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 | ||
1231 | { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 | ||
1232 | . | ||
1233 | . | ||
1234 | . | ||
1235 | { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1 | ||
1236 | { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12 | ||
1237 | { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16 | ||
1238 | { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808 | ||
1239 | { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580 | ||
1240 | . | ||
1241 | . | ||
1242 | . | ||
1243 | { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3 | ||
1244 | { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16 | ||
1245 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2 | ||
1246 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4 | ||
1247 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4 | ||
1248 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4 | ||
1249 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4 | ||
1250 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6 | ||
1251 | { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2 | ||
1252 | { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4 | ||
1253 | { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6 | ||
1254 | |||
1255 | Totals: | ||
1256 | Hits: 31536 | ||
1257 | Entries: 323 | ||
1258 | Dropped: 0 | ||
1259 | |||
1260 | The above list does give us a breakdown of the ioctl syscall by | ||
1261 | pid, but it also gives us quite a bit more than that, which we | ||
1262 | don't really care about at the moment. Since we know the syscall | ||
1263 | id for sys_ioctl (16, displayed next to the sys_ioctl name), we | ||
1264 | can use that to filter out all the other syscalls:: | ||
1265 | |||
1266 | # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \ | ||
1267 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
1268 | |||
1269 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
1270 | # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active] | ||
1271 | |||
1272 | { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1 | ||
1273 | { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1 | ||
1274 | { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1 | ||
1275 | { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1 | ||
1276 | { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1 | ||
1277 | { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1 | ||
1278 | { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1 | ||
1279 | { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1 | ||
1280 | { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1 | ||
1281 | . | ||
1282 | . | ||
1283 | . | ||
1284 | { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45 | ||
1285 | { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48 | ||
1286 | { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48 | ||
1287 | { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66 | ||
1288 | { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674 | ||
1289 | { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443 | ||
1290 | |||
1291 | Totals: | ||
1292 | Hits: 101162 | ||
1293 | Entries: 103 | ||
1294 | Dropped: 0 | ||
1295 | |||
1296 | The above output shows that 'compiz' and 'Xorg' are far and away | ||
1297 | the heaviest ioctl callers (which might lead to questions about | ||
1298 | whether they really need to be making all those calls and to | ||
1299 | possible avenues for further investigation.) | ||
1300 | |||
1301 | The compound key examples used a key and a sum value (hitcount) to | ||
1302 | sort the output, but we can just as easily use two keys instead. | ||
1303 | Here's an example where we use a compound key composed of the | ||
1304 | common_pid and size event fields. Sorting with pid as the primary | ||
1305 | key and 'size' as the secondary key allows us to display an | ||
1306 | ordered summary of the recvfrom sizes, with counts, received by | ||
1307 | each process:: | ||
1308 | |||
1309 | # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ | ||
1310 | /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger | ||
1311 | |||
1312 | # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist | ||
1313 | # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] | ||
1314 | |||
1315 | { common_pid: smbd [ 784], size: 4 } hitcount: 1 | ||
1316 | { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 | ||
1317 | { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 | ||
1318 | { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 | ||
1319 | { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 | ||
1320 | { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 | ||
1321 | { common_pid: compiz [ 2994], size: 8 } hitcount: 1 | ||
1322 | { common_pid: compiz [ 2994], size: 20 } hitcount: 11 | ||
1323 | { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 | ||
1324 | { common_pid: firefox [ 8817], size: 4 } hitcount: 1 | ||
1325 | { common_pid: firefox [ 8817], size: 8 } hitcount: 5 | ||
1326 | { common_pid: firefox [ 8817], size: 588 } hitcount: 2 | ||
1327 | { common_pid: firefox [ 8817], size: 628 } hitcount: 1 | ||
1328 | { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 | ||
1329 | { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 | ||
1330 | { common_pid: firefox [ 8822], size: 8 } hitcount: 2 | ||
1331 | { common_pid: firefox [ 8822], size: 160 } hitcount: 2 | ||
1332 | { common_pid: firefox [ 8822], size: 320 } hitcount: 2 | ||
1333 | { common_pid: firefox [ 8822], size: 352 } hitcount: 1 | ||
1334 | . | ||
1335 | . | ||
1336 | . | ||
1337 | { common_pid: pool [ 8923], size: 1960 } hitcount: 10 | ||
1338 | { common_pid: pool [ 8923], size: 2048 } hitcount: 10 | ||
1339 | { common_pid: pool [ 8924], size: 1960 } hitcount: 10 | ||
1340 | { common_pid: pool [ 8924], size: 2048 } hitcount: 10 | ||
1341 | { common_pid: pool [ 8928], size: 1964 } hitcount: 4 | ||
1342 | { common_pid: pool [ 8928], size: 1965 } hitcount: 2 | ||
1343 | { common_pid: pool [ 8928], size: 2048 } hitcount: 6 | ||
1344 | { common_pid: pool [ 8929], size: 1982 } hitcount: 1 | ||
1345 | { common_pid: pool [ 8929], size: 2048 } hitcount: 1 | ||
1346 | |||
1347 | Totals: | ||
1348 | Hits: 2016 | ||
1349 | Entries: 224 | ||
1350 | Dropped: 0 | ||
1351 | |||
1352 | The above example also illustrates the fact that although a compound | ||
1353 | key is treated as a single entity for hashing purposes, the sub-keys | ||
1354 | it's composed of can be accessed independently. | ||
1355 | |||
1356 | The next example uses a string field as the hash key and | ||
1357 | demonstrates how you can manually pause and continue a hist trigger. | ||
1358 | In this example, we'll aggregate fork counts and don't expect a | ||
1359 | large number of entries in the hash table, so we'll drop it to a | ||
1360 | much smaller number, say 256:: | ||
1361 | |||
1362 | # echo 'hist:key=child_comm:val=hitcount:size=256' > \ | ||
1363 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
1364 | |||
1365 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
1366 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | ||
1367 | |||
1368 | { child_comm: dconf worker } hitcount: 1 | ||
1369 | { child_comm: ibus-daemon } hitcount: 1 | ||
1370 | { child_comm: whoopsie } hitcount: 1 | ||
1371 | { child_comm: smbd } hitcount: 1 | ||
1372 | { child_comm: gdbus } hitcount: 1 | ||
1373 | { child_comm: kthreadd } hitcount: 1 | ||
1374 | { child_comm: dconf worker } hitcount: 1 | ||
1375 | { child_comm: evolution-alarm } hitcount: 2 | ||
1376 | { child_comm: Socket Thread } hitcount: 2 | ||
1377 | { child_comm: postgres } hitcount: 2 | ||
1378 | { child_comm: bash } hitcount: 3 | ||
1379 | { child_comm: compiz } hitcount: 3 | ||
1380 | { child_comm: evolution-sourc } hitcount: 4 | ||
1381 | { child_comm: dhclient } hitcount: 4 | ||
1382 | { child_comm: pool } hitcount: 5 | ||
1383 | { child_comm: nm-dispatcher.a } hitcount: 8 | ||
1384 | { child_comm: firefox } hitcount: 8 | ||
1385 | { child_comm: dbus-daemon } hitcount: 8 | ||
1386 | { child_comm: glib-pacrunner } hitcount: 10 | ||
1387 | { child_comm: evolution } hitcount: 23 | ||
1388 | |||
1389 | Totals: | ||
1390 | Hits: 89 | ||
1391 | Entries: 20 | ||
1392 | Dropped: 0 | ||
1393 | |||
1394 | If we want to pause the hist trigger, we can simply append :pause to | ||
1395 | the command that started the trigger. Notice that the trigger info | ||
1396 | displays as [paused]:: | ||
1397 | |||
1398 | # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ | ||
1399 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
1400 | |||
1401 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
1402 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] | ||
1403 | |||
1404 | { child_comm: dconf worker } hitcount: 1 | ||
1405 | { child_comm: kthreadd } hitcount: 1 | ||
1406 | { child_comm: dconf worker } hitcount: 1 | ||
1407 | { child_comm: gdbus } hitcount: 1 | ||
1408 | { child_comm: ibus-daemon } hitcount: 1 | ||
1409 | { child_comm: Socket Thread } hitcount: 2 | ||
1410 | { child_comm: evolution-alarm } hitcount: 2 | ||
1411 | { child_comm: smbd } hitcount: 2 | ||
1412 | { child_comm: bash } hitcount: 3 | ||
1413 | { child_comm: whoopsie } hitcount: 3 | ||
1414 | { child_comm: compiz } hitcount: 3 | ||
1415 | { child_comm: evolution-sourc } hitcount: 4 | ||
1416 | { child_comm: pool } hitcount: 5 | ||
1417 | { child_comm: postgres } hitcount: 6 | ||
1418 | { child_comm: firefox } hitcount: 8 | ||
1419 | { child_comm: dhclient } hitcount: 10 | ||
1420 | { child_comm: emacs } hitcount: 12 | ||
1421 | { child_comm: dbus-daemon } hitcount: 20 | ||
1422 | { child_comm: nm-dispatcher.a } hitcount: 20 | ||
1423 | { child_comm: evolution } hitcount: 35 | ||
1424 | { child_comm: glib-pacrunner } hitcount: 59 | ||
1425 | |||
1426 | Totals: | ||
1427 | Hits: 199 | ||
1428 | Entries: 21 | ||
1429 | Dropped: 0 | ||
1430 | |||
1431 | To manually continue having the trigger aggregate events, append | ||
1432 | :cont instead. Notice that the trigger info displays as [active] | ||
1433 | again, and the data has changed:: | ||
1434 | |||
1435 | # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \ | ||
1436 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
1437 | |||
1438 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
1439 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | ||
1440 | |||
1441 | { child_comm: dconf worker } hitcount: 1 | ||
1442 | { child_comm: dconf worker } hitcount: 1 | ||
1443 | { child_comm: kthreadd } hitcount: 1 | ||
1444 | { child_comm: gdbus } hitcount: 1 | ||
1445 | { child_comm: ibus-daemon } hitcount: 1 | ||
1446 | { child_comm: Socket Thread } hitcount: 2 | ||
1447 | { child_comm: evolution-alarm } hitcount: 2 | ||
1448 | { child_comm: smbd } hitcount: 2 | ||
1449 | { child_comm: whoopsie } hitcount: 3 | ||
1450 | { child_comm: compiz } hitcount: 3 | ||
1451 | { child_comm: evolution-sourc } hitcount: 4 | ||
1452 | { child_comm: bash } hitcount: 5 | ||
1453 | { child_comm: pool } hitcount: 5 | ||
1454 | { child_comm: postgres } hitcount: 6 | ||
1455 | { child_comm: firefox } hitcount: 8 | ||
1456 | { child_comm: dhclient } hitcount: 11 | ||
1457 | { child_comm: emacs } hitcount: 12 | ||
1458 | { child_comm: dbus-daemon } hitcount: 22 | ||
1459 | { child_comm: nm-dispatcher.a } hitcount: 22 | ||
1460 | { child_comm: evolution } hitcount: 35 | ||
1461 | { child_comm: glib-pacrunner } hitcount: 59 | ||
1462 | |||
1463 | Totals: | ||
1464 | Hits: 206 | ||
1465 | Entries: 21 | ||
1466 | Dropped: 0 | ||
1467 | |||
1468 | The previous example showed how to start and stop a hist trigger by | ||
1469 | appending 'pause' and 'continue' to the hist trigger command. A | ||
1470 | hist trigger can also be started in a paused state by initially | ||
1471 | starting the trigger with ':pause' appended. This allows you to | ||
1472 | start the trigger only when you're ready to start collecting data | ||
1473 | and not before. For example, you could start the trigger in a | ||
1474 | paused state, then unpause it and do something you want to measure, | ||
1475 | then pause the trigger again when done. | ||
1476 | |||
1477 | Of course, doing this manually can be difficult and error-prone, but | ||
1478 | it is possible to automatically start and stop a hist trigger based | ||
1479 | on some condition, via the enable_hist and disable_hist triggers. | ||
1480 | |||
1481 | For example, suppose we wanted to take a look at the relative | ||
1482 | weights in terms of skb length for each callpath that leads to a | ||
1483 | netif_receive_skb event when downloading a decent-sized file using | ||
1484 | wget. | ||
1485 | |||
1486 | First we set up an initially paused stacktrace trigger on the | ||
1487 | netif_receive_skb event:: | ||
1488 | |||
1489 | # echo 'hist:key=stacktrace:vals=len:pause' > \ | ||
1490 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1491 | |||
1492 | Next, we set up an 'enable_hist' trigger on the sched_process_exec | ||
1493 | event, with an 'if filename==/usr/bin/wget' filter. The effect of | ||
1494 | this new trigger is that it will 'unpause' the hist trigger we just | ||
1495 | set up on netif_receive_skb if and only if it sees a | ||
1496 | sched_process_exec event with a filename of '/usr/bin/wget'. When | ||
1497 | that happens, all netif_receive_skb events are aggregated into a | ||
1498 | hash table keyed on stacktrace:: | ||
1499 | |||
1500 | # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
1501 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1502 | |||
1503 | The aggregation continues until the netif_receive_skb is paused | ||
1504 | again, which is what the following disable_hist event does by | ||
1505 | creating a similar setup on the sched_process_exit event, using the | ||
1506 | filter 'comm==wget':: | ||
1507 | |||
1508 | # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | ||
1509 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1510 | |||
1511 | Whenever a process exits and the comm field of the disable_hist | ||
1512 | trigger filter matches 'comm==wget', the netif_receive_skb hist | ||
1513 | trigger is disabled. | ||
1514 | |||
1515 | The overall effect is that netif_receive_skb events are aggregated | ||
1516 | into the hash table for only the duration of the wget. Executing a | ||
1517 | wget command and then listing the 'hist' file will display the | ||
1518 | output generated by the wget command:: | ||
1519 | |||
1520 | $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | ||
1521 | |||
1522 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1523 | # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | ||
1524 | |||
1525 | { stacktrace: | ||
1526 | __netif_receive_skb_core+0x46d/0x990 | ||
1527 | __netif_receive_skb+0x18/0x60 | ||
1528 | netif_receive_skb_internal+0x23/0x90 | ||
1529 | napi_gro_receive+0xc8/0x100 | ||
1530 | ieee80211_deliver_skb+0xd6/0x270 [mac80211] | ||
1531 | ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | ||
1532 | ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | ||
1533 | ieee80211_rx+0x31d/0x900 [mac80211] | ||
1534 | iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | ||
1535 | iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | ||
1536 | iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | ||
1537 | irq_thread_fn+0x20/0x50 | ||
1538 | irq_thread+0x11f/0x150 | ||
1539 | kthread+0xd2/0xf0 | ||
1540 | ret_from_fork+0x42/0x70 | ||
1541 | } hitcount: 85 len: 28884 | ||
1542 | { stacktrace: | ||
1543 | __netif_receive_skb_core+0x46d/0x990 | ||
1544 | __netif_receive_skb+0x18/0x60 | ||
1545 | netif_receive_skb_internal+0x23/0x90 | ||
1546 | napi_gro_complete+0xa4/0xe0 | ||
1547 | dev_gro_receive+0x23a/0x360 | ||
1548 | napi_gro_receive+0x30/0x100 | ||
1549 | ieee80211_deliver_skb+0xd6/0x270 [mac80211] | ||
1550 | ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | ||
1551 | ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | ||
1552 | ieee80211_rx+0x31d/0x900 [mac80211] | ||
1553 | iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | ||
1554 | iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | ||
1555 | iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | ||
1556 | irq_thread_fn+0x20/0x50 | ||
1557 | irq_thread+0x11f/0x150 | ||
1558 | kthread+0xd2/0xf0 | ||
1559 | } hitcount: 98 len: 664329 | ||
1560 | { stacktrace: | ||
1561 | __netif_receive_skb_core+0x46d/0x990 | ||
1562 | __netif_receive_skb+0x18/0x60 | ||
1563 | process_backlog+0xa8/0x150 | ||
1564 | net_rx_action+0x15d/0x340 | ||
1565 | __do_softirq+0x114/0x2c0 | ||
1566 | do_softirq_own_stack+0x1c/0x30 | ||
1567 | do_softirq+0x65/0x70 | ||
1568 | __local_bh_enable_ip+0xb5/0xc0 | ||
1569 | ip_finish_output+0x1f4/0x840 | ||
1570 | ip_output+0x6b/0xc0 | ||
1571 | ip_local_out_sk+0x31/0x40 | ||
1572 | ip_send_skb+0x1a/0x50 | ||
1573 | udp_send_skb+0x173/0x2a0 | ||
1574 | udp_sendmsg+0x2bf/0x9f0 | ||
1575 | inet_sendmsg+0x64/0xa0 | ||
1576 | sock_sendmsg+0x3d/0x50 | ||
1577 | } hitcount: 115 len: 13030 | ||
1578 | { stacktrace: | ||
1579 | __netif_receive_skb_core+0x46d/0x990 | ||
1580 | __netif_receive_skb+0x18/0x60 | ||
1581 | netif_receive_skb_internal+0x23/0x90 | ||
1582 | napi_gro_complete+0xa4/0xe0 | ||
1583 | napi_gro_flush+0x6d/0x90 | ||
1584 | iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] | ||
1585 | irq_thread_fn+0x20/0x50 | ||
1586 | irq_thread+0x11f/0x150 | ||
1587 | kthread+0xd2/0xf0 | ||
1588 | ret_from_fork+0x42/0x70 | ||
1589 | } hitcount: 934 len: 5512212 | ||
1590 | |||
1591 | Totals: | ||
1592 | Hits: 1232 | ||
1593 | Entries: 4 | ||
1594 | Dropped: 0 | ||
1595 | |||
1596 | The above shows all the netif_receive_skb callpaths and their total | ||
1597 | lengths for the duration of the wget command. | ||
1598 | |||
1599 | The 'clear' hist trigger param can be used to clear the hash table. | ||
1600 | Suppose we wanted to try another run of the previous example but | ||
1601 | this time also wanted to see the complete list of events that went | ||
1602 | into the histogram. In order to avoid having to set everything up | ||
1603 | again, we can just clear the histogram first:: | ||
1604 | |||
1605 | # echo 'hist:key=stacktrace:vals=len:clear' >> \ | ||
1606 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1607 | |||
1608 | Just to verify that it is in fact cleared, here's what we now see in | ||
1609 | the hist file:: | ||
1610 | |||
1611 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1612 | # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | ||
1613 | |||
1614 | Totals: | ||
1615 | Hits: 0 | ||
1616 | Entries: 0 | ||
1617 | Dropped: 0 | ||
1618 | |||
1619 | Since we want to see the detailed list of every netif_receive_skb | ||
1620 | event occurring during the new run, which are in fact the same | ||
1621 | events being aggregated into the hash table, we add some additional | ||
1622 | 'enable_event' events to the triggering sched_process_exec and | ||
1623 | sched_process_exit events as such:: | ||
1624 | |||
1625 | # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
1626 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1627 | |||
1628 | # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ | ||
1629 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1630 | |||
1631 | If you read the trigger files for the sched_process_exec and | ||
1632 | sched_process_exit triggers, you should see two triggers for each: | ||
1633 | one enabling/disabling the hist aggregation and the other | ||
1634 | enabling/disabling the logging of events:: | ||
1635 | |||
1636 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1637 | enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | ||
1638 | enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | ||
1639 | |||
1640 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1641 | enable_event:net:netif_receive_skb:unlimited if comm==wget | ||
1642 | disable_hist:net:netif_receive_skb:unlimited if comm==wget | ||
1643 | |||
1644 | In other words, whenever either of the sched_process_exec or | ||
1645 | sched_process_exit events is hit and matches 'wget', it enables or | ||
1646 | disables both the histogram and the event log, and what you end up | ||
1647 | with is a hash table and set of events just covering the specified | ||
1648 | duration. Run the wget command again:: | ||
1649 | |||
1650 | $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | ||
1651 | |||
1652 | Displaying the 'hist' file should show something similar to what you | ||
1653 | saw in the last run, but this time you should also see the | ||
1654 | individual events in the trace file:: | ||
1655 | |||
1656 | # cat /sys/kernel/debug/tracing/trace | ||
1657 | |||
1658 | # tracer: nop | ||
1659 | # | ||
1660 | # entries-in-buffer/entries-written: 183/1426 #P:4 | ||
1661 | # | ||
1662 | # _-----=> irqs-off | ||
1663 | # / _----=> need-resched | ||
1664 | # | / _---=> hardirq/softirq | ||
1665 | # || / _--=> preempt-depth | ||
1666 | # ||| / delay | ||
1667 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
1668 | # | | | |||| | | | ||
1669 | wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 | ||
1670 | wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 | ||
1671 | dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 | ||
1672 | dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 | ||
1673 | ##### CPU 2 buffer started #### | ||
1674 | irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 | ||
1675 | irq/29-iwlwifi-559 [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 | ||
1676 | irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 | ||
1677 | irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 | ||
1678 | irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 | ||
1679 | .... | ||
1680 | |||
1681 | |||
1682 | The following example demonstrates how multiple hist triggers can be | ||
1683 | attached to a given event. This capability can be useful for | ||
1684 | creating a set of different summaries derived from the same set of | ||
1685 | events, or for comparing the effects of different filters, among | ||
1686 | other things. | ||
1687 | :: | ||
1688 | |||
1689 | # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ | ||
1690 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1691 | # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ | ||
1692 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1693 | # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ | ||
1694 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1695 | # echo 'hist:keys=skbaddr.hex:vals=len' >> \ | ||
1696 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1697 | # echo 'hist:keys=len:vals=common_preempt_count' >> \ | ||
1698 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1699 | |||
1700 | The above set of commands creates four triggers differing only in | ||
1701 | their filters, along with a completely different though fairly | ||
1702 | nonsensical trigger. Note that in order to append multiple hist | ||
1703 | triggers to the same file, you should use the '>>' operator to | ||
1704 | append them ('>' will also add the new hist trigger, but will remove | ||
1705 | any existing hist triggers beforehand). | ||
1706 | |||
1707 | Displaying the contents of the 'hist' file for the event shows the | ||
1708 | contents of all five histograms:: | ||
1709 | |||
1710 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1711 | |||
1712 | # event histogram | ||
1713 | # | ||
1714 | # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] | ||
1715 | # | ||
1716 | |||
1717 | { len: 176 } hitcount: 1 common_preempt_count: 0 | ||
1718 | { len: 223 } hitcount: 1 common_preempt_count: 0 | ||
1719 | { len: 4854 } hitcount: 1 common_preempt_count: 0 | ||
1720 | { len: 395 } hitcount: 1 common_preempt_count: 0 | ||
1721 | { len: 177 } hitcount: 1 common_preempt_count: 0 | ||
1722 | { len: 446 } hitcount: 1 common_preempt_count: 0 | ||
1723 | { len: 1601 } hitcount: 1 common_preempt_count: 0 | ||
1724 | . | ||
1725 | . | ||
1726 | . | ||
1727 | { len: 1280 } hitcount: 66 common_preempt_count: 0 | ||
1728 | { len: 116 } hitcount: 81 common_preempt_count: 40 | ||
1729 | { len: 708 } hitcount: 112 common_preempt_count: 0 | ||
1730 | { len: 46 } hitcount: 221 common_preempt_count: 0 | ||
1731 | { len: 1264 } hitcount: 458 common_preempt_count: 0 | ||
1732 | |||
1733 | Totals: | ||
1734 | Hits: 1428 | ||
1735 | Entries: 147 | ||
1736 | Dropped: 0 | ||
1737 | |||
1738 | |||
1739 | # event histogram | ||
1740 | # | ||
1741 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1742 | # | ||
1743 | |||
1744 | { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 | ||
1745 | { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 | ||
1746 | { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 | ||
1747 | { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 | ||
1748 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 | ||
1749 | { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 | ||
1750 | { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 | ||
1751 | { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 | ||
1752 | { skbaddr: ffff880100065900 } hitcount: 1 len: 46 | ||
1753 | { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 | ||
1754 | { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 | ||
1755 | { skbaddr: ffff880100064700 } hitcount: 1 len: 365 | ||
1756 | { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 | ||
1757 | . | ||
1758 | . | ||
1759 | . | ||
1760 | { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 | ||
1761 | { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 | ||
1762 | { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 | ||
1763 | { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 | ||
1764 | { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 | ||
1765 | { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 | ||
1766 | { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 | ||
1767 | { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 | ||
1768 | { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 | ||
1769 | |||
1770 | Totals: | ||
1771 | Hits: 1451 | ||
1772 | Entries: 318 | ||
1773 | Dropped: 0 | ||
1774 | |||
1775 | |||
1776 | # event histogram | ||
1777 | # | ||
1778 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] | ||
1779 | # | ||
1780 | |||
1781 | |||
1782 | Totals: | ||
1783 | Hits: 0 | ||
1784 | Entries: 0 | ||
1785 | Dropped: 0 | ||
1786 | |||
1787 | |||
1788 | # event histogram | ||
1789 | # | ||
1790 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] | ||
1791 | # | ||
1792 | |||
1793 | { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 | ||
1794 | { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 | ||
1795 | { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 | ||
1796 | { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 | ||
1797 | { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 | ||
1798 | { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 | ||
1799 | { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 | ||
1800 | { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 | ||
1801 | { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 | ||
1802 | { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 | ||
1803 | { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 | ||
1804 | { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 | ||
1805 | |||
1806 | Totals: | ||
1807 | Hits: 14 | ||
1808 | Entries: 12 | ||
1809 | Dropped: 0 | ||
1810 | |||
1811 | |||
1812 | # event histogram | ||
1813 | # | ||
1814 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] | ||
1815 | # | ||
1816 | |||
1817 | |||
1818 | Totals: | ||
1819 | Hits: 0 | ||
1820 | Entries: 0 | ||
1821 | Dropped: 0 | ||
1822 | |||
1823 | Named triggers can be used to have triggers share a common set of | ||
1824 | histogram data. This capability is mostly useful for combining the | ||
1825 | output of events generated by tracepoints contained inside inline | ||
1826 | functions, but names can be used in a hist trigger on any event. | ||
1827 | For example, these two triggers when hit will update the same 'len' | ||
1828 | field in the shared 'foo' histogram data:: | ||
1829 | |||
1830 | # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | ||
1831 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1832 | # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | ||
1833 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
1834 | |||
1835 | You can see that they're updating common histogram data by reading | ||
1836 | each event's hist files at the same time:: | ||
1837 | |||
1838 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; | ||
1839 | cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
1840 | |||
1841 | # event histogram | ||
1842 | # | ||
1843 | # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1844 | # | ||
1845 | |||
1846 | { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | ||
1847 | { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | ||
1848 | { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | ||
1849 | { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | ||
1850 | { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | ||
1851 | { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | ||
1852 | { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | ||
1853 | { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | ||
1854 | { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | ||
1855 | { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | ||
1856 | { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | ||
1857 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | ||
1858 | { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | ||
1859 | { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | ||
1860 | { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | ||
1861 | { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | ||
1862 | { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | ||
1863 | { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | ||
1864 | { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | ||
1865 | { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | ||
1866 | { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | ||
1867 | { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | ||
1868 | { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | ||
1869 | { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | ||
1870 | { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | ||
1871 | { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | ||
1872 | { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | ||
1873 | { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | ||
1874 | { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | ||
1875 | { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | ||
1876 | { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | ||
1877 | { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | ||
1878 | { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | ||
1879 | { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | ||
1880 | { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | ||
1881 | { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | ||
1882 | { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | ||
1883 | { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | ||
1884 | { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | ||
1885 | { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | ||
1886 | { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | ||
1887 | { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | ||
1888 | |||
1889 | Totals: | ||
1890 | Hits: 81 | ||
1891 | Entries: 42 | ||
1892 | Dropped: 0 | ||
1893 | # event histogram | ||
1894 | # | ||
1895 | # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1896 | # | ||
1897 | |||
1898 | { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | ||
1899 | { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | ||
1900 | { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | ||
1901 | { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | ||
1902 | { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | ||
1903 | { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | ||
1904 | { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | ||
1905 | { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | ||
1906 | { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | ||
1907 | { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | ||
1908 | { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | ||
1909 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | ||
1910 | { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | ||
1911 | { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | ||
1912 | { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | ||
1913 | { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | ||
1914 | { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | ||
1915 | { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | ||
1916 | { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | ||
1917 | { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | ||
1918 | { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | ||
1919 | { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | ||
1920 | { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | ||
1921 | { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | ||
1922 | { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | ||
1923 | { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | ||
1924 | { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | ||
1925 | { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | ||
1926 | { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | ||
1927 | { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | ||
1928 | { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | ||
1929 | { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | ||
1930 | { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | ||
1931 | { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | ||
1932 | { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | ||
1933 | { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | ||
1934 | { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | ||
1935 | { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | ||
1936 | { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | ||
1937 | { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | ||
1938 | { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | ||
1939 | { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | ||
1940 | |||
1941 | Totals: | ||
1942 | Hits: 81 | ||
1943 | Entries: 42 | ||
1944 | Dropped: 0 | ||
1945 | |||
1946 | And here's an example that shows how to combine histogram data from | ||
1947 | any two events even if they don't share any 'compatible' fields | ||
1948 | other than 'hitcount' and 'stacktrace'. These commands create a | ||
1949 | couple of triggers named 'bar' using those fields:: | ||
1950 | |||
1951 | # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | ||
1952 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
1953 | # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | ||
1954 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
1955 | |||
1956 | And displaying the output of either shows some interesting if | ||
1957 | somewhat confusing output:: | ||
1958 | |||
1959 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
1960 | # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
1961 | |||
1962 | # event histogram | ||
1963 | # | ||
1964 | # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] | ||
1965 | # | ||
1966 | |||
1967 | { stacktrace: | ||
1968 | _do_fork+0x18e/0x330 | ||
1969 | kernel_thread+0x29/0x30 | ||
1970 | kthreadd+0x154/0x1b0 | ||
1971 | ret_from_fork+0x3f/0x70 | ||
1972 | } hitcount: 1 | ||
1973 | { stacktrace: | ||
1974 | netif_rx_internal+0xb2/0xd0 | ||
1975 | netif_rx_ni+0x20/0x70 | ||
1976 | dev_loopback_xmit+0xaa/0xd0 | ||
1977 | ip_mc_output+0x126/0x240 | ||
1978 | ip_local_out_sk+0x31/0x40 | ||
1979 | igmp_send_report+0x1e9/0x230 | ||
1980 | igmp_timer_expire+0xe9/0x120 | ||
1981 | call_timer_fn+0x39/0xf0 | ||
1982 | run_timer_softirq+0x1e1/0x290 | ||
1983 | __do_softirq+0xfd/0x290 | ||
1984 | irq_exit+0x98/0xb0 | ||
1985 | smp_apic_timer_interrupt+0x4a/0x60 | ||
1986 | apic_timer_interrupt+0x6d/0x80 | ||
1987 | cpuidle_enter+0x17/0x20 | ||
1988 | call_cpuidle+0x3b/0x60 | ||
1989 | cpu_startup_entry+0x22d/0x310 | ||
1990 | } hitcount: 1 | ||
1991 | { stacktrace: | ||
1992 | netif_rx_internal+0xb2/0xd0 | ||
1993 | netif_rx_ni+0x20/0x70 | ||
1994 | dev_loopback_xmit+0xaa/0xd0 | ||
1995 | ip_mc_output+0x17f/0x240 | ||
1996 | ip_local_out_sk+0x31/0x40 | ||
1997 | ip_send_skb+0x1a/0x50 | ||
1998 | udp_send_skb+0x13e/0x270 | ||
1999 | udp_sendmsg+0x2bf/0x980 | ||
2000 | inet_sendmsg+0x67/0xa0 | ||
2001 | sock_sendmsg+0x38/0x50 | ||
2002 | SYSC_sendto+0xef/0x170 | ||
2003 | SyS_sendto+0xe/0x10 | ||
2004 | entry_SYSCALL_64_fastpath+0x12/0x6a | ||
2005 | } hitcount: 2 | ||
2006 | { stacktrace: | ||
2007 | netif_rx_internal+0xb2/0xd0 | ||
2008 | netif_rx+0x1c/0x60 | ||
2009 | loopback_xmit+0x6c/0xb0 | ||
2010 | dev_hard_start_xmit+0x219/0x3a0 | ||
2011 | __dev_queue_xmit+0x415/0x4f0 | ||
2012 | dev_queue_xmit_sk+0x13/0x20 | ||
2013 | ip_finish_output2+0x237/0x340 | ||
2014 | ip_finish_output+0x113/0x1d0 | ||
2015 | ip_output+0x66/0xc0 | ||
2016 | ip_local_out_sk+0x31/0x40 | ||
2017 | ip_send_skb+0x1a/0x50 | ||
2018 | udp_send_skb+0x16d/0x270 | ||
2019 | udp_sendmsg+0x2bf/0x980 | ||
2020 | inet_sendmsg+0x67/0xa0 | ||
2021 | sock_sendmsg+0x38/0x50 | ||
2022 | ___sys_sendmsg+0x14e/0x270 | ||
2023 | } hitcount: 76 | ||
2024 | { stacktrace: | ||
2025 | netif_rx_internal+0xb2/0xd0 | ||
2026 | netif_rx+0x1c/0x60 | ||
2027 | loopback_xmit+0x6c/0xb0 | ||
2028 | dev_hard_start_xmit+0x219/0x3a0 | ||
2029 | __dev_queue_xmit+0x415/0x4f0 | ||
2030 | dev_queue_xmit_sk+0x13/0x20 | ||
2031 | ip_finish_output2+0x237/0x340 | ||
2032 | ip_finish_output+0x113/0x1d0 | ||
2033 | ip_output+0x66/0xc0 | ||
2034 | ip_local_out_sk+0x31/0x40 | ||
2035 | ip_send_skb+0x1a/0x50 | ||
2036 | udp_send_skb+0x16d/0x270 | ||
2037 | udp_sendmsg+0x2bf/0x980 | ||
2038 | inet_sendmsg+0x67/0xa0 | ||
2039 | sock_sendmsg+0x38/0x50 | ||
2040 | ___sys_sendmsg+0x269/0x270 | ||
2041 | } hitcount: 77 | ||
2042 | { stacktrace: | ||
2043 | netif_rx_internal+0xb2/0xd0 | ||
2044 | netif_rx+0x1c/0x60 | ||
2045 | loopback_xmit+0x6c/0xb0 | ||
2046 | dev_hard_start_xmit+0x219/0x3a0 | ||
2047 | __dev_queue_xmit+0x415/0x4f0 | ||
2048 | dev_queue_xmit_sk+0x13/0x20 | ||
2049 | ip_finish_output2+0x237/0x340 | ||
2050 | ip_finish_output+0x113/0x1d0 | ||
2051 | ip_output+0x66/0xc0 | ||
2052 | ip_local_out_sk+0x31/0x40 | ||
2053 | ip_send_skb+0x1a/0x50 | ||
2054 | udp_send_skb+0x16d/0x270 | ||
2055 | udp_sendmsg+0x2bf/0x980 | ||
2056 | inet_sendmsg+0x67/0xa0 | ||
2057 | sock_sendmsg+0x38/0x50 | ||
2058 | SYSC_sendto+0xef/0x170 | ||
2059 | } hitcount: 88 | ||
2060 | { stacktrace: | ||
2061 | _do_fork+0x18e/0x330 | ||
2062 | SyS_clone+0x19/0x20 | ||
2063 | entry_SYSCALL_64_fastpath+0x12/0x6a | ||
2064 | } hitcount: 244 | ||
2065 | |||
2066 | Totals: | ||
2067 | Hits: 489 | ||
2068 | Entries: 7 | ||
2069 | Dropped: 0 | ||
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index fdf5fb54a04c..e45f0786f3f9 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst | |||
@@ -543,6 +543,30 @@ of ftrace. Here is a list of some of the key files: | |||
543 | 543 | ||
544 | See events.txt for more information. | 544 | See events.txt for more information. |
545 | 545 | ||
546 | timestamp_mode: | ||
547 | |||
548 | Certain tracers may change the timestamp mode used when | ||
549 | logging trace events into the event buffer. Events with | ||
550 | different modes can coexist within a buffer but the mode in | ||
551 | effect when an event is logged determines which timestamp mode | ||
552 | is used for that event. The default timestamp mode is | ||
553 | 'delta'. | ||
554 | |||
555 | Usual timestamp modes for tracing: | ||
556 | |||
557 | # cat timestamp_mode | ||
558 | [delta] absolute | ||
559 | |||
560 | The timestamp mode with the square brackets around it is the | ||
561 | one in effect. | ||
562 | |||
563 | delta: Default timestamp mode - timestamp is a delta against | ||
564 | a per-buffer timestamp. | ||
565 | |||
566 | absolute: The timestamp is a full timestamp, not a delta | ||
567 | against some other value. As such it takes up more | ||
568 | space and is less efficient. | ||
569 | |||
546 | hwlat_detector: | 570 | hwlat_detector: |
547 | 571 | ||
548 | Directory for the Hardware Latency Detector. | 572 | Directory for the Hardware Latency Detector. |
diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt new file mode 100644 index 000000000000..6e05510afc28 --- /dev/null +++ b/Documentation/trace/histogram.txt | |||
@@ -0,0 +1,1995 @@ | |||
1 | Event Histograms | ||
2 | |||
3 | Documentation written by Tom Zanussi | ||
4 | |||
5 | 1. Introduction | ||
6 | =============== | ||
7 | |||
8 | Histogram triggers are special event triggers that can be used to | ||
9 | aggregate trace event data into histograms. For information on | ||
10 | trace events and event triggers, see Documentation/trace/events.rst. | ||
11 | |||
12 | |||
13 | 2. Histogram Trigger Command | ||
14 | ============================ | ||
15 | |||
16 | A histogram trigger command is an event trigger command that | ||
17 | aggregates event hits into a hash table keyed on one or more trace | ||
18 | event format fields (or stacktrace) and a set of running totals | ||
19 | derived from one or more trace event format fields and/or event | ||
20 | counts (hitcount). | ||
21 | |||
22 | The format of a hist trigger is as follows: | ||
23 | |||
24 | hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>] | ||
25 | [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue] | ||
26 | [:clear][:name=histname1] [if <filter>] | ||
27 | |||
28 | When a matching event is hit, an entry is added to a hash table | ||
29 | using the key(s) and value(s) named. Keys and values correspond to | ||
30 | fields in the event's format description. Values must correspond to | ||
31 | numeric fields - on an event hit, the value(s) will be added to a | ||
32 | sum kept for that field. The special string 'hitcount' can be used | ||
33 | in place of an explicit value field - this is simply a count of | ||
34 | event hits. If 'values' isn't specified, an implicit 'hitcount' | ||
35 | value will be automatically created and used as the only value. | ||
36 | Keys can be any field, or the special string 'stacktrace', which | ||
37 | will use the event's kernel stacktrace as the key. The keywords | ||
38 | 'keys' or 'key' can be used to specify keys, and the keywords | ||
39 | 'values', 'vals', or 'val' can be used to specify values. Compound | ||
40 | keys consisting of up to two fields can be specified by the 'keys' | ||
41 | keyword. Hashing a compound key produces a unique entry in the | ||
42 | table for each unique combination of component keys, and can be | ||
43 | useful for providing more fine-grained summaries of event data. | ||
44 | Additionally, sort keys consisting of up to two fields can be | ||
45 | specified by the 'sort' keyword. If more than one field is | ||
46 | specified, the result will be a 'sort within a sort': the first key | ||
47 | is taken to be the primary sort key and the second the secondary | ||
48 | key. If a hist trigger is given a name using the 'name' parameter, | ||
49 | its histogram data will be shared with other triggers of the same | ||
50 | name, and trigger hits will update this common data. Only triggers | ||
51 | with 'compatible' fields can be combined in this way; triggers are | ||
52 | 'compatible' if the fields named in the trigger share the same | ||
53 | number and type of fields and those fields also have the same names. | ||
54 | Note that any two events always share the compatible 'hitcount' and | ||
55 | 'stacktrace' fields and can therefore be combined using those | ||
56 | fields, however pointless that may be. | ||
57 | |||
58 | 'hist' triggers add a 'hist' file to each event's subdirectory. | ||
59 | Reading the 'hist' file for the event will dump the hash table in | ||
60 | its entirety to stdout. If there are multiple hist triggers | ||
61 | attached to an event, there will be a table for each trigger in the | ||
62 | output. The table displayed for a named trigger will be the same as | ||
63 | any other instance having the same name. Each printed hash table | ||
64 | entry is a simple list of the keys and values comprising the entry; | ||
65 | keys are printed first and are delineated by curly braces, and are | ||
66 | followed by the set of value fields for the entry. By default, | ||
67 | numeric fields are displayed as base-10 integers. This can be | ||
68 | modified by appending any of the following modifiers to the field | ||
69 | name: | ||
70 | |||
71 | .hex display a number as a hex value | ||
72 | .sym display an address as a symbol | ||
73 | .sym-offset display an address as a symbol and offset | ||
74 | .syscall display a syscall id as a system call name | ||
75 | .execname display a common_pid as a program name | ||
76 | .log2 display log2 value rather than raw number | ||
77 | .usecs display a common_timestamp in microseconds | ||
78 | |||
79 | Note that in general the semantics of a given field aren't | ||
80 | interpreted when applying a modifier to it, but there are some | ||
81 | restrictions to be aware of in this regard: | ||
82 | |||
83 | - only the 'hex' modifier can be used for values (because values | ||
84 | are essentially sums, and the other modifiers don't make sense | ||
85 | in that context). | ||
86 | - the 'execname' modifier can only be used on a 'common_pid'. The | ||
87 | reason for this is that the execname is simply the 'comm' value | ||
88 | saved for the 'current' process when an event was triggered, | ||
89 | which is the same as the common_pid value saved by the event | ||
90 | tracing code. Trying to apply that comm value to other pid | ||
91 | values wouldn't be correct, and typically events that care save | ||
92 | pid-specific comm fields in the event itself. | ||
93 | |||
94 | A typical usage scenario would be the following to enable a hist | ||
95 | trigger, read its current contents, and then turn it off: | ||
96 | |||
97 | # echo 'hist:keys=skbaddr.hex:vals=len' > \ | ||
98 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
99 | |||
100 | # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
101 | |||
102 | # echo '!hist:keys=skbaddr.hex:vals=len' > \ | ||
103 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
104 | |||
105 | The trigger file itself can be read to show the details of the | ||
106 | currently attached hist trigger. This information is also displayed | ||
107 | at the top of the 'hist' file when read. | ||
108 | |||
109 | By default, the size of the hash table is 2048 entries. The 'size' | ||
110 | parameter can be used to specify more or fewer than that. The units | ||
111 | are in terms of hashtable entries - if a run uses more entries than | ||
112 | specified, the results will show the number of 'drops', the number | ||
113 | of hits that were ignored. The size should be a power of 2 between | ||
114 | 128 and 131072 (any non-power-of-2 number specified will be rounded | ||
115 | up). | ||
116 | |||
117 | The 'sort' parameter can be used to specify a value field to sort | ||
118 | on. The default if unspecified is 'hitcount' and the default sort | ||
119 | order is 'ascending'. To sort in the opposite direction, append | ||
120 | .descending' to the sort key. | ||
121 | |||
122 | The 'pause' parameter can be used to pause an existing hist trigger | ||
123 | or to start a hist trigger but not log any events until told to do | ||
124 | so. 'continue' or 'cont' can be used to start or restart a paused | ||
125 | hist trigger. | ||
126 | |||
127 | The 'clear' parameter will clear the contents of a running hist | ||
128 | trigger and leave its current paused/active state unchanged. | ||
129 | |||
130 | Note that the 'pause', 'cont', and 'clear' parameters should be | ||
131 | applied using 'append' shell operator ('>>') if applied to an | ||
132 | existing trigger, rather than via the '>' operator, which will cause | ||
133 | the trigger to be removed through truncation. | ||
134 | |||
135 | - enable_hist/disable_hist | ||
136 | |||
137 | The enable_hist and disable_hist triggers can be used to have one | ||
138 | event conditionally start and stop another event's already-attached | ||
139 | hist trigger. Any number of enable_hist and disable_hist triggers | ||
140 | can be attached to a given event, allowing that event to kick off | ||
141 | and stop aggregations on a host of other events. | ||
142 | |||
143 | The format is very similar to the enable/disable_event triggers: | ||
144 | |||
145 | enable_hist:<system>:<event>[:count] | ||
146 | disable_hist:<system>:<event>[:count] | ||
147 | |||
148 | Instead of enabling or disabling the tracing of the target event | ||
149 | into the trace buffer as the enable/disable_event triggers do, the | ||
150 | enable/disable_hist triggers enable or disable the aggregation of | ||
151 | the target event into a hash table. | ||
152 | |||
153 | A typical usage scenario for the enable_hist/disable_hist triggers | ||
154 | would be to first set up a paused hist trigger on some event, | ||
155 | followed by an enable_hist/disable_hist pair that turns the hist | ||
156 | aggregation on and off when conditions of interest are hit: | ||
157 | |||
158 | # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \ | ||
159 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
160 | |||
161 | # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
162 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
163 | |||
164 | # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | ||
165 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
166 | |||
167 | The above sets up an initially paused hist trigger which is unpaused | ||
168 | and starts aggregating events when a given program is executed, and | ||
169 | which stops aggregating when the process exits and the hist trigger | ||
170 | is paused again. | ||
171 | |||
172 | The examples below provide a more concrete illustration of the | ||
173 | concepts and typical usage patterns discussed above. | ||
174 | |||
175 | 'special' event fields | ||
176 | ------------------------ | ||
177 | |||
178 | There are a number of 'special event fields' available for use as | ||
179 | keys or values in a hist trigger. These look like and behave as if | ||
180 | they were actual event fields, but aren't really part of the event's | ||
181 | field definition or format file. They are however available for any | ||
182 | event, and can be used anywhere an actual event field could be. | ||
183 | They are: | ||
184 | |||
185 | common_timestamp u64 - timestamp (from ring buffer) associated | ||
186 | with the event, in nanoseconds. May be | ||
187 | modified by .usecs to have timestamps | ||
188 | interpreted as microseconds. | ||
189 | cpu int - the cpu on which the event occurred. | ||
190 | |||
191 | Extended error information | ||
192 | -------------------------- | ||
193 | |||
194 | For some error conditions encountered when invoking a hist trigger | ||
195 | command, extended error information is available via the | ||
196 | corresponding event's 'hist' file. Reading the hist file after an | ||
197 | error will display more detailed information about what went wrong, | ||
198 | if information is available. This extended error information will | ||
199 | be available until the next hist trigger command for that event. | ||
200 | |||
201 | If available for a given error condition, the extended error | ||
202 | information and usage takes the following form: | ||
203 | |||
204 | # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger | ||
205 | echo: write error: Invalid argument | ||
206 | |||
207 | # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist | ||
208 | ERROR: Couldn't yyy: zzz | ||
209 | Last command: xxx | ||
210 | |||
211 | 6.2 'hist' trigger examples | ||
212 | --------------------------- | ||
213 | |||
214 | The first set of examples creates aggregations using the kmalloc | ||
215 | event. The fields that can be used for the hist trigger are listed | ||
216 | in the kmalloc event's format file: | ||
217 | |||
218 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format | ||
219 | name: kmalloc | ||
220 | ID: 374 | ||
221 | format: | ||
222 | field:unsigned short common_type; offset:0; size:2; signed:0; | ||
223 | field:unsigned char common_flags; offset:2; size:1; signed:0; | ||
224 | field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | ||
225 | field:int common_pid; offset:4; size:4; signed:1; | ||
226 | |||
227 | field:unsigned long call_site; offset:8; size:8; signed:0; | ||
228 | field:const void * ptr; offset:16; size:8; signed:0; | ||
229 | field:size_t bytes_req; offset:24; size:8; signed:0; | ||
230 | field:size_t bytes_alloc; offset:32; size:8; signed:0; | ||
231 | field:gfp_t gfp_flags; offset:40; size:4; signed:0; | ||
232 | |||
233 | We'll start by creating a hist trigger that generates a simple table | ||
234 | that lists the total number of bytes requested for each function in | ||
235 | the kernel that made one or more calls to kmalloc: | ||
236 | |||
237 | # echo 'hist:key=call_site:val=bytes_req' > \ | ||
238 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
239 | |||
240 | This tells the tracing system to create a 'hist' trigger using the | ||
241 | call_site field of the kmalloc event as the key for the table, which | ||
242 | just means that each unique call_site address will have an entry | ||
243 | created for it in the table. The 'val=bytes_req' parameter tells | ||
244 | the hist trigger that for each unique entry (call_site) in the | ||
245 | table, it should keep a running total of the number of bytes | ||
246 | requested by that call_site. | ||
247 | |||
248 | We'll let it run for a while and then dump the contents of the 'hist' | ||
249 | file in the kmalloc event's subdirectory (for readability, a number | ||
250 | of entries have been omitted): | ||
251 | |||
252 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
253 | # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
254 | |||
255 | { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176 | ||
256 | { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024 | ||
257 | { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384 | ||
258 | { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24 | ||
259 | { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8 | ||
260 | { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152 | ||
261 | { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144 | ||
262 | { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144 | ||
263 | { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560 | ||
264 | { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736 | ||
265 | . | ||
266 | . | ||
267 | . | ||
268 | { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576 | ||
269 | { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336 | ||
270 | { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504 | ||
271 | { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584 | ||
272 | { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448 | ||
273 | { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720 | ||
274 | { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088 | ||
275 | { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920 | ||
276 | { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716 | ||
277 | { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712 | ||
278 | { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160 | ||
279 | { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520 | ||
280 | |||
281 | Totals: | ||
282 | Hits: 4610 | ||
283 | Entries: 45 | ||
284 | Dropped: 0 | ||
285 | |||
286 | The output displays a line for each entry, beginning with the key | ||
287 | specified in the trigger, followed by the value(s) also specified in | ||
288 | the trigger. At the beginning of the output is a line that displays | ||
289 | the trigger info, which can also be displayed by reading the | ||
290 | 'trigger' file: | ||
291 | |||
292 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
293 | hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
294 | |||
295 | At the end of the output are a few lines that display the overall | ||
296 | totals for the run. The 'Hits' field shows the total number of | ||
297 | times the event trigger was hit, the 'Entries' field shows the total | ||
298 | number of used entries in the hash table, and the 'Dropped' field | ||
299 | shows the number of hits that were dropped because the number of | ||
300 | used entries for the run exceeded the maximum number of entries | ||
301 | allowed for the table (normally 0, but if not a hint that you may | ||
302 | want to increase the size of the table using the 'size' parameter). | ||
303 | |||
304 | Notice in the above output that there's an extra field, 'hitcount', | ||
305 | which wasn't specified in the trigger. Also notice that in the | ||
306 | trigger info output, there's a parameter, 'sort=hitcount', which | ||
307 | wasn't specified in the trigger either. The reason for that is that | ||
308 | every trigger implicitly keeps a count of the total number of hits | ||
309 | attributed to a given entry, called the 'hitcount'. That hitcount | ||
310 | information is explicitly displayed in the output, and in the | ||
311 | absence of a user-specified sort parameter, is used as the default | ||
312 | sort field. | ||
313 | |||
314 | The value 'hitcount' can be used in place of an explicit value in | ||
315 | the 'values' parameter if you don't really need to have any | ||
316 | particular field summed and are mainly interested in hit | ||
317 | frequencies. | ||
318 | |||
319 | To turn the hist trigger off, simply call up the trigger in the | ||
320 | command history and re-execute it with a '!' prepended: | ||
321 | |||
322 | # echo '!hist:key=call_site:val=bytes_req' > \ | ||
323 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
324 | |||
325 | Finally, notice that the call_site as displayed in the output above | ||
326 | isn't really very useful. It's an address, but normally addresses | ||
327 | are displayed in hex. To have a numeric field displayed as a hex | ||
328 | value, simply append '.hex' to the field name in the trigger: | ||
329 | |||
330 | # echo 'hist:key=call_site.hex:val=bytes_req' > \ | ||
331 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
332 | |||
333 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
334 | # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
335 | |||
336 | { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 | ||
337 | { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 | ||
338 | { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 | ||
339 | { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 | ||
340 | { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 | ||
341 | { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 | ||
342 | { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 | ||
343 | { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 | ||
344 | { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 | ||
345 | { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 | ||
346 | { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 | ||
347 | { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 | ||
348 | . | ||
349 | . | ||
350 | . | ||
351 | { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 | ||
352 | { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 | ||
353 | { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 | ||
354 | { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 | ||
355 | { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 | ||
356 | { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 | ||
357 | { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 | ||
358 | { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 | ||
359 | { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 | ||
360 | { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 | ||
361 | { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 | ||
362 | |||
363 | Totals: | ||
364 | Hits: 4775 | ||
365 | Entries: 46 | ||
366 | Dropped: 0 | ||
367 | |||
368 | Even that's only marginally more useful - while hex values do look | ||
369 | more like addresses, what users are typically more interested in | ||
370 | when looking at text addresses are the corresponding symbols | ||
371 | instead. To have an address displayed as a symbolic value instead, | ||
372 | simply append '.sym' or '.sym-offset' to the field name in the | ||
373 | trigger: | ||
374 | |||
375 | # echo 'hist:key=call_site.sym:val=bytes_req' > \ | ||
376 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
377 | |||
378 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
379 | # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active] | ||
380 | |||
381 | { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024 | ||
382 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | ||
383 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | ||
384 | { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192 | ||
385 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | ||
386 | { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | ||
387 | { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | ||
388 | { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528 | ||
389 | { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624 | ||
390 | { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96 | ||
391 | { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464 | ||
392 | { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304 | ||
393 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | ||
394 | { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424 | ||
395 | . | ||
396 | . | ||
397 | . | ||
398 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240 | ||
399 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280 | ||
400 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672 | ||
401 | { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208 | ||
402 | { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840 | ||
403 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312 | ||
404 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152 | ||
405 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576 | ||
406 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248 | ||
407 | { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384 | ||
408 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584 | ||
409 | { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176 | ||
410 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265 | ||
411 | |||
412 | Totals: | ||
413 | Hits: 109928 | ||
414 | Entries: 71 | ||
415 | Dropped: 0 | ||
416 | |||
417 | Because the default sort key above is 'hitcount', the above shows | ||
418 | the list of call_sites by increasing hitcount, so that at the bottom | ||
419 | we see the functions that made the most kmalloc calls during the | ||
420 | run. If instead we wanted to see the top kmalloc callers in | ||
421 | terms of the number of bytes requested rather than the number of | ||
422 | calls, and we wanted the top caller to appear at the top, we can use | ||
423 | the 'sort' parameter, along with the 'descending' modifier: | ||
424 | |||
425 | # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \ | ||
426 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
427 | |||
428 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
429 | # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | ||
430 | |||
431 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464 | ||
432 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176 | ||
433 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135 | ||
434 | { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128 | ||
435 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784 | ||
436 | { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992 | ||
437 | { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072 | ||
438 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824 | ||
439 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704 | ||
440 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088 | ||
441 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536 | ||
442 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664 | ||
443 | { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632 | ||
444 | . | ||
445 | . | ||
446 | . | ||
447 | { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | ||
448 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | ||
449 | { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 | ||
450 | { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 | ||
451 | { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 | ||
452 | { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | ||
453 | { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 | ||
454 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | ||
455 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | ||
456 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | ||
457 | |||
458 | Totals: | ||
459 | Hits: 32133 | ||
460 | Entries: 81 | ||
461 | Dropped: 0 | ||
462 | |||
463 | To display the offset and size information in addition to the symbol | ||
464 | name, just use 'sym-offset' instead: | ||
465 | |||
466 | # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ | ||
467 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
468 | |||
469 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
470 | # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | ||
471 | |||
472 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 | ||
473 | { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 | ||
474 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 | ||
475 | { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 | ||
476 | { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 | ||
477 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 | ||
478 | { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 | ||
479 | { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 | ||
480 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 | ||
481 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 | ||
482 | { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 | ||
483 | { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 | ||
484 | . | ||
485 | . | ||
486 | . | ||
487 | { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 | ||
488 | { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 | ||
489 | { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 | ||
490 | { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 | ||
491 | { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 | ||
492 | { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 | ||
493 | { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 | ||
494 | |||
495 | Totals: | ||
496 | Hits: 26098 | ||
497 | Entries: 64 | ||
498 | Dropped: 0 | ||
499 | |||
500 | We can also add multiple fields to the 'values' parameter. For | ||
501 | example, we might want to see the total number of bytes allocated | ||
502 | alongside bytes requested, and display the result sorted by bytes | ||
503 | allocated in a descending order: | ||
504 | |||
505 | # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \ | ||
506 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
507 | |||
508 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
509 | # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active] | ||
510 | |||
511 | { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016 | ||
512 | { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224 | ||
513 | { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568 | ||
514 | { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760 | ||
515 | { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744 | ||
516 | { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400 | ||
517 | { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496 | ||
518 | { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304 | ||
519 | { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640 | ||
520 | { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760 | ||
521 | { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312 | ||
522 | { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432 | ||
523 | . | ||
524 | . | ||
525 | . | ||
526 | { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192 | ||
527 | { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
528 | { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
529 | { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
530 | { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | ||
531 | { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96 | ||
532 | { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64 | ||
533 | { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | ||
534 | { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8 | ||
535 | { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | ||
536 | |||
537 | Totals: | ||
538 | Hits: 66598 | ||
539 | Entries: 65 | ||
540 | Dropped: 0 | ||
541 | |||
542 | Finally, to finish off our kmalloc example, instead of simply having | ||
543 | the hist trigger display symbolic call_sites, we can have the hist | ||
544 | trigger additionally display the complete set of kernel stack traces | ||
545 | that led to each call_site. To do that, we simply use the special | ||
546 | value 'stacktrace' for the key parameter: | ||
547 | |||
548 | # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ | ||
549 | /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | ||
550 | |||
551 | The above trigger will use the kernel stack trace in effect when an | ||
552 | event is triggered as the key for the hash table. This allows the | ||
553 | enumeration of every kernel callpath that led up to a particular | ||
554 | event, along with a running total of any of the event fields for | ||
555 | that event. Here we tally bytes requested and bytes allocated for | ||
556 | every callpath in the system that led up to a kmalloc (in this case | ||
557 | every callpath to a kmalloc for a kernel compile): | ||
558 | |||
559 | # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | ||
560 | # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] | ||
561 | |||
562 | { stacktrace: | ||
563 | __kmalloc_track_caller+0x10b/0x1a0 | ||
564 | kmemdup+0x20/0x50 | ||
565 | hidraw_report_event+0x8a/0x120 [hid] | ||
566 | hid_report_raw_event+0x3ea/0x440 [hid] | ||
567 | hid_input_report+0x112/0x190 [hid] | ||
568 | hid_irq_in+0xc2/0x260 [usbhid] | ||
569 | __usb_hcd_giveback_urb+0x72/0x120 | ||
570 | usb_giveback_urb_bh+0x9e/0xe0 | ||
571 | tasklet_hi_action+0xf8/0x100 | ||
572 | __do_softirq+0x114/0x2c0 | ||
573 | irq_exit+0xa5/0xb0 | ||
574 | do_IRQ+0x5a/0xf0 | ||
575 | ret_from_intr+0x0/0x30 | ||
576 | cpuidle_enter+0x17/0x20 | ||
577 | cpu_startup_entry+0x315/0x3e0 | ||
578 | rest_init+0x7c/0x80 | ||
579 | } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | ||
580 | { stacktrace: | ||
581 | __kmalloc_track_caller+0x10b/0x1a0 | ||
582 | kmemdup+0x20/0x50 | ||
583 | hidraw_report_event+0x8a/0x120 [hid] | ||
584 | hid_report_raw_event+0x3ea/0x440 [hid] | ||
585 | hid_input_report+0x112/0x190 [hid] | ||
586 | hid_irq_in+0xc2/0x260 [usbhid] | ||
587 | __usb_hcd_giveback_urb+0x72/0x120 | ||
588 | usb_giveback_urb_bh+0x9e/0xe0 | ||
589 | tasklet_hi_action+0xf8/0x100 | ||
590 | __do_softirq+0x114/0x2c0 | ||
591 | irq_exit+0xa5/0xb0 | ||
592 | do_IRQ+0x5a/0xf0 | ||
593 | ret_from_intr+0x0/0x30 | ||
594 | } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | ||
595 | { stacktrace: | ||
596 | kmem_cache_alloc_trace+0xeb/0x150 | ||
597 | aa_alloc_task_context+0x27/0x40 | ||
598 | apparmor_cred_prepare+0x1f/0x50 | ||
599 | security_prepare_creds+0x16/0x20 | ||
600 | prepare_creds+0xdf/0x1a0 | ||
601 | SyS_capset+0xb5/0x200 | ||
602 | system_call_fastpath+0x12/0x6a | ||
603 | } hitcount: 1 bytes_req: 32 bytes_alloc: 32 | ||
604 | . | ||
605 | . | ||
606 | . | ||
607 | { stacktrace: | ||
608 | __kmalloc+0x11b/0x1b0 | ||
609 | i915_gem_execbuffer2+0x6c/0x2c0 [i915] | ||
610 | drm_ioctl+0x349/0x670 [drm] | ||
611 | do_vfs_ioctl+0x2f0/0x4f0 | ||
612 | SyS_ioctl+0x81/0xa0 | ||
613 | system_call_fastpath+0x12/0x6a | ||
614 | } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808 | ||
615 | { stacktrace: | ||
616 | __kmalloc+0x11b/0x1b0 | ||
617 | load_elf_phdrs+0x76/0xa0 | ||
618 | load_elf_binary+0x102/0x1650 | ||
619 | search_binary_handler+0x97/0x1d0 | ||
620 | do_execveat_common.isra.34+0x551/0x6e0 | ||
621 | SyS_execve+0x3a/0x50 | ||
622 | return_from_execve+0x0/0x23 | ||
623 | } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048 | ||
624 | { stacktrace: | ||
625 | kmem_cache_alloc_trace+0xeb/0x150 | ||
626 | apparmor_file_alloc_security+0x27/0x40 | ||
627 | security_file_alloc+0x16/0x20 | ||
628 | get_empty_filp+0x93/0x1c0 | ||
629 | path_openat+0x31/0x5f0 | ||
630 | do_filp_open+0x3a/0x90 | ||
631 | do_sys_open+0x128/0x220 | ||
632 | SyS_open+0x1e/0x20 | ||
633 | system_call_fastpath+0x12/0x6a | ||
634 | } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376 | ||
635 | { stacktrace: | ||
636 | __kmalloc+0x11b/0x1b0 | ||
637 | seq_buf_alloc+0x1b/0x50 | ||
638 | seq_read+0x2cc/0x370 | ||
639 | proc_reg_read+0x3d/0x80 | ||
640 | __vfs_read+0x28/0xe0 | ||
641 | vfs_read+0x86/0x140 | ||
642 | SyS_read+0x46/0xb0 | ||
643 | system_call_fastpath+0x12/0x6a | ||
644 | } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768 | ||
645 | |||
646 | Totals: | ||
647 | Hits: 6085872 | ||
648 | Entries: 253 | ||
649 | Dropped: 0 | ||
650 | |||
651 | If you key a hist trigger on common_pid, in order for example to | ||
652 | gather and display sorted totals for each process, you can use the | ||
653 | special .execname modifier to display the executable names for the | ||
654 | processes in the table rather than raw pids. The example below | ||
655 | keeps a per-process sum of total bytes read: | ||
656 | |||
657 | # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \ | ||
658 | /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger | ||
659 | |||
660 | # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist | ||
661 | # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active] | ||
662 | |||
663 | { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512 | ||
664 | { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640 | ||
665 | { common_pid: compiz [ 2889] } hitcount: 59 count: 254400 | ||
666 | { common_pid: bash [ 8710] } hitcount: 3 count: 66369 | ||
667 | { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739 | ||
668 | { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648 | ||
669 | { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216 | ||
670 | { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396 | ||
671 | { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264 | ||
672 | { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424 | ||
673 | { common_pid: gmain [ 1315] } hitcount: 18 count: 6336 | ||
674 | . | ||
675 | . | ||
676 | . | ||
677 | { common_pid: postgres [ 1892] } hitcount: 2 count: 32 | ||
678 | { common_pid: postgres [ 1891] } hitcount: 2 count: 32 | ||
679 | { common_pid: gmain [ 8704] } hitcount: 2 count: 32 | ||
680 | { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21 | ||
681 | { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16 | ||
682 | { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16 | ||
683 | { common_pid: gdbus [ 2998] } hitcount: 1 count: 16 | ||
684 | { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8 | ||
685 | { common_pid: init [ 1] } hitcount: 2 count: 2 | ||
686 | |||
687 | Totals: | ||
688 | Hits: 2116 | ||
689 | Entries: 51 | ||
690 | Dropped: 0 | ||
691 | |||
692 | Similarly, if you key a hist trigger on syscall id, for example to | ||
693 | gather and display a list of systemwide syscall hits, you can use | ||
694 | the special .syscall modifier to display the syscall names rather | ||
695 | than raw ids. The example below keeps a running total of syscall | ||
696 | counts for the system during the run: | ||
697 | |||
698 | # echo 'hist:key=id.syscall:val=hitcount' > \ | ||
699 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
700 | |||
701 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
702 | # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] | ||
703 | |||
704 | { id: sys_fsync [ 74] } hitcount: 1 | ||
705 | { id: sys_newuname [ 63] } hitcount: 1 | ||
706 | { id: sys_prctl [157] } hitcount: 1 | ||
707 | { id: sys_statfs [137] } hitcount: 1 | ||
708 | { id: sys_symlink [ 88] } hitcount: 1 | ||
709 | { id: sys_sendmmsg [307] } hitcount: 1 | ||
710 | { id: sys_semctl [ 66] } hitcount: 1 | ||
711 | { id: sys_readlink [ 89] } hitcount: 3 | ||
712 | { id: sys_bind [ 49] } hitcount: 3 | ||
713 | { id: sys_getsockname [ 51] } hitcount: 3 | ||
714 | { id: sys_unlink [ 87] } hitcount: 3 | ||
715 | { id: sys_rename [ 82] } hitcount: 4 | ||
716 | { id: unknown_syscall [ 58] } hitcount: 4 | ||
717 | { id: sys_connect [ 42] } hitcount: 4 | ||
718 | { id: sys_getpid [ 39] } hitcount: 4 | ||
719 | . | ||
720 | . | ||
721 | . | ||
722 | { id: sys_rt_sigprocmask [ 14] } hitcount: 952 | ||
723 | { id: sys_futex [202] } hitcount: 1534 | ||
724 | { id: sys_write [ 1] } hitcount: 2689 | ||
725 | { id: sys_setitimer [ 38] } hitcount: 2797 | ||
726 | { id: sys_read [ 0] } hitcount: 3202 | ||
727 | { id: sys_select [ 23] } hitcount: 3773 | ||
728 | { id: sys_writev [ 20] } hitcount: 4531 | ||
729 | { id: sys_poll [ 7] } hitcount: 8314 | ||
730 | { id: sys_recvmsg [ 47] } hitcount: 13738 | ||
731 | { id: sys_ioctl [ 16] } hitcount: 21843 | ||
732 | |||
733 | Totals: | ||
734 | Hits: 67612 | ||
735 | Entries: 72 | ||
736 | Dropped: 0 | ||
737 | |||
738 | The syscall counts above provide a rough overall picture of system | ||
739 | call activity on the system; we can see for example that the most | ||
740 | popular system call on this system was the 'sys_ioctl' system call. | ||
741 | |||
742 | We can use 'compound' keys to refine that number and provide some | ||
743 | further insight as to which processes exactly contribute to the | ||
744 | overall ioctl count. | ||
745 | |||
746 | The command below keeps a hitcount for every unique combination of | ||
747 | system call id and pid - the end result is essentially a table | ||
748 | that keeps a per-pid sum of system call hits. The results are | ||
749 | sorted using the system call id as the primary key, and the | ||
750 | hitcount sum as the secondary key: | ||
751 | |||
752 | # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ | ||
753 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
754 | |||
755 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
756 | # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] | ||
757 | |||
758 | { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 | ||
759 | { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 | ||
760 | { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 | ||
761 | { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 | ||
762 | { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 | ||
763 | { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 | ||
764 | { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 | ||
765 | { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 | ||
766 | { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 | ||
767 | { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 | ||
768 | . | ||
769 | . | ||
770 | . | ||
771 | { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1 | ||
772 | { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12 | ||
773 | { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16 | ||
774 | { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808 | ||
775 | { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580 | ||
776 | . | ||
777 | . | ||
778 | . | ||
779 | { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3 | ||
780 | { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16 | ||
781 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2 | ||
782 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4 | ||
783 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4 | ||
784 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4 | ||
785 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4 | ||
786 | { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6 | ||
787 | { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2 | ||
788 | { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4 | ||
789 | { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6 | ||
790 | |||
791 | Totals: | ||
792 | Hits: 31536 | ||
793 | Entries: 323 | ||
794 | Dropped: 0 | ||
795 | |||
796 | The above list does give us a breakdown of the ioctl syscall by | ||
797 | pid, but it also gives us quite a bit more than that, which we | ||
798 | don't really care about at the moment. Since we know the syscall | ||
799 | id for sys_ioctl (16, displayed next to the sys_ioctl name), we | ||
800 | can use that to filter out all the other syscalls: | ||
801 | |||
802 | # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \ | ||
803 | /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | ||
804 | |||
805 | # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | ||
806 | # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active] | ||
807 | |||
808 | { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1 | ||
809 | { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1 | ||
810 | { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1 | ||
811 | { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1 | ||
812 | { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1 | ||
813 | { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1 | ||
814 | { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1 | ||
815 | { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1 | ||
816 | { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1 | ||
817 | . | ||
818 | . | ||
819 | . | ||
820 | { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45 | ||
821 | { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48 | ||
822 | { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48 | ||
823 | { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66 | ||
824 | { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674 | ||
825 | { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443 | ||
826 | |||
827 | Totals: | ||
828 | Hits: 101162 | ||
829 | Entries: 103 | ||
830 | Dropped: 0 | ||
831 | |||
832 | The above output shows that 'compiz' and 'Xorg' are far and away | ||
833 | the heaviest ioctl callers (which might lead to questions about | ||
834 | whether they really need to be making all those calls and to | ||
835 | possible avenues for further investigation.) | ||
836 | |||
837 | The compound key examples used a key and a sum value (hitcount) to | ||
838 | sort the output, but we can just as easily use two keys instead. | ||
839 | Here's an example where we use a compound key composed of the | ||
840 | common_pid and size event fields. Sorting with pid as the primary | ||
841 | key and 'size' as the secondary key allows us to display an | ||
842 | ordered summary of the recvfrom sizes, with counts, received by | ||
843 | each process: | ||
844 | |||
845 | # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ | ||
846 | /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger | ||
847 | |||
848 | # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist | ||
849 | # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] | ||
850 | |||
851 | { common_pid: smbd [ 784], size: 4 } hitcount: 1 | ||
852 | { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 | ||
853 | { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 | ||
854 | { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 | ||
855 | { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 | ||
856 | { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 | ||
857 | { common_pid: compiz [ 2994], size: 8 } hitcount: 1 | ||
858 | { common_pid: compiz [ 2994], size: 20 } hitcount: 11 | ||
859 | { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 | ||
860 | { common_pid: firefox [ 8817], size: 4 } hitcount: 1 | ||
861 | { common_pid: firefox [ 8817], size: 8 } hitcount: 5 | ||
862 | { common_pid: firefox [ 8817], size: 588 } hitcount: 2 | ||
863 | { common_pid: firefox [ 8817], size: 628 } hitcount: 1 | ||
864 | { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 | ||
865 | { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 | ||
866 | { common_pid: firefox [ 8822], size: 8 } hitcount: 2 | ||
867 | { common_pid: firefox [ 8822], size: 160 } hitcount: 2 | ||
868 | { common_pid: firefox [ 8822], size: 320 } hitcount: 2 | ||
869 | { common_pid: firefox [ 8822], size: 352 } hitcount: 1 | ||
870 | . | ||
871 | . | ||
872 | . | ||
873 | { common_pid: pool [ 8923], size: 1960 } hitcount: 10 | ||
874 | { common_pid: pool [ 8923], size: 2048 } hitcount: 10 | ||
875 | { common_pid: pool [ 8924], size: 1960 } hitcount: 10 | ||
876 | { common_pid: pool [ 8924], size: 2048 } hitcount: 10 | ||
877 | { common_pid: pool [ 8928], size: 1964 } hitcount: 4 | ||
878 | { common_pid: pool [ 8928], size: 1965 } hitcount: 2 | ||
879 | { common_pid: pool [ 8928], size: 2048 } hitcount: 6 | ||
880 | { common_pid: pool [ 8929], size: 1982 } hitcount: 1 | ||
881 | { common_pid: pool [ 8929], size: 2048 } hitcount: 1 | ||
882 | |||
883 | Totals: | ||
884 | Hits: 2016 | ||
885 | Entries: 224 | ||
886 | Dropped: 0 | ||
887 | |||
888 | The above example also illustrates the fact that although a compound | ||
889 | key is treated as a single entity for hashing purposes, the sub-keys | ||
890 | it's composed of can be accessed independently. | ||
891 | |||
892 | The next example uses a string field as the hash key and | ||
893 | demonstrates how you can manually pause and continue a hist trigger. | ||
894 | In this example, we'll aggregate fork counts and don't expect a | ||
895 | large number of entries in the hash table, so we'll drop it to a | ||
896 | much smaller number, say 256: | ||
897 | |||
898 | # echo 'hist:key=child_comm:val=hitcount:size=256' > \ | ||
899 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
900 | |||
901 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
902 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | ||
903 | |||
904 | { child_comm: dconf worker } hitcount: 1 | ||
905 | { child_comm: ibus-daemon } hitcount: 1 | ||
906 | { child_comm: whoopsie } hitcount: 1 | ||
907 | { child_comm: smbd } hitcount: 1 | ||
908 | { child_comm: gdbus } hitcount: 1 | ||
909 | { child_comm: kthreadd } hitcount: 1 | ||
910 | { child_comm: dconf worker } hitcount: 1 | ||
911 | { child_comm: evolution-alarm } hitcount: 2 | ||
912 | { child_comm: Socket Thread } hitcount: 2 | ||
913 | { child_comm: postgres } hitcount: 2 | ||
914 | { child_comm: bash } hitcount: 3 | ||
915 | { child_comm: compiz } hitcount: 3 | ||
916 | { child_comm: evolution-sourc } hitcount: 4 | ||
917 | { child_comm: dhclient } hitcount: 4 | ||
918 | { child_comm: pool } hitcount: 5 | ||
919 | { child_comm: nm-dispatcher.a } hitcount: 8 | ||
920 | { child_comm: firefox } hitcount: 8 | ||
921 | { child_comm: dbus-daemon } hitcount: 8 | ||
922 | { child_comm: glib-pacrunner } hitcount: 10 | ||
923 | { child_comm: evolution } hitcount: 23 | ||
924 | |||
925 | Totals: | ||
926 | Hits: 89 | ||
927 | Entries: 20 | ||
928 | Dropped: 0 | ||
929 | |||
930 | If we want to pause the hist trigger, we can simply append :pause to | ||
931 | the command that started the trigger. Notice that the trigger info | ||
932 | displays as [paused]: | ||
933 | |||
934 | # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ | ||
935 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
936 | |||
937 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
938 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] | ||
939 | |||
940 | { child_comm: dconf worker } hitcount: 1 | ||
941 | { child_comm: kthreadd } hitcount: 1 | ||
942 | { child_comm: dconf worker } hitcount: 1 | ||
943 | { child_comm: gdbus } hitcount: 1 | ||
944 | { child_comm: ibus-daemon } hitcount: 1 | ||
945 | { child_comm: Socket Thread } hitcount: 2 | ||
946 | { child_comm: evolution-alarm } hitcount: 2 | ||
947 | { child_comm: smbd } hitcount: 2 | ||
948 | { child_comm: bash } hitcount: 3 | ||
949 | { child_comm: whoopsie } hitcount: 3 | ||
950 | { child_comm: compiz } hitcount: 3 | ||
951 | { child_comm: evolution-sourc } hitcount: 4 | ||
952 | { child_comm: pool } hitcount: 5 | ||
953 | { child_comm: postgres } hitcount: 6 | ||
954 | { child_comm: firefox } hitcount: 8 | ||
955 | { child_comm: dhclient } hitcount: 10 | ||
956 | { child_comm: emacs } hitcount: 12 | ||
957 | { child_comm: dbus-daemon } hitcount: 20 | ||
958 | { child_comm: nm-dispatcher.a } hitcount: 20 | ||
959 | { child_comm: evolution } hitcount: 35 | ||
960 | { child_comm: glib-pacrunner } hitcount: 59 | ||
961 | |||
962 | Totals: | ||
963 | Hits: 199 | ||
964 | Entries: 21 | ||
965 | Dropped: 0 | ||
966 | |||
967 | To manually continue having the trigger aggregate events, append | ||
968 | :cont instead. Notice that the trigger info displays as [active] | ||
969 | again, and the data has changed: | ||
970 | |||
971 | # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \ | ||
972 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
973 | |||
974 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
975 | # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | ||
976 | |||
977 | { child_comm: dconf worker } hitcount: 1 | ||
978 | { child_comm: dconf worker } hitcount: 1 | ||
979 | { child_comm: kthreadd } hitcount: 1 | ||
980 | { child_comm: gdbus } hitcount: 1 | ||
981 | { child_comm: ibus-daemon } hitcount: 1 | ||
982 | { child_comm: Socket Thread } hitcount: 2 | ||
983 | { child_comm: evolution-alarm } hitcount: 2 | ||
984 | { child_comm: smbd } hitcount: 2 | ||
985 | { child_comm: whoopsie } hitcount: 3 | ||
986 | { child_comm: compiz } hitcount: 3 | ||
987 | { child_comm: evolution-sourc } hitcount: 4 | ||
988 | { child_comm: bash } hitcount: 5 | ||
989 | { child_comm: pool } hitcount: 5 | ||
990 | { child_comm: postgres } hitcount: 6 | ||
991 | { child_comm: firefox } hitcount: 8 | ||
992 | { child_comm: dhclient } hitcount: 11 | ||
993 | { child_comm: emacs } hitcount: 12 | ||
994 | { child_comm: dbus-daemon } hitcount: 22 | ||
995 | { child_comm: nm-dispatcher.a } hitcount: 22 | ||
996 | { child_comm: evolution } hitcount: 35 | ||
997 | { child_comm: glib-pacrunner } hitcount: 59 | ||
998 | |||
999 | Totals: | ||
1000 | Hits: 206 | ||
1001 | Entries: 21 | ||
1002 | Dropped: 0 | ||
1003 | |||
1004 | The previous example showed how to start and stop a hist trigger by | ||
1005 | appending 'pause' and 'continue' to the hist trigger command. A | ||
1006 | hist trigger can also be started in a paused state by initially | ||
1007 | starting the trigger with ':pause' appended. This allows you to | ||
1008 | start the trigger only when you're ready to start collecting data | ||
1009 | and not before. For example, you could start the trigger in a | ||
1010 | paused state, then unpause it and do something you want to measure, | ||
1011 | then pause the trigger again when done. | ||
1012 | |||
1013 | Of course, doing this manually can be difficult and error-prone, but | ||
1014 | it is possible to automatically start and stop a hist trigger based | ||
1015 | on some condition, via the enable_hist and disable_hist triggers. | ||
1016 | |||
1017 | For example, suppose we wanted to take a look at the relative | ||
1018 | weights in terms of skb length for each callpath that leads to a | ||
1019 | netif_receive_skb event when downloading a decent-sized file using | ||
1020 | wget. | ||
1021 | |||
1022 | First we set up an initially paused stacktrace trigger on the | ||
1023 | netif_receive_skb event: | ||
1024 | |||
1025 | # echo 'hist:key=stacktrace:vals=len:pause' > \ | ||
1026 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1027 | |||
1028 | Next, we set up an 'enable_hist' trigger on the sched_process_exec | ||
1029 | event, with an 'if filename==/usr/bin/wget' filter. The effect of | ||
1030 | this new trigger is that it will 'unpause' the hist trigger we just | ||
1031 | set up on netif_receive_skb if and only if it sees a | ||
1032 | sched_process_exec event with a filename of '/usr/bin/wget'. When | ||
1033 | that happens, all netif_receive_skb events are aggregated into a | ||
1034 | hash table keyed on stacktrace: | ||
1035 | |||
1036 | # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
1037 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1038 | |||
1039 | The aggregation continues until the netif_receive_skb hist trigger is paused | ||
1040 | again, which is what the following disable_hist event does by | ||
1041 | creating a similar setup on the sched_process_exit event, using the | ||
1042 | filter 'comm==wget': | ||
1043 | |||
1044 | # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | ||
1045 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1046 | |||
1047 | Whenever a process exits and the comm field of the disable_hist | ||
1048 | trigger filter matches 'comm==wget', the netif_receive_skb hist | ||
1049 | trigger is disabled. | ||
1050 | |||
1051 | The overall effect is that netif_receive_skb events are aggregated | ||
1052 | into the hash table for only the duration of the wget. Executing a | ||
1053 | wget command and then listing the 'hist' file will display the | ||
1054 | output generated by the wget command: | ||
1055 | |||
1056 | $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | ||
1057 | |||
1058 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1059 | # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | ||
1060 | |||
1061 | { stacktrace: | ||
1062 | __netif_receive_skb_core+0x46d/0x990 | ||
1063 | __netif_receive_skb+0x18/0x60 | ||
1064 | netif_receive_skb_internal+0x23/0x90 | ||
1065 | napi_gro_receive+0xc8/0x100 | ||
1066 | ieee80211_deliver_skb+0xd6/0x270 [mac80211] | ||
1067 | ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | ||
1068 | ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | ||
1069 | ieee80211_rx+0x31d/0x900 [mac80211] | ||
1070 | iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | ||
1071 | iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | ||
1072 | iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | ||
1073 | irq_thread_fn+0x20/0x50 | ||
1074 | irq_thread+0x11f/0x150 | ||
1075 | kthread+0xd2/0xf0 | ||
1076 | ret_from_fork+0x42/0x70 | ||
1077 | } hitcount: 85 len: 28884 | ||
1078 | { stacktrace: | ||
1079 | __netif_receive_skb_core+0x46d/0x990 | ||
1080 | __netif_receive_skb+0x18/0x60 | ||
1081 | netif_receive_skb_internal+0x23/0x90 | ||
1082 | napi_gro_complete+0xa4/0xe0 | ||
1083 | dev_gro_receive+0x23a/0x360 | ||
1084 | napi_gro_receive+0x30/0x100 | ||
1085 | ieee80211_deliver_skb+0xd6/0x270 [mac80211] | ||
1086 | ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | ||
1087 | ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | ||
1088 | ieee80211_rx+0x31d/0x900 [mac80211] | ||
1089 | iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | ||
1090 | iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | ||
1091 | iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | ||
1092 | irq_thread_fn+0x20/0x50 | ||
1093 | irq_thread+0x11f/0x150 | ||
1094 | kthread+0xd2/0xf0 | ||
1095 | } hitcount: 98 len: 664329 | ||
1096 | { stacktrace: | ||
1097 | __netif_receive_skb_core+0x46d/0x990 | ||
1098 | __netif_receive_skb+0x18/0x60 | ||
1099 | process_backlog+0xa8/0x150 | ||
1100 | net_rx_action+0x15d/0x340 | ||
1101 | __do_softirq+0x114/0x2c0 | ||
1102 | do_softirq_own_stack+0x1c/0x30 | ||
1103 | do_softirq+0x65/0x70 | ||
1104 | __local_bh_enable_ip+0xb5/0xc0 | ||
1105 | ip_finish_output+0x1f4/0x840 | ||
1106 | ip_output+0x6b/0xc0 | ||
1107 | ip_local_out_sk+0x31/0x40 | ||
1108 | ip_send_skb+0x1a/0x50 | ||
1109 | udp_send_skb+0x173/0x2a0 | ||
1110 | udp_sendmsg+0x2bf/0x9f0 | ||
1111 | inet_sendmsg+0x64/0xa0 | ||
1112 | sock_sendmsg+0x3d/0x50 | ||
1113 | } hitcount: 115 len: 13030 | ||
1114 | { stacktrace: | ||
1115 | __netif_receive_skb_core+0x46d/0x990 | ||
1116 | __netif_receive_skb+0x18/0x60 | ||
1117 | netif_receive_skb_internal+0x23/0x90 | ||
1118 | napi_gro_complete+0xa4/0xe0 | ||
1119 | napi_gro_flush+0x6d/0x90 | ||
1120 | iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] | ||
1121 | irq_thread_fn+0x20/0x50 | ||
1122 | irq_thread+0x11f/0x150 | ||
1123 | kthread+0xd2/0xf0 | ||
1124 | ret_from_fork+0x42/0x70 | ||
1125 | } hitcount: 934 len: 5512212 | ||
1126 | |||
1127 | Totals: | ||
1128 | Hits: 1232 | ||
1129 | Entries: 4 | ||
1130 | Dropped: 0 | ||
1131 | |||
1132 | The above shows all the netif_receive_skb callpaths and their total | ||
1133 | lengths for the duration of the wget command. | ||
1134 | |||
1135 | The 'clear' hist trigger param can be used to clear the hash table. | ||
1136 | Suppose we wanted to try another run of the previous example but | ||
1137 | this time also wanted to see the complete list of events that went | ||
1138 | into the histogram. In order to avoid having to set everything up | ||
1139 | again, we can just clear the histogram first: | ||
1140 | |||
1141 | # echo 'hist:key=stacktrace:vals=len:clear' >> \ | ||
1142 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1143 | |||
1144 | Just to verify that it is in fact cleared, here's what we now see in | ||
1145 | the hist file: | ||
1146 | |||
1147 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1148 | # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | ||
1149 | |||
1150 | Totals: | ||
1151 | Hits: 0 | ||
1152 | Entries: 0 | ||
1153 | Dropped: 0 | ||
1154 | |||
1155 | Since we want to see the detailed list of every netif_receive_skb | ||
1156 | event occurring during the new run, which are in fact the same | ||
1157 | events being aggregated into the hash table, we add some additional | ||
1158 | 'enable_event' events to the triggering sched_process_exec and | ||
1159 | sched_process_exit events as such: | ||
1160 | |||
1161 | # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ | ||
1162 | /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1163 | |||
1164 | # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ | ||
1165 | /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1166 | |||
1167 | If you read the trigger files for the sched_process_exec and | ||
1168 | sched_process_exit triggers, you should see two triggers for each: | ||
1169 | one enabling/disabling the hist aggregation and the other | ||
1170 | enabling/disabling the logging of events: | ||
1171 | |||
1172 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | ||
1173 | enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | ||
1174 | enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | ||
1175 | |||
1176 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | ||
1177 | disable_event:net:netif_receive_skb:unlimited if comm==wget | ||
1178 | disable_hist:net:netif_receive_skb:unlimited if comm==wget | ||
1179 | |||
1180 | In other words, whenever either of the sched_process_exec or | ||
1181 | sched_process_exit events is hit and matches 'wget', it enables or | ||
1182 | disables both the histogram and the event log, and what you end up | ||
1183 | with is a hash table and set of events just covering the specified | ||
1184 | duration. Run the wget command again: | ||
1185 | |||
1186 | $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | ||
1187 | |||
1188 | Displaying the 'hist' file should show something similar to what you | ||
1189 | saw in the last run, but this time you should also see the | ||
1190 | individual events in the trace file: | ||
1191 | |||
1192 | # cat /sys/kernel/debug/tracing/trace | ||
1193 | |||
1194 | # tracer: nop | ||
1195 | # | ||
1196 | # entries-in-buffer/entries-written: 183/1426 #P:4 | ||
1197 | # | ||
1198 | # _-----=> irqs-off | ||
1199 | # / _----=> need-resched | ||
1200 | # | / _---=> hardirq/softirq | ||
1201 | # || / _--=> preempt-depth | ||
1202 | # ||| / delay | ||
1203 | # TASK-PID CPU# |||| TIMESTAMP FUNCTION | ||
1204 | # | | | |||| | | | ||
1205 | wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 | ||
1206 | wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 | ||
1207 | dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 | ||
1208 | dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 | ||
1209 | ##### CPU 2 buffer started #### | ||
1210 | irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 | ||
1211 | irq/29-iwlwifi-559 [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 | ||
1212 | irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 | ||
1213 | irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 | ||
1214 | irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 | ||
1215 | . | ||
1216 | . | ||
1217 | . | ||
1218 | |||
1219 | The following example demonstrates how multiple hist triggers can be | ||
1220 | attached to a given event. This capability can be useful for | ||
1221 | creating a set of different summaries derived from the same set of | ||
1222 | events, or for comparing the effects of different filters, among | ||
1223 | other things. | ||
1224 | |||
1225 | # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ | ||
1226 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1227 | # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ | ||
1228 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1229 | # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ | ||
1230 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1231 | # echo 'hist:keys=skbaddr.hex:vals=len' >> \ | ||
1232 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1233 | # echo 'hist:keys=len:vals=common_preempt_count' >> \ | ||
1234 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1235 | |||
1236 | The above set of commands creates four triggers differing only in | ||
1237 | their filters, along with a completely different though fairly | ||
1238 | nonsensical trigger. Note that in order to append multiple hist | ||
1239 | triggers to the same file, you should use the '>>' operator to | ||
1240 | append them ('>' will also add the new hist trigger, but will remove | ||
1241 | any existing hist triggers beforehand). | ||
1242 | |||
1243 | Displaying the contents of the 'hist' file for the event shows the | ||
1244 | contents of all five histograms: | ||
1245 | |||
1246 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | ||
1247 | |||
1248 | # event histogram | ||
1249 | # | ||
1250 | # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] | ||
1251 | # | ||
1252 | |||
1253 | { len: 176 } hitcount: 1 common_preempt_count: 0 | ||
1254 | { len: 223 } hitcount: 1 common_preempt_count: 0 | ||
1255 | { len: 4854 } hitcount: 1 common_preempt_count: 0 | ||
1256 | { len: 395 } hitcount: 1 common_preempt_count: 0 | ||
1257 | { len: 177 } hitcount: 1 common_preempt_count: 0 | ||
1258 | { len: 446 } hitcount: 1 common_preempt_count: 0 | ||
1259 | { len: 1601 } hitcount: 1 common_preempt_count: 0 | ||
1260 | . | ||
1261 | . | ||
1262 | . | ||
1263 | { len: 1280 } hitcount: 66 common_preempt_count: 0 | ||
1264 | { len: 116 } hitcount: 81 common_preempt_count: 40 | ||
1265 | { len: 708 } hitcount: 112 common_preempt_count: 0 | ||
1266 | { len: 46 } hitcount: 221 common_preempt_count: 0 | ||
1267 | { len: 1264 } hitcount: 458 common_preempt_count: 0 | ||
1268 | |||
1269 | Totals: | ||
1270 | Hits: 1428 | ||
1271 | Entries: 147 | ||
1272 | Dropped: 0 | ||
1273 | |||
1274 | |||
1275 | # event histogram | ||
1276 | # | ||
1277 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1278 | # | ||
1279 | |||
1280 | { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 | ||
1281 | { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 | ||
1282 | { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 | ||
1283 | { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 | ||
1284 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 | ||
1285 | { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 | ||
1286 | { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 | ||
1287 | { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 | ||
1288 | { skbaddr: ffff880100065900 } hitcount: 1 len: 46 | ||
1289 | { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 | ||
1290 | { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 | ||
1291 | { skbaddr: ffff880100064700 } hitcount: 1 len: 365 | ||
1292 | { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 | ||
1293 | . | ||
1294 | . | ||
1295 | . | ||
1296 | { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 | ||
1297 | { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 | ||
1298 | { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 | ||
1299 | { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 | ||
1300 | { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 | ||
1301 | { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 | ||
1302 | { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 | ||
1303 | { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 | ||
1304 | { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 | ||
1305 | |||
1306 | Totals: | ||
1307 | Hits: 1451 | ||
1308 | Entries: 318 | ||
1309 | Dropped: 0 | ||
1310 | |||
1311 | |||
1312 | # event histogram | ||
1313 | # | ||
1314 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] | ||
1315 | # | ||
1316 | |||
1317 | |||
1318 | Totals: | ||
1319 | Hits: 0 | ||
1320 | Entries: 0 | ||
1321 | Dropped: 0 | ||
1322 | |||
1323 | |||
1324 | # event histogram | ||
1325 | # | ||
1326 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] | ||
1327 | # | ||
1328 | |||
1329 | { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 | ||
1330 | { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 | ||
1331 | { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 | ||
1332 | { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 | ||
1333 | { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 | ||
1334 | { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 | ||
1335 | { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 | ||
1336 | { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 | ||
1337 | { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 | ||
1338 | { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 | ||
1339 | { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 | ||
1340 | { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 | ||
1341 | |||
1342 | Totals: | ||
1343 | Hits: 14 | ||
1344 | Entries: 12 | ||
1345 | Dropped: 0 | ||
1346 | |||
1347 | |||
1348 | # event histogram | ||
1349 | # | ||
1350 | # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] | ||
1351 | # | ||
1352 | |||
1353 | |||
1354 | Totals: | ||
1355 | Hits: 0 | ||
1356 | Entries: 0 | ||
1357 | Dropped: 0 | ||
1358 | |||
1359 | Named triggers can be used to have triggers share a common set of | ||
1360 | histogram data. This capability is mostly useful for combining the | ||
1361 | output of events generated by tracepoints contained inside inline | ||
1362 | functions, but names can be used in a hist trigger on any event. | ||
1363 | For example, these two triggers when hit will update the same 'len' | ||
1364 | field in the shared 'foo' histogram data: | ||
1365 | |||
1366 | # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | ||
1367 | /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | ||
1368 | # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | ||
1369 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
1370 | |||
1371 | You can see that they're updating common histogram data by reading | ||
1372 | each event's hist files at the same time: | ||
1373 | |||
1374 | # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; | ||
1375 | cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
1376 | |||
1377 | # event histogram | ||
1378 | # | ||
1379 | # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1380 | # | ||
1381 | |||
1382 | { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | ||
1383 | { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | ||
1384 | { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | ||
1385 | { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | ||
1386 | { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | ||
1387 | { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | ||
1388 | { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | ||
1389 | { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | ||
1390 | { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | ||
1391 | { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | ||
1392 | { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | ||
1393 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | ||
1394 | { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | ||
1395 | { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | ||
1396 | { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | ||
1397 | { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | ||
1398 | { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | ||
1399 | { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | ||
1400 | { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | ||
1401 | { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | ||
1402 | { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | ||
1403 | { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | ||
1404 | { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | ||
1405 | { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | ||
1406 | { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | ||
1407 | { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | ||
1408 | { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | ||
1409 | { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | ||
1410 | { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | ||
1411 | { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | ||
1412 | { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | ||
1413 | { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | ||
1414 | { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | ||
1415 | { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | ||
1416 | { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | ||
1417 | { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | ||
1418 | { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | ||
1419 | { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | ||
1420 | { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | ||
1421 | { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | ||
1422 | { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | ||
1423 | { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | ||
1424 | |||
1425 | Totals: | ||
1426 | Hits: 81 | ||
1427 | Entries: 42 | ||
1428 | Dropped: 0 | ||
1429 | # event histogram | ||
1430 | # | ||
1431 | # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | ||
1432 | # | ||
1433 | |||
1434 | { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | ||
1435 | { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | ||
1436 | { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | ||
1437 | { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | ||
1438 | { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | ||
1439 | { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | ||
1440 | { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | ||
1441 | { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | ||
1442 | { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | ||
1443 | { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | ||
1444 | { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | ||
1445 | { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | ||
1446 | { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | ||
1447 | { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | ||
1448 | { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | ||
1449 | { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | ||
1450 | { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | ||
1451 | { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | ||
1452 | { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | ||
1453 | { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | ||
1454 | { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | ||
1455 | { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | ||
1456 | { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | ||
1457 | { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | ||
1458 | { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | ||
1459 | { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | ||
1460 | { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | ||
1461 | { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | ||
1462 | { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | ||
1463 | { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | ||
1464 | { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | ||
1465 | { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | ||
1466 | { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | ||
1467 | { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | ||
1468 | { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | ||
1469 | { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | ||
1470 | { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | ||
1471 | { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | ||
1472 | { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | ||
1473 | { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | ||
1474 | { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | ||
1475 | { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | ||
1476 | |||
1477 | Totals: | ||
1478 | Hits: 81 | ||
1479 | Entries: 42 | ||
1480 | Dropped: 0 | ||
1481 | |||
1482 | And here's an example that shows how to combine histogram data from | ||
1483 | any two events even if they don't share any 'compatible' fields | ||
1484 | other than 'hitcount' and 'stacktrace'. These commands create a | ||
1485 | couple of triggers named 'bar' using those fields: | ||
1486 | |||
1487 | # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | ||
1488 | /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | ||
1489 | # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | ||
1490 | /sys/kernel/debug/tracing/events/net/netif_rx/trigger | ||
1491 | |||
1492 | And displaying the output of either shows some interesting if | ||
1493 | somewhat confusing output: | ||
1494 | |||
1495 | # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | ||
1496 | # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | ||
1497 | |||
1498 | # event histogram | ||
1499 | # | ||
1500 | # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] | ||
1501 | # | ||
1502 | |||
1503 | { stacktrace: | ||
1504 | _do_fork+0x18e/0x330 | ||
1505 | kernel_thread+0x29/0x30 | ||
1506 | kthreadd+0x154/0x1b0 | ||
1507 | ret_from_fork+0x3f/0x70 | ||
1508 | } hitcount: 1 | ||
1509 | { stacktrace: | ||
1510 | netif_rx_internal+0xb2/0xd0 | ||
1511 | netif_rx_ni+0x20/0x70 | ||
1512 | dev_loopback_xmit+0xaa/0xd0 | ||
1513 | ip_mc_output+0x126/0x240 | ||
1514 | ip_local_out_sk+0x31/0x40 | ||
1515 | igmp_send_report+0x1e9/0x230 | ||
1516 | igmp_timer_expire+0xe9/0x120 | ||
1517 | call_timer_fn+0x39/0xf0 | ||
1518 | run_timer_softirq+0x1e1/0x290 | ||
1519 | __do_softirq+0xfd/0x290 | ||
1520 | irq_exit+0x98/0xb0 | ||
1521 | smp_apic_timer_interrupt+0x4a/0x60 | ||
1522 | apic_timer_interrupt+0x6d/0x80 | ||
1523 | cpuidle_enter+0x17/0x20 | ||
1524 | call_cpuidle+0x3b/0x60 | ||
1525 | cpu_startup_entry+0x22d/0x310 | ||
1526 | } hitcount: 1 | ||
1527 | { stacktrace: | ||
1528 | netif_rx_internal+0xb2/0xd0 | ||
1529 | netif_rx_ni+0x20/0x70 | ||
1530 | dev_loopback_xmit+0xaa/0xd0 | ||
1531 | ip_mc_output+0x17f/0x240 | ||
1532 | ip_local_out_sk+0x31/0x40 | ||
1533 | ip_send_skb+0x1a/0x50 | ||
1534 | udp_send_skb+0x13e/0x270 | ||
1535 | udp_sendmsg+0x2bf/0x980 | ||
1536 | inet_sendmsg+0x67/0xa0 | ||
1537 | sock_sendmsg+0x38/0x50 | ||
1538 | SYSC_sendto+0xef/0x170 | ||
1539 | SyS_sendto+0xe/0x10 | ||
1540 | entry_SYSCALL_64_fastpath+0x12/0x6a | ||
1541 | } hitcount: 2 | ||
1542 | { stacktrace: | ||
1543 | netif_rx_internal+0xb2/0xd0 | ||
1544 | netif_rx+0x1c/0x60 | ||
1545 | loopback_xmit+0x6c/0xb0 | ||
1546 | dev_hard_start_xmit+0x219/0x3a0 | ||
1547 | __dev_queue_xmit+0x415/0x4f0 | ||
1548 | dev_queue_xmit_sk+0x13/0x20 | ||
1549 | ip_finish_output2+0x237/0x340 | ||
1550 | ip_finish_output+0x113/0x1d0 | ||
1551 | ip_output+0x66/0xc0 | ||
1552 | ip_local_out_sk+0x31/0x40 | ||
1553 | ip_send_skb+0x1a/0x50 | ||
1554 | udp_send_skb+0x16d/0x270 | ||
1555 | udp_sendmsg+0x2bf/0x980 | ||
1556 | inet_sendmsg+0x67/0xa0 | ||
1557 | sock_sendmsg+0x38/0x50 | ||
1558 | ___sys_sendmsg+0x14e/0x270 | ||
1559 | } hitcount: 76 | ||
1560 | { stacktrace: | ||
1561 | netif_rx_internal+0xb2/0xd0 | ||
1562 | netif_rx+0x1c/0x60 | ||
1563 | loopback_xmit+0x6c/0xb0 | ||
1564 | dev_hard_start_xmit+0x219/0x3a0 | ||
1565 | __dev_queue_xmit+0x415/0x4f0 | ||
1566 | dev_queue_xmit_sk+0x13/0x20 | ||
1567 | ip_finish_output2+0x237/0x340 | ||
1568 | ip_finish_output+0x113/0x1d0 | ||
1569 | ip_output+0x66/0xc0 | ||
1570 | ip_local_out_sk+0x31/0x40 | ||
1571 | ip_send_skb+0x1a/0x50 | ||
1572 | udp_send_skb+0x16d/0x270 | ||
1573 | udp_sendmsg+0x2bf/0x980 | ||
1574 | inet_sendmsg+0x67/0xa0 | ||
1575 | sock_sendmsg+0x38/0x50 | ||
1576 | ___sys_sendmsg+0x269/0x270 | ||
1577 | } hitcount: 77 | ||
1578 | { stacktrace: | ||
1579 | netif_rx_internal+0xb2/0xd0 | ||
1580 | netif_rx+0x1c/0x60 | ||
1581 | loopback_xmit+0x6c/0xb0 | ||
1582 | dev_hard_start_xmit+0x219/0x3a0 | ||
1583 | __dev_queue_xmit+0x415/0x4f0 | ||
1584 | dev_queue_xmit_sk+0x13/0x20 | ||
1585 | ip_finish_output2+0x237/0x340 | ||
1586 | ip_finish_output+0x113/0x1d0 | ||
1587 | ip_output+0x66/0xc0 | ||
1588 | ip_local_out_sk+0x31/0x40 | ||
1589 | ip_send_skb+0x1a/0x50 | ||
1590 | udp_send_skb+0x16d/0x270 | ||
1591 | udp_sendmsg+0x2bf/0x980 | ||
1592 | inet_sendmsg+0x67/0xa0 | ||
1593 | sock_sendmsg+0x38/0x50 | ||
1594 | SYSC_sendto+0xef/0x170 | ||
1595 | } hitcount: 88 | ||
1596 | { stacktrace: | ||
1597 | _do_fork+0x18e/0x330 | ||
1598 | SyS_clone+0x19/0x20 | ||
1599 | entry_SYSCALL_64_fastpath+0x12/0x6a | ||
1600 | } hitcount: 244 | ||
1601 | |||
1602 | Totals: | ||
1603 | Hits: 489 | ||
1604 | Entries: 7 | ||
1605 | Dropped: 0 | ||
1606 | |||
1607 | |||
1608 | 2.2 Inter-event hist triggers | ||
1609 | ----------------------------- | ||
1610 | |||
1611 | Inter-event hist triggers are hist triggers that combine values from | ||
1612 | one or more other events and create a histogram using that data. Data | ||
1613 | from an inter-event histogram can in turn become the source for | ||
1614 | further combined histograms, thus providing a chain of related | ||
1615 | histograms, which is important for some applications. | ||
1616 | |||
1617 | The most important example of an inter-event quantity that can be used | ||
1618 | in this manner is latency, which is simply a difference in timestamps | ||
1619 | between two events. Although latency is the most important | ||
1620 | inter-event quantity, note that because the support is completely | ||
1621 | general across the trace event subsystem, any event field can be used | ||
1622 | in an inter-event quantity. | ||
1623 | |||
1624 | An example of a histogram that combines data from other histograms | ||
1625 | into a useful chain would be a 'wakeupswitch latency' histogram that | ||
1626 | combines a 'wakeup latency' histogram and a 'switch latency' | ||
1627 | histogram. | ||
1628 | |||
1629 | Normally, a hist trigger specification consists of a (possibly | ||
1630 | compound) key along with one or more numeric values, which are | ||
1631 | continually updated sums associated with that key. A histogram | ||
1632 | specification in this case consists of individual key and value | ||
1633 | specifications that refer to trace event fields associated with a | ||
1634 | single event type. | ||
1635 | |||
1636 | The inter-event hist trigger extension allows fields from multiple | ||
1637 | events to be referenced and combined into a multi-event histogram | ||
1638 | specification. In support of this overall goal, a few enabling | ||
1639 | features have been added to the hist trigger support: | ||
1640 | |||
1641 | - In order to compute an inter-event quantity, a value from one | ||
1642 | event needs to be saved and then referenced from another event. This | ||
1643 | requires the introduction of support for histogram 'variables'. | ||
1644 | |||
1645 | - The computation of inter-event quantities and their combination | ||
1646 | require some minimal amount of support for applying simple | ||
1647 | expressions to variables (+ and -). | ||
1648 | |||
1649 | - A histogram consisting of inter-event quantities isn't logically a | ||
1650 | histogram on either event (so having the 'hist' file for either | ||
1651 | event host the histogram output doesn't really make sense). To | ||
1652 | address the idea that the histogram is associated with a | ||
1653 | combination of events, support is added allowing the creation of | ||
1654 | 'synthetic' events that are events derived from other events. | ||
1655 | These synthetic events are full-fledged events just like any other | ||
1656 | and can be used as such, as for instance to create the | ||
1657 | 'combination' histograms mentioned previously. | ||
1658 | |||
1659 | - A set of 'actions' can be associated with histogram entries - | ||
1660 | these can be used to generate the previously mentioned synthetic | ||
1661 | events, but can also be used for other purposes, such as for | ||
1662 | example saving context when a 'max' latency has been hit. | ||
1663 | |||
1664 | - Trace events don't have a 'timestamp' associated with them, but | ||
1665 | there is an implicit timestamp saved along with an event in the | ||
1666 | underlying ftrace ring buffer. This timestamp is now exposed as | ||
1667 | a synthetic field named 'common_timestamp' which can be used in | ||
1668 | histograms as if it were any other event field; it isn't an actual | ||
1669 | field in the trace format but rather is a synthesized value that | ||
1670 | nonetheless can be used as if it were an actual field. By default | ||
1671 | it is in units of nanoseconds; appending '.usecs' to a | ||
1672 | common_timestamp field changes the units to microseconds. | ||
1673 | |||
1674 | A note on inter-event timestamps: If common_timestamp is used in a | ||
1675 | histogram, the trace buffer is automatically switched over to using | ||
1676 | absolute timestamps and the "global" trace clock, in order to avoid | ||
1677 | bogus timestamp differences with other clocks that aren't coherent | ||
1678 | across CPUs. This can be overridden by specifying one of the other | ||
1679 | trace clocks instead, using the "clock=XXX" hist trigger attribute, | ||
1680 | where XXX is any of the clocks listed in the tracing/trace_clock | ||
1681 | pseudo-file. | ||
1682 | |||
1683 | These features are described in more detail in the following sections. | ||
1684 | |||
1685 | 2.2.1 Histogram Variables | ||
1686 | ------------------------- | ||
1687 | |||
1688 | Variables are simply named locations used for saving and retrieving | ||
1689 | values between matching events. A 'matching' event is defined as an | ||
1690 | event that has a matching key - if a variable is saved for a histogram | ||
1691 | entry corresponding to that key, any subsequent event with a matching | ||
1692 | key can access that variable. | ||
1693 | |||
1694 | A variable's value is normally available to any subsequent event until | ||
1695 | it is set to something else by a subsequent event. The one exception | ||
1696 | to that rule is that any variable used in an expression is essentially | ||
1697 | 'read-once' - once it's used by an expression in a subsequent event, | ||
1698 | it's reset to its 'unset' state, which means it can't be used again | ||
1699 | unless it's set again. This ensures not only that an event doesn't | ||
1700 | use an uninitialized variable in a calculation, but that that variable | ||
1701 | is used only once and not for any unrelated subsequent match. | ||
1702 | |||
1703 | The basic syntax for saving a variable is to simply prefix a unique | ||
1704 | variable name not corresponding to any keyword along with an '=' sign | ||
1705 | to any event field. | ||
1706 | |||
1707 | Either keys or values can be saved and retrieved in this way. This | ||
1708 | creates a variable named 'ts0' for a histogram entry with the key | ||
1709 | 'next_pid': | ||
1710 | |||
1711 | # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ...' >> \ | ||
1712 | event/trigger | ||
1713 | |||
1714 | The ts0 variable can be accessed by any subsequent event having the | ||
1715 | same pid as 'next_pid'. | ||
1716 | |||
1717 | Variable references are formed by prepending the variable name with | ||
1718 | the '$' sign. Thus for example, the ts0 variable above would be | ||
1719 | referenced as '$ts0' in expressions. | ||
1720 | |||
1721 | Because 'vals=' is used, the common_timestamp variable value above | ||
1722 | will also be summed as a normal histogram value would (though for a | ||
1723 | timestamp it makes little sense). | ||
1724 | |||
1725 | The below shows that a key value can also be saved in the same way: | ||
1726 | |||
1727 | # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger | ||
1728 | |||
1729 | If a variable isn't a key variable or prefixed with 'vals=', the | ||
1730 | associated event field will be saved in a variable but won't be summed | ||
1731 | as a value: | ||
1732 | |||
1733 | # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger | ||
1734 | |||
1735 | Multiple variables can be assigned at the same time. The below would | ||
1736 | result in both ts0 and b being created as variables, with both | ||
1737 | common_timestamp and field1 additionally being summed as values: | ||
1738 | |||
1739 | # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \ | ||
1740 | event/trigger | ||
1741 | |||
1742 | Note that variable assignments can appear either preceding or | ||
1743 | following their use. The command below behaves identically to the | ||
1744 | command above: | ||
1745 | |||
1746 | # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \ | ||
1747 | event/trigger | ||
1748 | |||
1749 | Any number of variables not bound to a 'vals=' prefix can also be | ||
1750 | assigned by simply separating them with colons. Below is the same | ||
1751 | thing but without the values being summed in the histogram: | ||
1752 | |||
1753 | # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger | ||
1754 | |||
1755 | Variables set as above can be referenced and used in expressions on | ||
1756 | another event. | ||
1757 | |||
1758 | For example, here's how a latency can be calculated: | ||
1759 | |||
1760 | # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger | ||
1761 | # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger | ||
1762 | |||
1763 | In the first line above, the event's timestamp is saved into the | ||
1764 | variable ts0. In the next line, ts0 is subtracted from the second | ||
1765 | event's timestamp to produce the latency, which is then assigned into | ||
1766 | yet another variable, 'wakeup_lat'. The hist trigger below in turn | ||
1767 | makes use of the wakeup_lat variable to compute a combined latency | ||
1768 | using the same key and variable from yet another event: | ||
1769 | |||
1770 | # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger | ||
1771 | |||
1772 | 2.2.2 Synthetic Events | ||
1773 | ---------------------- | ||
1774 | |||
1775 | Synthetic events are user-defined events generated from hist trigger | ||
1776 | variables or fields associated with one or more other events. Their | ||
1777 | purpose is to provide a mechanism for displaying data spanning | ||
1778 | multiple events consistent with the existing and already familiar | ||
1779 | usage for normal events. | ||
1780 | |||
1781 | To define a synthetic event, the user writes a simple specification | ||
1782 | consisting of the name of the new event along with one or more | ||
1783 | variables and their types, which can be any valid field type, | ||
1784 | separated by semicolons, to the tracing/synthetic_events file. | ||
1785 | |||
1786 | For instance, the following creates a new event named 'wakeup_latency' | ||
1787 | with 3 fields: lat, pid, and prio. Each of those fields is simply a | ||
1788 | variable reference to a variable on another event: | ||
1789 | |||
1790 | # echo 'wakeup_latency \ | ||
1791 | u64 lat; \ | ||
1792 | pid_t pid; \ | ||
1793 | int prio' >> \ | ||
1794 | /sys/kernel/debug/tracing/synthetic_events | ||
1795 | |||
1796 | Reading the tracing/synthetic_events file lists all the currently | ||
1797 | defined synthetic events, in this case the event defined above: | ||
1798 | |||
1799 | # cat /sys/kernel/debug/tracing/synthetic_events | ||
1800 | wakeup_latency u64 lat; pid_t pid; int prio | ||
1801 | |||
1802 | An existing synthetic event definition can be removed by prepending | ||
1803 | the command that defined it with a '!': | ||
1804 | |||
1805 | # echo '!wakeup_latency u64 lat; pid_t pid; int prio' >> \ | ||
1806 | /sys/kernel/debug/tracing/synthetic_events | ||
1807 | |||
1808 | At this point, there isn't yet an actual 'wakeup_latency' event | ||
1809 | instantiated in the event subsystem - for this to happen, a 'hist | ||
1810 | trigger action' needs to be instantiated and bound to actual fields | ||
1811 | and variables defined on other events (see Section 2.2.3 below). | ||
1812 | |||
1813 | Once that is done, an event instance is created, and a histogram can | ||
1814 | be defined using it: | ||
1815 | |||
1816 | # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ | ||
1817 | /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger | ||
1818 | |||
1819 | The new event is created under the tracing/events/synthetic/ directory | ||
1820 | and looks and behaves just like any other event: | ||
1821 | |||
1822 | # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency | ||
1823 | enable filter format hist id trigger | ||
1824 | |||
1825 | Like any other event, once a histogram is enabled for the event, the | ||
1826 | output can be displayed by reading the event's 'hist' file. | ||
1827 | |||
1828 | 2.2.3 Hist trigger 'actions' | ||
1829 | ---------------------------- | ||
1830 | |||
1831 | A hist trigger 'action' is a function that's executed whenever a | ||
1832 | histogram entry is added or updated. | ||
1833 | |||
1834 | The default 'action' if no special function is explicitly specified is | ||
1835 | as it always has been, to simply update the set of values associated | ||
1836 | with an entry. Some applications, however, may want to perform | ||
1837 | additional actions at that point, such as generate another event, or | ||
1838 | compare and save a maximum. | ||
1839 | |||
1840 | The following additional actions are available. To specify an action | ||
1841 | for a given event, simply specify the action between colons in the | ||
1842 | hist trigger specification. | ||
1843 | |||
1844 | - onmatch(matching.event).<synthetic_event_name>(param list) | ||
1845 | |||
1846 | The 'onmatch(matching.event).<synthetic_event_name>(params)' hist | ||
1847 | trigger action is invoked whenever an event matches and the | ||
1848 | histogram entry would be added or updated. It causes the named | ||
1849 | synthetic event to be generated with the values given in the | ||
1850 | 'param list'. The result is the generation of a synthetic event | ||
1851 | that consists of the values contained in those variables at the | ||
1852 | time the invoking event was hit. | ||
1853 | |||
1854 | The 'param list' consists of one or more parameters which may be | ||
1855 | either variables or fields defined on either the 'matching.event' | ||
1856 | or the target event. The variables or fields specified in the | ||
1857 | param list may be either fully-qualified or unqualified. If a | ||
1858 | variable is specified as unqualified, it must be unique between | ||
1859 | the two events. A field name used as a param can be unqualified | ||
1860 | if it refers to the target event, but must be fully qualified if | ||
1861 | it refers to the matching event. A fully-qualified name is of the | ||
1862 | form 'system.event_name.$var_name' or 'system.event_name.field'. | ||
1863 | |||
1864 | The 'matching.event' specification is simply the fully qualified | ||
1865 | event name of the event that matches the target event for the | ||
1866 | onmatch() functionality, in the form 'system.event_name'. | ||
1867 | |||
1868 | Finally, the number and type of variables/fields in the 'param | ||
1869 | list' must match the number and types of the fields in the | ||
1870 | synthetic event being generated. | ||
1871 | |||
1872 | As an example the below defines a simple synthetic event and uses | ||
1873 | a variable defined on the sched_wakeup_new event as a parameter | ||
1874 | when invoking the synthetic event. Here we define the synthetic | ||
1875 | event: | ||
1876 | |||
1877 | # echo 'wakeup_new_test pid_t pid' >> \ | ||
1878 | /sys/kernel/debug/tracing/synthetic_events | ||
1879 | |||
1880 | # cat /sys/kernel/debug/tracing/synthetic_events | ||
1881 | wakeup_new_test pid_t pid | ||
1882 | |||
1883 | The following hist trigger both defines the missing testpid | ||
1884 | variable and specifies an onmatch() action that generates a | ||
1885 | wakeup_new_test synthetic event whenever a sched_wakeup_new event | ||
1886 | occurs, which because of the 'if comm == "cyclictest"' filter only | ||
1887 | happens when the executable is cyclictest: | ||
1888 | |||
1889 | # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ | ||
1890 | wakeup_new_test($testpid) if comm=="cyclictest"' >> \ | ||
1891 | /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger | ||
1892 | |||
1893 | Creating and displaying a histogram based on those events is now | ||
1894 | just a matter of using the fields and new synthetic event in the | ||
1895 | tracing/events/synthetic directory, as usual: | ||
1896 | |||
1897 | # echo 'hist:keys=pid:sort=pid' >> \ | ||
1898 | /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger | ||
1899 | |||
1900 | Running 'cyclictest' should cause wakeup_new events to generate | ||
1901 | wakeup_new_test synthetic events which should result in histogram | ||
1902 | output in the wakeup_new_test event's hist file: | ||
1903 | |||
1904 | # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist | ||
1905 | |||
1906 | A more typical usage would be to use two events to calculate a | ||
1907 | latency. The following example uses a set of hist triggers to | ||
1908 | produce a 'wakeup_latency' histogram: | ||
1909 | |||
1910 | First, we define a 'wakeup_latency' synthetic event: | ||
1911 | |||
1912 | # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \ | ||
1913 | /sys/kernel/debug/tracing/synthetic_events | ||
1914 | |||
1915 | Next, we specify that whenever we see a sched_waking event for a | ||
1916 | cyclictest thread, save the timestamp in a 'ts0' variable: | ||
1917 | |||
1918 | # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \ | ||
1919 | if comm=="cyclictest"' >> \ | ||
1920 | /sys/kernel/debug/tracing/events/sched/sched_waking/trigger | ||
1921 | |||
1922 | Then, when the corresponding thread is actually scheduled onto the | ||
1923 | CPU by a sched_switch event, calculate the latency and use that | ||
1924 | along with another variable and an event field to generate a | ||
1925 | wakeup_latency synthetic event: | ||
1926 | |||
1927 | # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\ | ||
1928 | onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\ | ||
1929 | $saved_pid,next_prio) if next_comm=="cyclictest"' >> \ | ||
1930 | /sys/kernel/debug/tracing/events/sched/sched_switch/trigger | ||
1931 | |||
1932 | We also need to create a histogram on the wakeup_latency synthetic | ||
1933 | event in order to aggregate the generated synthetic event data: | ||
1934 | |||
1935 | # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \ | ||
1936 | /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger | ||
1937 | |||
1938 | Finally, once we've run cyclictest to actually generate some | ||
1939 | events, we can see the output by looking at the wakeup_latency | ||
1940 | synthetic event's hist file: | ||
1941 | |||
1942 | # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist | ||
1943 | |||
1944 | - onmax(var).save(field,...) | ||
1945 | |||
1946 | The 'onmax(var).save(field,...)' hist trigger action is invoked | ||
1947 | whenever the value of 'var' associated with a histogram entry | ||
1948 | exceeds the current maximum contained in that variable. | ||
1949 | |||
1950 | The end result is that the trace event fields specified as the | ||
1951 | onmax.save() params will be saved if 'var' exceeds the current | ||
1952 | maximum for that hist trigger entry. This allows context from the | ||
1953 | event that exhibited the new maximum to be saved for later | ||
1954 | reference. When the histogram is displayed, additional fields | ||
1955 | displaying the saved values will be printed. | ||
1956 | |||
1957 | As an example the below defines a couple of hist triggers, one for | ||
1958 | sched_waking and another for sched_switch, keyed on pid. Whenever | ||
1959 | a sched_waking occurs, the timestamp is saved in the entry | ||
1960 | corresponding to the current pid, and when the scheduler switches | ||
1961 | back to that pid, the timestamp difference is calculated. If the | ||
1962 | resulting latency, stored in wakeup_lat, exceeds the current | ||
1963 | maximum latency, the values specified in the save() fields are | ||
1964 | recorded: | ||
1965 | |||
1966 | # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ | ||
1967 | if comm=="cyclictest"' >> \ | ||
1968 | /sys/kernel/debug/tracing/events/sched/sched_waking/trigger | ||
1969 | |||
1970 | # echo 'hist:keys=next_pid:\ | ||
1971 | wakeup_lat=common_timestamp.usecs-$ts0:\ | ||
1972 | onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \ | ||
1973 | if next_comm=="cyclictest"' >> \ | ||
1974 | /sys/kernel/debug/tracing/events/sched/sched_switch/trigger | ||
1975 | |||
1976 | When the histogram is displayed, the max value and the saved | ||
1977 | values corresponding to the max are displayed following the rest | ||
1978 | of the fields: | ||
1979 | |||
1980 | # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist | ||
1981 | { next_pid: 2255 } hitcount: 239 | ||
1982 | common_timestamp-ts0: 0 | ||
1983 | max: 27 | ||
1984 | next_comm: cyclictest | ||
1985 | prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 | ||
1986 | |||
1987 | { next_pid: 2256 } hitcount: 2355 | ||
1988 | common_timestamp-ts0: 0 | ||
1989 | max: 49 next_comm: cyclictest | ||
1990 | prev_pid: 0 prev_prio: 120 prev_comm: swapper/0 | ||
1991 | |||
1992 | Totals: | ||
1993 | Hits: 12970 | ||
1994 | Entries: 2 | ||
1995 | Dropped: 0 | ||
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 7d9eb39fa76a..a0233edc0718 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h | |||
@@ -34,10 +34,12 @@ struct ring_buffer_event { | |||
34 | * array[0] = time delta (28 .. 59) | 34 | * array[0] = time delta (28 .. 59) |
35 | * size = 8 bytes | 35 | * size = 8 bytes |
36 | * | 36 | * |
37 | * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock | 37 | * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp |
38 | * array[0] = tv_nsec | 38 | * Same format as TIME_EXTEND except that the |
39 | * array[1..2] = tv_sec | 39 | * value is an absolute timestamp, not a delta |
40 | * size = 16 bytes | 40 | * event.time_delta contains bottom 27 bits |
41 | * array[0] = top (28 .. 59) bits | ||
42 | * size = 8 bytes | ||
41 | * | 43 | * |
42 | * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: | 44 | * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: |
43 | * Data record | 45 | * Data record |
@@ -54,12 +56,12 @@ enum ring_buffer_type { | |||
54 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, | 56 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, |
55 | RINGBUF_TYPE_PADDING, | 57 | RINGBUF_TYPE_PADDING, |
56 | RINGBUF_TYPE_TIME_EXTEND, | 58 | RINGBUF_TYPE_TIME_EXTEND, |
57 | /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ | ||
58 | RINGBUF_TYPE_TIME_STAMP, | 59 | RINGBUF_TYPE_TIME_STAMP, |
59 | }; | 60 | }; |
60 | 61 | ||
61 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); | 62 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); |
62 | void *ring_buffer_event_data(struct ring_buffer_event *event); | 63 | void *ring_buffer_event_data(struct ring_buffer_event *event); |
64 | u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); | ||
63 | 65 | ||
64 | /* | 66 | /* |
65 | * ring_buffer_discard_commit will remove an event that has not | 67 | * ring_buffer_discard_commit will remove an event that has not |
@@ -115,6 +117,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, | |||
115 | int ring_buffer_write(struct ring_buffer *buffer, | 117 | int ring_buffer_write(struct ring_buffer *buffer, |
116 | unsigned long length, void *data); | 118 | unsigned long length, void *data); |
117 | 119 | ||
120 | void ring_buffer_nest_start(struct ring_buffer *buffer); | ||
121 | void ring_buffer_nest_end(struct ring_buffer *buffer); | ||
122 | |||
118 | struct ring_buffer_event * | 123 | struct ring_buffer_event * |
119 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, | 124 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, |
120 | unsigned long *lost_events); | 125 | unsigned long *lost_events); |
@@ -178,6 +183,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, | |||
178 | int cpu, u64 *ts); | 183 | int cpu, u64 *ts); |
179 | void ring_buffer_set_clock(struct ring_buffer *buffer, | 184 | void ring_buffer_set_clock(struct ring_buffer *buffer, |
180 | u64 (*clock)(void)); | 185 | u64 (*clock)(void)); |
186 | void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); | ||
187 | bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); | ||
181 | 188 | ||
182 | size_t ring_buffer_page_len(void *page); | 189 | size_t ring_buffer_page_len(void *page); |
183 | 190 | ||
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e0e98000b665..2bde3eff564c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h | |||
@@ -430,11 +430,13 @@ enum event_trigger_type { | |||
430 | 430 | ||
431 | extern int filter_match_preds(struct event_filter *filter, void *rec); | 431 | extern int filter_match_preds(struct event_filter *filter, void *rec); |
432 | 432 | ||
433 | extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, | 433 | extern enum event_trigger_type |
434 | void *rec); | 434 | event_triggers_call(struct trace_event_file *file, void *rec, |
435 | extern void event_triggers_post_call(struct trace_event_file *file, | 435 | struct ring_buffer_event *event); |
436 | enum event_trigger_type tt, | 436 | extern void |
437 | void *rec); | 437 | event_triggers_post_call(struct trace_event_file *file, |
438 | enum event_trigger_type tt, | ||
439 | void *rec, struct ring_buffer_event *event); | ||
438 | 440 | ||
439 | bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); | 441 | bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); |
440 | 442 | ||
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file) | |||
454 | 456 | ||
455 | if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { | 457 | if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { |
456 | if (eflags & EVENT_FILE_FL_TRIGGER_MODE) | 458 | if (eflags & EVENT_FILE_FL_TRIGGER_MODE) |
457 | event_triggers_call(file, NULL); | 459 | event_triggers_call(file, NULL, NULL); |
458 | if (eflags & EVENT_FILE_FL_SOFT_DISABLED) | 460 | if (eflags & EVENT_FILE_FL_SOFT_DISABLED) |
459 | return true; | 461 | return true; |
460 | if (eflags & EVENT_FILE_FL_PID_FILTER) | 462 | if (eflags & EVENT_FILE_FL_PID_FILTER) |
diff --git a/include/trace/events/initcall.h b/include/trace/events/initcall.h new file mode 100644 index 000000000000..8d6cf10d27c9 --- /dev/null +++ b/include/trace/events/initcall.h | |||
@@ -0,0 +1,66 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #undef TRACE_SYSTEM | ||
3 | #define TRACE_SYSTEM initcall | ||
4 | |||
5 | #if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ) | ||
6 | #define _TRACE_INITCALL_H | ||
7 | |||
8 | #include <linux/tracepoint.h> | ||
9 | |||
10 | TRACE_EVENT(initcall_level, | ||
11 | |||
12 | TP_PROTO(const char *level), | ||
13 | |||
14 | TP_ARGS(level), | ||
15 | |||
16 | TP_STRUCT__entry( | ||
17 | __string(level, level) | ||
18 | ), | ||
19 | |||
20 | TP_fast_assign( | ||
21 | __assign_str(level, level); | ||
22 | ), | ||
23 | |||
24 | TP_printk("level=%s", __get_str(level)) | ||
25 | ); | ||
26 | |||
27 | TRACE_EVENT(initcall_start, | ||
28 | |||
29 | TP_PROTO(initcall_t func), | ||
30 | |||
31 | TP_ARGS(func), | ||
32 | |||
33 | TP_STRUCT__entry( | ||
34 | __field(initcall_t, func) | ||
35 | ), | ||
36 | |||
37 | TP_fast_assign( | ||
38 | __entry->func = func; | ||
39 | ), | ||
40 | |||
41 | TP_printk("func=%pS", __entry->func) | ||
42 | ); | ||
43 | |||
44 | TRACE_EVENT(initcall_finish, | ||
45 | |||
46 | TP_PROTO(initcall_t func, int ret), | ||
47 | |||
48 | TP_ARGS(func, ret), | ||
49 | |||
50 | TP_STRUCT__entry( | ||
51 | __field(initcall_t, func) | ||
52 | __field(int, ret) | ||
53 | ), | ||
54 | |||
55 | TP_fast_assign( | ||
56 | __entry->func = func; | ||
57 | __entry->ret = ret; | ||
58 | ), | ||
59 | |||
60 | TP_printk("func=%pS ret=%d", __entry->func, __entry->ret) | ||
61 | ); | ||
62 | |||
63 | #endif /* if !defined(_TRACE_GPIO_H) || defined(TRACE_HEADER_MULTI_READ) */ | ||
64 | |||
65 | /* This part must be outside protection */ | ||
66 | #include <trace/define_trace.h> | ||
diff --git a/init/main.c b/init/main.c index e4a3160991ea..d499f4a80e0b 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -97,6 +97,9 @@ | |||
97 | #include <asm/sections.h> | 97 | #include <asm/sections.h> |
98 | #include <asm/cacheflush.h> | 98 | #include <asm/cacheflush.h> |
99 | 99 | ||
100 | #define CREATE_TRACE_POINTS | ||
101 | #include <trace/events/initcall.h> | ||
102 | |||
100 | static int kernel_init(void *); | 103 | static int kernel_init(void *); |
101 | 104 | ||
102 | extern void init_IRQ(void); | 105 | extern void init_IRQ(void); |
@@ -491,6 +494,17 @@ void __init __weak thread_stack_cache_init(void) | |||
491 | 494 | ||
492 | void __init __weak mem_encrypt_init(void) { } | 495 | void __init __weak mem_encrypt_init(void) { } |
493 | 496 | ||
497 | bool initcall_debug; | ||
498 | core_param(initcall_debug, initcall_debug, bool, 0644); | ||
499 | |||
500 | #ifdef TRACEPOINTS_ENABLED | ||
501 | static void __init initcall_debug_enable(void); | ||
502 | #else | ||
503 | static inline void initcall_debug_enable(void) | ||
504 | { | ||
505 | } | ||
506 | #endif | ||
507 | |||
494 | /* | 508 | /* |
495 | * Set up kernel memory allocators | 509 | * Set up kernel memory allocators |
496 | */ | 510 | */ |
@@ -612,6 +626,9 @@ asmlinkage __visible void __init start_kernel(void) | |||
612 | /* Trace events are available after this */ | 626 | /* Trace events are available after this */ |
613 | trace_init(); | 627 | trace_init(); |
614 | 628 | ||
629 | if (initcall_debug) | ||
630 | initcall_debug_enable(); | ||
631 | |||
615 | context_tracking_init(); | 632 | context_tracking_init(); |
616 | /* init some links before init_ISA_irqs() */ | 633 | /* init some links before init_ISA_irqs() */ |
617 | early_irq_init(); | 634 | early_irq_init(); |
@@ -728,9 +745,6 @@ static void __init do_ctors(void) | |||
728 | #endif | 745 | #endif |
729 | } | 746 | } |
730 | 747 | ||
731 | bool initcall_debug; | ||
732 | core_param(initcall_debug, initcall_debug, bool, 0644); | ||
733 | |||
734 | #ifdef CONFIG_KALLSYMS | 748 | #ifdef CONFIG_KALLSYMS |
735 | struct blacklist_entry { | 749 | struct blacklist_entry { |
736 | struct list_head next; | 750 | struct list_head next; |
@@ -800,37 +814,71 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) | |||
800 | #endif | 814 | #endif |
801 | __setup("initcall_blacklist=", initcall_blacklist); | 815 | __setup("initcall_blacklist=", initcall_blacklist); |
802 | 816 | ||
803 | static int __init_or_module do_one_initcall_debug(initcall_t fn) | 817 | static __init_or_module void |
818 | trace_initcall_start_cb(void *data, initcall_t fn) | ||
804 | { | 819 | { |
805 | ktime_t calltime, delta, rettime; | 820 | ktime_t *calltime = (ktime_t *)data; |
806 | unsigned long long duration; | ||
807 | int ret; | ||
808 | 821 | ||
809 | printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); | 822 | printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); |
810 | calltime = ktime_get(); | 823 | *calltime = ktime_get(); |
811 | ret = fn(); | 824 | } |
825 | |||
826 | static __init_or_module void | ||
827 | trace_initcall_finish_cb(void *data, initcall_t fn, int ret) | ||
828 | { | ||
829 | ktime_t *calltime = (ktime_t *)data; | ||
830 | ktime_t delta, rettime; | ||
831 | unsigned long long duration; | ||
832 | |||
812 | rettime = ktime_get(); | 833 | rettime = ktime_get(); |
813 | delta = ktime_sub(rettime, calltime); | 834 | delta = ktime_sub(rettime, *calltime); |
814 | duration = (unsigned long long) ktime_to_ns(delta) >> 10; | 835 | duration = (unsigned long long) ktime_to_ns(delta) >> 10; |
815 | printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", | 836 | printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", |
816 | fn, ret, duration); | 837 | fn, ret, duration); |
838 | } | ||
817 | 839 | ||
818 | return ret; | 840 | static ktime_t initcall_calltime; |
841 | |||
842 | #ifdef TRACEPOINTS_ENABLED | ||
843 | static void __init initcall_debug_enable(void) | ||
844 | { | ||
845 | int ret; | ||
846 | |||
847 | ret = register_trace_initcall_start(trace_initcall_start_cb, | ||
848 | &initcall_calltime); | ||
849 | ret |= register_trace_initcall_finish(trace_initcall_finish_cb, | ||
850 | &initcall_calltime); | ||
851 | WARN(ret, "Failed to register initcall tracepoints\n"); | ||
819 | } | 852 | } |
853 | # define do_trace_initcall_start trace_initcall_start | ||
854 | # define do_trace_initcall_finish trace_initcall_finish | ||
855 | #else | ||
856 | static inline void do_trace_initcall_start(initcall_t fn) | ||
857 | { | ||
858 | if (!initcall_debug) | ||
859 | return; | ||
860 | trace_initcall_start_cb(&initcall_calltime, fn); | ||
861 | } | ||
862 | static inline void do_trace_initcall_finish(initcall_t fn, int ret) | ||
863 | { | ||
864 | if (!initcall_debug) | ||
865 | return; | ||
866 | trace_initcall_finish_cb(&initcall_calltime, fn, ret); | ||
867 | } | ||
868 | #endif /* !TRACEPOINTS_ENABLED */ | ||
820 | 869 | ||
821 | int __init_or_module do_one_initcall(initcall_t fn) | 870 | int __init_or_module do_one_initcall(initcall_t fn) |
822 | { | 871 | { |
823 | int count = preempt_count(); | 872 | int count = preempt_count(); |
824 | int ret; | ||
825 | char msgbuf[64]; | 873 | char msgbuf[64]; |
874 | int ret; | ||
826 | 875 | ||
827 | if (initcall_blacklisted(fn)) | 876 | if (initcall_blacklisted(fn)) |
828 | return -EPERM; | 877 | return -EPERM; |
829 | 878 | ||
830 | if (initcall_debug) | 879 | do_trace_initcall_start(fn); |
831 | ret = do_one_initcall_debug(fn); | 880 | ret = fn(); |
832 | else | 881 | do_trace_initcall_finish(fn, ret); |
833 | ret = fn(); | ||
834 | 882 | ||
835 | msgbuf[0] = 0; | 883 | msgbuf[0] = 0; |
836 | 884 | ||
@@ -874,7 +922,7 @@ static initcall_t *initcall_levels[] __initdata = { | |||
874 | 922 | ||
875 | /* Keep these in sync with initcalls in include/linux/init.h */ | 923 | /* Keep these in sync with initcalls in include/linux/init.h */ |
876 | static char *initcall_level_names[] __initdata = { | 924 | static char *initcall_level_names[] __initdata = { |
877 | "early", | 925 | "pure", |
878 | "core", | 926 | "core", |
879 | "postcore", | 927 | "postcore", |
880 | "arch", | 928 | "arch", |
@@ -895,6 +943,7 @@ static void __init do_initcall_level(int level) | |||
895 | level, level, | 943 | level, level, |
896 | NULL, &repair_env_string); | 944 | NULL, &repair_env_string); |
897 | 945 | ||
946 | trace_initcall_level(initcall_level_names[level]); | ||
898 | for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) | 947 | for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) |
899 | do_one_initcall(*fn); | 948 | do_one_initcall(*fn); |
900 | } | 949 | } |
@@ -929,6 +978,7 @@ static void __init do_pre_smp_initcalls(void) | |||
929 | { | 978 | { |
930 | initcall_t *fn; | 979 | initcall_t *fn; |
931 | 980 | ||
981 | trace_initcall_level("early"); | ||
932 | for (fn = __initcall_start; fn < __initcall0_start; fn++) | 982 | for (fn = __initcall_start; fn < __initcall0_start; fn++) |
933 | do_one_initcall(*fn); | 983 | do_one_initcall(*fn); |
934 | } | 984 | } |
diff --git a/kernel/panic.c b/kernel/panic.c index 9d833d913c84..6c3b08cd1139 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -554,6 +554,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, | |||
554 | else | 554 | else |
555 | dump_stack(); | 555 | dump_stack(); |
556 | 556 | ||
557 | print_irqtrace_events(current); | ||
558 | |||
557 | print_oops_end_marker(); | 559 | print_oops_end_marker(); |
558 | 560 | ||
559 | /* Just a warning, don't kill lockdep. */ | 561 | /* Just a warning, don't kill lockdep. */ |
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 704e55129c3a..2f4af216bd6e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/uaccess.h> | 51 | #include <linux/uaccess.h> |
52 | #include <asm/sections.h> | 52 | #include <asm/sections.h> |
53 | 53 | ||
54 | #include <trace/events/initcall.h> | ||
54 | #define CREATE_TRACE_POINTS | 55 | #define CREATE_TRACE_POINTS |
55 | #include <trace/events/printk.h> | 56 | #include <trace/events/printk.h> |
56 | 57 | ||
@@ -2780,6 +2781,7 @@ EXPORT_SYMBOL(unregister_console); | |||
2780 | */ | 2781 | */ |
2781 | void __init console_init(void) | 2782 | void __init console_init(void) |
2782 | { | 2783 | { |
2784 | int ret; | ||
2783 | initcall_t *call; | 2785 | initcall_t *call; |
2784 | 2786 | ||
2785 | /* Setup the default TTY line discipline. */ | 2787 | /* Setup the default TTY line discipline. */ |
@@ -2790,8 +2792,11 @@ void __init console_init(void) | |||
2790 | * inform about problems etc.. | 2792 | * inform about problems etc.. |
2791 | */ | 2793 | */ |
2792 | call = __con_initcall_start; | 2794 | call = __con_initcall_start; |
2795 | trace_initcall_level("console"); | ||
2793 | while (call < __con_initcall_end) { | 2796 | while (call < __con_initcall_end) { |
2794 | (*call)(); | 2797 | trace_initcall_start((*call)); |
2798 | ret = (*call)(); | ||
2799 | trace_initcall_finish((*call), ret); | ||
2795 | call++; | 2800 | call++; |
2796 | } | 2801 | } |
2797 | } | 2802 | } |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 0b249e2f0c3c..c4f0f2e4126e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -606,7 +606,10 @@ config HIST_TRIGGERS | |||
606 | event activity as an initial guide for further investigation | 606 | event activity as an initial guide for further investigation |
607 | using more advanced tools. | 607 | using more advanced tools. |
608 | 608 | ||
609 | See Documentation/trace/events.txt. | 609 | Inter-event tracing of quantities such as latencies is also |
610 | supported using hist triggers under this option. | ||
611 | |||
612 | See Documentation/trace/histogram.txt. | ||
610 | If in doubt, say N. | 613 | If in doubt, say N. |
611 | 614 | ||
612 | config MMIOTRACE_TEST | 615 | config MMIOTRACE_TEST |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index eac9ce2c57a2..16bbf062018f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -3902,14 +3902,13 @@ static bool module_exists(const char *module) | |||
3902 | { | 3902 | { |
3903 | /* All modules have the symbol __this_module */ | 3903 | /* All modules have the symbol __this_module */ |
3904 | const char this_mod[] = "__this_module"; | 3904 | const char this_mod[] = "__this_module"; |
3905 | const int modname_size = MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 1; | 3905 | char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; |
3906 | char modname[modname_size + 1]; | ||
3907 | unsigned long val; | 3906 | unsigned long val; |
3908 | int n; | 3907 | int n; |
3909 | 3908 | ||
3910 | n = snprintf(modname, modname_size + 1, "%s:%s", module, this_mod); | 3909 | n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); |
3911 | 3910 | ||
3912 | if (n > modname_size) | 3911 | if (n > sizeof(modname) - 1) |
3913 | return false; | 3912 | return false; |
3914 | 3913 | ||
3915 | val = module_kallsyms_lookup_name(modname); | 3914 | val = module_kallsyms_lookup_name(modname); |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index dcf1c4dd3efe..c9cb9767d49b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/hash.h> | 22 | #include <linux/hash.h> |
23 | #include <linux/list.h> | 23 | #include <linux/list.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/oom.h> | ||
25 | 26 | ||
26 | #include <asm/local.h> | 27 | #include <asm/local.h> |
27 | 28 | ||
@@ -41,6 +42,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) | |||
41 | RINGBUF_TYPE_PADDING); | 42 | RINGBUF_TYPE_PADDING); |
42 | trace_seq_printf(s, "\ttime_extend : type == %d\n", | 43 | trace_seq_printf(s, "\ttime_extend : type == %d\n", |
43 | RINGBUF_TYPE_TIME_EXTEND); | 44 | RINGBUF_TYPE_TIME_EXTEND); |
45 | trace_seq_printf(s, "\ttime_stamp : type == %d\n", | ||
46 | RINGBUF_TYPE_TIME_STAMP); | ||
44 | trace_seq_printf(s, "\tdata max type_len == %d\n", | 47 | trace_seq_printf(s, "\tdata max type_len == %d\n", |
45 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 48 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
46 | 49 | ||
@@ -140,12 +143,15 @@ int ring_buffer_print_entry_header(struct trace_seq *s) | |||
140 | 143 | ||
141 | enum { | 144 | enum { |
142 | RB_LEN_TIME_EXTEND = 8, | 145 | RB_LEN_TIME_EXTEND = 8, |
143 | RB_LEN_TIME_STAMP = 16, | 146 | RB_LEN_TIME_STAMP = 8, |
144 | }; | 147 | }; |
145 | 148 | ||
146 | #define skip_time_extend(event) \ | 149 | #define skip_time_extend(event) \ |
147 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | 150 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) |
148 | 151 | ||
152 | #define extended_time(event) \ | ||
153 | (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) | ||
154 | |||
149 | static inline int rb_null_event(struct ring_buffer_event *event) | 155 | static inline int rb_null_event(struct ring_buffer_event *event) |
150 | { | 156 | { |
151 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 157 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -209,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_event *event) | |||
209 | { | 215 | { |
210 | unsigned len = 0; | 216 | unsigned len = 0; |
211 | 217 | ||
212 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | 218 | if (extended_time(event)) { |
213 | /* time extends include the data event after it */ | 219 | /* time extends include the data event after it */ |
214 | len = RB_LEN_TIME_EXTEND; | 220 | len = RB_LEN_TIME_EXTEND; |
215 | event = skip_time_extend(event); | 221 | event = skip_time_extend(event); |
@@ -231,7 +237,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) | |||
231 | { | 237 | { |
232 | unsigned length; | 238 | unsigned length; |
233 | 239 | ||
234 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 240 | if (extended_time(event)) |
235 | event = skip_time_extend(event); | 241 | event = skip_time_extend(event); |
236 | 242 | ||
237 | length = rb_event_length(event); | 243 | length = rb_event_length(event); |
@@ -248,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
248 | static __always_inline void * | 254 | static __always_inline void * |
249 | rb_event_data(struct ring_buffer_event *event) | 255 | rb_event_data(struct ring_buffer_event *event) |
250 | { | 256 | { |
251 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 257 | if (extended_time(event)) |
252 | event = skip_time_extend(event); | 258 | event = skip_time_extend(event); |
253 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 259 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
254 | /* If length is in len field, then array[0] has the data */ | 260 | /* If length is in len field, then array[0] has the data */ |
@@ -275,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); | |||
275 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | 281 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) |
276 | #define TS_DELTA_TEST (~TS_MASK) | 282 | #define TS_DELTA_TEST (~TS_MASK) |
277 | 283 | ||
284 | /** | ||
285 | * ring_buffer_event_time_stamp - return the event's extended timestamp | ||
286 | * @event: the event to get the timestamp of | ||
287 | * | ||
288 | * Returns the extended timestamp associated with a data event. | ||
289 | * An extended time_stamp is a 64-bit timestamp represented | ||
290 | * internally in a special way that makes the best use of space | ||
291 | * contained within a ring buffer event. This function decodes | ||
292 | * it and maps it to a straight u64 value. | ||
293 | */ | ||
294 | u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) | ||
295 | { | ||
296 | u64 ts; | ||
297 | |||
298 | ts = event->array[0]; | ||
299 | ts <<= TS_SHIFT; | ||
300 | ts += event->time_delta; | ||
301 | |||
302 | return ts; | ||
303 | } | ||
304 | |||
278 | /* Flag when events were overwritten */ | 305 | /* Flag when events were overwritten */ |
279 | #define RB_MISSED_EVENTS (1 << 31) | 306 | #define RB_MISSED_EVENTS (1 << 31) |
280 | /* Missed count stored at end */ | 307 | /* Missed count stored at end */ |
@@ -451,6 +478,7 @@ struct ring_buffer_per_cpu { | |||
451 | struct buffer_page *reader_page; | 478 | struct buffer_page *reader_page; |
452 | unsigned long lost_events; | 479 | unsigned long lost_events; |
453 | unsigned long last_overrun; | 480 | unsigned long last_overrun; |
481 | unsigned long nest; | ||
454 | local_t entries_bytes; | 482 | local_t entries_bytes; |
455 | local_t entries; | 483 | local_t entries; |
456 | local_t overrun; | 484 | local_t overrun; |
@@ -488,6 +516,7 @@ struct ring_buffer { | |||
488 | u64 (*clock)(void); | 516 | u64 (*clock)(void); |
489 | 517 | ||
490 | struct rb_irq_work irq_work; | 518 | struct rb_irq_work irq_work; |
519 | bool time_stamp_abs; | ||
491 | }; | 520 | }; |
492 | 521 | ||
493 | struct ring_buffer_iter { | 522 | struct ring_buffer_iter { |
@@ -1134,30 +1163,60 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) | |||
1134 | static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) | 1163 | static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu) |
1135 | { | 1164 | { |
1136 | struct buffer_page *bpage, *tmp; | 1165 | struct buffer_page *bpage, *tmp; |
1166 | bool user_thread = current->mm != NULL; | ||
1167 | gfp_t mflags; | ||
1137 | long i; | 1168 | long i; |
1138 | 1169 | ||
1170 | /* | ||
1171 | * Check if the available memory is there first. | ||
1172 | * Note, si_mem_available() only gives us a rough estimate of available | ||
1173 | * memory. It may not be accurate. But we don't care, we just want | ||
1174 | * to prevent doing any allocation when it is obvious that it is | ||
1175 | * not going to succeed. | ||
1176 | */ | ||
1177 | i = si_mem_available(); | ||
1178 | if (i < nr_pages) | ||
1179 | return -ENOMEM; | ||
1180 | |||
1181 | /* | ||
1182 | * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails | ||
1183 | * gracefully without invoking oom-killer and the system is not | ||
1184 | * destabilized. | ||
1185 | */ | ||
1186 | mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL; | ||
1187 | |||
1188 | /* | ||
1189 | * If a user thread allocates too much, and si_mem_available() | ||
1190 | * reports there's enough memory, even though there is not. | ||
1191 | * Make sure the OOM killer kills this thread. This can happen | ||
1192 | * even with RETRY_MAYFAIL because another task may be doing | ||
1193 | * an allocation after this task has taken all memory. | ||
1194 | * This is the task the OOM killer needs to take out during this | ||
1195 | * loop, even if it was triggered by an allocation somewhere else. | ||
1196 | */ | ||
1197 | if (user_thread) | ||
1198 | set_current_oom_origin(); | ||
1139 | for (i = 0; i < nr_pages; i++) { | 1199 | for (i = 0; i < nr_pages; i++) { |
1140 | struct page *page; | 1200 | struct page *page; |
1141 | /* | 1201 | |
1142 | * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails | ||
1143 | * gracefully without invoking oom-killer and the system is not | ||
1144 | * destabilized. | ||
1145 | */ | ||
1146 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), | 1202 | bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), |
1147 | GFP_KERNEL | __GFP_RETRY_MAYFAIL, | 1203 | mflags, cpu_to_node(cpu)); |
1148 | cpu_to_node(cpu)); | ||
1149 | if (!bpage) | 1204 | if (!bpage) |
1150 | goto free_pages; | 1205 | goto free_pages; |
1151 | 1206 | ||
1152 | list_add(&bpage->list, pages); | 1207 | list_add(&bpage->list, pages); |
1153 | 1208 | ||
1154 | page = alloc_pages_node(cpu_to_node(cpu), | 1209 | page = alloc_pages_node(cpu_to_node(cpu), mflags, 0); |
1155 | GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0); | ||
1156 | if (!page) | 1210 | if (!page) |
1157 | goto free_pages; | 1211 | goto free_pages; |
1158 | bpage->page = page_address(page); | 1212 | bpage->page = page_address(page); |
1159 | rb_init_page(bpage->page); | 1213 | rb_init_page(bpage->page); |
1214 | |||
1215 | if (user_thread && fatal_signal_pending(current)) | ||
1216 | goto free_pages; | ||
1160 | } | 1217 | } |
1218 | if (user_thread) | ||
1219 | clear_current_oom_origin(); | ||
1161 | 1220 | ||
1162 | return 0; | 1221 | return 0; |
1163 | 1222 | ||
@@ -1166,6 +1225,8 @@ free_pages: | |||
1166 | list_del_init(&bpage->list); | 1225 | list_del_init(&bpage->list); |
1167 | free_buffer_page(bpage); | 1226 | free_buffer_page(bpage); |
1168 | } | 1227 | } |
1228 | if (user_thread) | ||
1229 | clear_current_oom_origin(); | ||
1169 | 1230 | ||
1170 | return -ENOMEM; | 1231 | return -ENOMEM; |
1171 | } | 1232 | } |
@@ -1382,6 +1443,16 @@ void ring_buffer_set_clock(struct ring_buffer *buffer, | |||
1382 | buffer->clock = clock; | 1443 | buffer->clock = clock; |
1383 | } | 1444 | } |
1384 | 1445 | ||
1446 | void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) | ||
1447 | { | ||
1448 | buffer->time_stamp_abs = abs; | ||
1449 | } | ||
1450 | |||
1451 | bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) | ||
1452 | { | ||
1453 | return buffer->time_stamp_abs; | ||
1454 | } | ||
1455 | |||
1385 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); | 1456 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
1386 | 1457 | ||
1387 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | 1458 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) |
@@ -2206,12 +2277,15 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
2206 | 2277 | ||
2207 | /* Slow path, do not inline */ | 2278 | /* Slow path, do not inline */ |
2208 | static noinline struct ring_buffer_event * | 2279 | static noinline struct ring_buffer_event * |
2209 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | 2280 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) |
2210 | { | 2281 | { |
2211 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | 2282 | if (abs) |
2283 | event->type_len = RINGBUF_TYPE_TIME_STAMP; | ||
2284 | else | ||
2285 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
2212 | 2286 | ||
2213 | /* Not the first event on the page? */ | 2287 | /* Not the first event on the page, or not delta? */ |
2214 | if (rb_event_index(event)) { | 2288 | if (abs || rb_event_index(event)) { |
2215 | event->time_delta = delta & TS_MASK; | 2289 | event->time_delta = delta & TS_MASK; |
2216 | event->array[0] = delta >> TS_SHIFT; | 2290 | event->array[0] = delta >> TS_SHIFT; |
2217 | } else { | 2291 | } else { |
@@ -2254,7 +2328,9 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, | |||
2254 | * add it to the start of the resevered space. | 2328 | * add it to the start of the resevered space. |
2255 | */ | 2329 | */ |
2256 | if (unlikely(info->add_timestamp)) { | 2330 | if (unlikely(info->add_timestamp)) { |
2257 | event = rb_add_time_stamp(event, delta); | 2331 | bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); |
2332 | |||
2333 | event = rb_add_time_stamp(event, info->delta, abs); | ||
2258 | length -= RB_LEN_TIME_EXTEND; | 2334 | length -= RB_LEN_TIME_EXTEND; |
2259 | delta = 0; | 2335 | delta = 0; |
2260 | } | 2336 | } |
@@ -2442,7 +2518,7 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer | |||
2442 | 2518 | ||
2443 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2519 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2444 | { | 2520 | { |
2445 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | 2521 | if (extended_time(event)) |
2446 | event = skip_time_extend(event); | 2522 | event = skip_time_extend(event); |
2447 | 2523 | ||
2448 | /* array[0] holds the actual length for the discarded event */ | 2524 | /* array[0] holds the actual length for the discarded event */ |
@@ -2486,10 +2562,11 @@ rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
2486 | cpu_buffer->write_stamp = | 2562 | cpu_buffer->write_stamp = |
2487 | cpu_buffer->commit_page->page->time_stamp; | 2563 | cpu_buffer->commit_page->page->time_stamp; |
2488 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | 2564 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { |
2489 | delta = event->array[0]; | 2565 | delta = ring_buffer_event_time_stamp(event); |
2490 | delta <<= TS_SHIFT; | ||
2491 | delta += event->time_delta; | ||
2492 | cpu_buffer->write_stamp += delta; | 2566 | cpu_buffer->write_stamp += delta; |
2567 | } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { | ||
2568 | delta = ring_buffer_event_time_stamp(event); | ||
2569 | cpu_buffer->write_stamp = delta; | ||
2493 | } else | 2570 | } else |
2494 | cpu_buffer->write_stamp += event->time_delta; | 2571 | cpu_buffer->write_stamp += event->time_delta; |
2495 | } | 2572 | } |
@@ -2581,10 +2658,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) | |||
2581 | bit = pc & NMI_MASK ? RB_CTX_NMI : | 2658 | bit = pc & NMI_MASK ? RB_CTX_NMI : |
2582 | pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; | 2659 | pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; |
2583 | 2660 | ||
2584 | if (unlikely(val & (1 << bit))) | 2661 | if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) |
2585 | return 1; | 2662 | return 1; |
2586 | 2663 | ||
2587 | val |= (1 << bit); | 2664 | val |= (1 << (bit + cpu_buffer->nest)); |
2588 | cpu_buffer->current_context = val; | 2665 | cpu_buffer->current_context = val; |
2589 | 2666 | ||
2590 | return 0; | 2667 | return 0; |
@@ -2593,7 +2670,57 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) | |||
2593 | static __always_inline void | 2670 | static __always_inline void |
2594 | trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) | 2671 | trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) |
2595 | { | 2672 | { |
2596 | cpu_buffer->current_context &= cpu_buffer->current_context - 1; | 2673 | cpu_buffer->current_context &= |
2674 | cpu_buffer->current_context - (1 << cpu_buffer->nest); | ||
2675 | } | ||
2676 | |||
2677 | /* The recursive locking above uses 4 bits */ | ||
2678 | #define NESTED_BITS 4 | ||
2679 | |||
2680 | /** | ||
2681 | * ring_buffer_nest_start - Allow to trace while nested | ||
2682 | * @buffer: The ring buffer to modify | ||
2683 | * | ||
2684 | * The ring buffer has a safty mechanism to prevent recursion. | ||
2685 | * But there may be a case where a trace needs to be done while | ||
2686 | * tracing something else. In this case, calling this function | ||
2687 | * will allow this function to nest within a currently active | ||
2688 | * ring_buffer_lock_reserve(). | ||
2689 | * | ||
2690 | * Call this function before calling another ring_buffer_lock_reserve() and | ||
2691 | * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). | ||
2692 | */ | ||
2693 | void ring_buffer_nest_start(struct ring_buffer *buffer) | ||
2694 | { | ||
2695 | struct ring_buffer_per_cpu *cpu_buffer; | ||
2696 | int cpu; | ||
2697 | |||
2698 | /* Enabled by ring_buffer_nest_end() */ | ||
2699 | preempt_disable_notrace(); | ||
2700 | cpu = raw_smp_processor_id(); | ||
2701 | cpu_buffer = buffer->buffers[cpu]; | ||
2702 | /* This is the shift value for the above recusive locking */ | ||
2703 | cpu_buffer->nest += NESTED_BITS; | ||
2704 | } | ||
2705 | |||
2706 | /** | ||
2707 | * ring_buffer_nest_end - Allow to trace while nested | ||
2708 | * @buffer: The ring buffer to modify | ||
2709 | * | ||
2710 | * Must be called after ring_buffer_nest_start() and after the | ||
2711 | * ring_buffer_unlock_commit(). | ||
2712 | */ | ||
2713 | void ring_buffer_nest_end(struct ring_buffer *buffer) | ||
2714 | { | ||
2715 | struct ring_buffer_per_cpu *cpu_buffer; | ||
2716 | int cpu; | ||
2717 | |||
2718 | /* disabled by ring_buffer_nest_start() */ | ||
2719 | cpu = raw_smp_processor_id(); | ||
2720 | cpu_buffer = buffer->buffers[cpu]; | ||
2721 | /* This is the shift value for the above recusive locking */ | ||
2722 | cpu_buffer->nest -= NESTED_BITS; | ||
2723 | preempt_enable_notrace(); | ||
2597 | } | 2724 | } |
2598 | 2725 | ||
2599 | /** | 2726 | /** |
@@ -2637,7 +2764,8 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
2637 | sched_clock_stable() ? "" : | 2764 | sched_clock_stable() ? "" : |
2638 | "If you just came from a suspend/resume,\n" | 2765 | "If you just came from a suspend/resume,\n" |
2639 | "please switch to the trace global clock:\n" | 2766 | "please switch to the trace global clock:\n" |
2640 | " echo global > /sys/kernel/debug/tracing/trace_clock\n"); | 2767 | " echo global > /sys/kernel/debug/tracing/trace_clock\n" |
2768 | "or add trace_clock=global to the kernel command line\n"); | ||
2641 | info->add_timestamp = 1; | 2769 | info->add_timestamp = 1; |
2642 | } | 2770 | } |
2643 | 2771 | ||
@@ -2669,7 +2797,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
2669 | * If this is the first commit on the page, then it has the same | 2797 | * If this is the first commit on the page, then it has the same |
2670 | * timestamp as the page itself. | 2798 | * timestamp as the page itself. |
2671 | */ | 2799 | */ |
2672 | if (!tail) | 2800 | if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer)) |
2673 | info->delta = 0; | 2801 | info->delta = 0; |
2674 | 2802 | ||
2675 | /* See if we shot pass the end of this buffer page */ | 2803 | /* See if we shot pass the end of this buffer page */ |
@@ -2746,8 +2874,11 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2746 | /* make sure this diff is calculated here */ | 2874 | /* make sure this diff is calculated here */ |
2747 | barrier(); | 2875 | barrier(); |
2748 | 2876 | ||
2749 | /* Did the write stamp get updated already? */ | 2877 | if (ring_buffer_time_stamp_abs(buffer)) { |
2750 | if (likely(info.ts >= cpu_buffer->write_stamp)) { | 2878 | info.delta = info.ts; |
2879 | rb_handle_timestamp(cpu_buffer, &info); | ||
2880 | } else /* Did the write stamp get updated already? */ | ||
2881 | if (likely(info.ts >= cpu_buffer->write_stamp)) { | ||
2751 | info.delta = diff; | 2882 | info.delta = diff; |
2752 | if (unlikely(test_time_stamp(info.delta))) | 2883 | if (unlikely(test_time_stamp(info.delta))) |
2753 | rb_handle_timestamp(cpu_buffer, &info); | 2884 | rb_handle_timestamp(cpu_buffer, &info); |
@@ -3429,14 +3560,13 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, | |||
3429 | return; | 3560 | return; |
3430 | 3561 | ||
3431 | case RINGBUF_TYPE_TIME_EXTEND: | 3562 | case RINGBUF_TYPE_TIME_EXTEND: |
3432 | delta = event->array[0]; | 3563 | delta = ring_buffer_event_time_stamp(event); |
3433 | delta <<= TS_SHIFT; | ||
3434 | delta += event->time_delta; | ||
3435 | cpu_buffer->read_stamp += delta; | 3564 | cpu_buffer->read_stamp += delta; |
3436 | return; | 3565 | return; |
3437 | 3566 | ||
3438 | case RINGBUF_TYPE_TIME_STAMP: | 3567 | case RINGBUF_TYPE_TIME_STAMP: |
3439 | /* FIXME: not implemented */ | 3568 | delta = ring_buffer_event_time_stamp(event); |
3569 | cpu_buffer->read_stamp = delta; | ||
3440 | return; | 3570 | return; |
3441 | 3571 | ||
3442 | case RINGBUF_TYPE_DATA: | 3572 | case RINGBUF_TYPE_DATA: |
@@ -3460,14 +3590,13 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, | |||
3460 | return; | 3590 | return; |
3461 | 3591 | ||
3462 | case RINGBUF_TYPE_TIME_EXTEND: | 3592 | case RINGBUF_TYPE_TIME_EXTEND: |
3463 | delta = event->array[0]; | 3593 | delta = ring_buffer_event_time_stamp(event); |
3464 | delta <<= TS_SHIFT; | ||
3465 | delta += event->time_delta; | ||
3466 | iter->read_stamp += delta; | 3594 | iter->read_stamp += delta; |
3467 | return; | 3595 | return; |
3468 | 3596 | ||
3469 | case RINGBUF_TYPE_TIME_STAMP: | 3597 | case RINGBUF_TYPE_TIME_STAMP: |
3470 | /* FIXME: not implemented */ | 3598 | delta = ring_buffer_event_time_stamp(event); |
3599 | iter->read_stamp = delta; | ||
3471 | return; | 3600 | return; |
3472 | 3601 | ||
3473 | case RINGBUF_TYPE_DATA: | 3602 | case RINGBUF_TYPE_DATA: |
@@ -3691,6 +3820,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3691 | struct buffer_page *reader; | 3820 | struct buffer_page *reader; |
3692 | int nr_loops = 0; | 3821 | int nr_loops = 0; |
3693 | 3822 | ||
3823 | if (ts) | ||
3824 | *ts = 0; | ||
3694 | again: | 3825 | again: |
3695 | /* | 3826 | /* |
3696 | * We repeat when a time extend is encountered. | 3827 | * We repeat when a time extend is encountered. |
@@ -3727,12 +3858,17 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3727 | goto again; | 3858 | goto again; |
3728 | 3859 | ||
3729 | case RINGBUF_TYPE_TIME_STAMP: | 3860 | case RINGBUF_TYPE_TIME_STAMP: |
3730 | /* FIXME: not implemented */ | 3861 | if (ts) { |
3862 | *ts = ring_buffer_event_time_stamp(event); | ||
3863 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | ||
3864 | cpu_buffer->cpu, ts); | ||
3865 | } | ||
3866 | /* Internal data, OK to advance */ | ||
3731 | rb_advance_reader(cpu_buffer); | 3867 | rb_advance_reader(cpu_buffer); |
3732 | goto again; | 3868 | goto again; |
3733 | 3869 | ||
3734 | case RINGBUF_TYPE_DATA: | 3870 | case RINGBUF_TYPE_DATA: |
3735 | if (ts) { | 3871 | if (ts && !(*ts)) { |
3736 | *ts = cpu_buffer->read_stamp + event->time_delta; | 3872 | *ts = cpu_buffer->read_stamp + event->time_delta; |
3737 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | 3873 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, |
3738 | cpu_buffer->cpu, ts); | 3874 | cpu_buffer->cpu, ts); |
@@ -3757,6 +3893,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3757 | struct ring_buffer_event *event; | 3893 | struct ring_buffer_event *event; |
3758 | int nr_loops = 0; | 3894 | int nr_loops = 0; |
3759 | 3895 | ||
3896 | if (ts) | ||
3897 | *ts = 0; | ||
3898 | |||
3760 | cpu_buffer = iter->cpu_buffer; | 3899 | cpu_buffer = iter->cpu_buffer; |
3761 | buffer = cpu_buffer->buffer; | 3900 | buffer = cpu_buffer->buffer; |
3762 | 3901 | ||
@@ -3809,12 +3948,17 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3809 | goto again; | 3948 | goto again; |
3810 | 3949 | ||
3811 | case RINGBUF_TYPE_TIME_STAMP: | 3950 | case RINGBUF_TYPE_TIME_STAMP: |
3812 | /* FIXME: not implemented */ | 3951 | if (ts) { |
3952 | *ts = ring_buffer_event_time_stamp(event); | ||
3953 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | ||
3954 | cpu_buffer->cpu, ts); | ||
3955 | } | ||
3956 | /* Internal data, OK to advance */ | ||
3813 | rb_advance_iter(iter); | 3957 | rb_advance_iter(iter); |
3814 | goto again; | 3958 | goto again; |
3815 | 3959 | ||
3816 | case RINGBUF_TYPE_DATA: | 3960 | case RINGBUF_TYPE_DATA: |
3817 | if (ts) { | 3961 | if (ts && !(*ts)) { |
3818 | *ts = iter->read_stamp + event->time_delta; | 3962 | *ts = iter->read_stamp + event->time_delta; |
3819 | ring_buffer_normalize_time_stamp(buffer, | 3963 | ring_buffer_normalize_time_stamp(buffer, |
3820 | cpu_buffer->cpu, ts); | 3964 | cpu_buffer->cpu, ts); |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5071931eb943..dfbcf9ee1447 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/nmi.h> | 41 | #include <linux/nmi.h> |
42 | #include <linux/fs.h> | 42 | #include <linux/fs.h> |
43 | #include <linux/trace.h> | 43 | #include <linux/trace.h> |
44 | #include <linux/sched/clock.h> | ||
44 | #include <linux/sched/rt.h> | 45 | #include <linux/sched/rt.h> |
45 | 46 | ||
46 | #include "trace.h" | 47 | #include "trace.h" |
@@ -1168,6 +1169,14 @@ static struct { | |||
1168 | ARCH_TRACE_CLOCKS | 1169 | ARCH_TRACE_CLOCKS |
1169 | }; | 1170 | }; |
1170 | 1171 | ||
1172 | bool trace_clock_in_ns(struct trace_array *tr) | ||
1173 | { | ||
1174 | if (trace_clocks[tr->clock_id].in_ns) | ||
1175 | return true; | ||
1176 | |||
1177 | return false; | ||
1178 | } | ||
1179 | |||
1171 | /* | 1180 | /* |
1172 | * trace_parser_get_init - gets the buffer for trace parser | 1181 | * trace_parser_get_init - gets the buffer for trace parser |
1173 | */ | 1182 | */ |
@@ -2269,7 +2278,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, | |||
2269 | 2278 | ||
2270 | *current_rb = trace_file->tr->trace_buffer.buffer; | 2279 | *current_rb = trace_file->tr->trace_buffer.buffer; |
2271 | 2280 | ||
2272 | if ((trace_file->flags & | 2281 | if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & |
2273 | (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && | 2282 | (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && |
2274 | (entry = this_cpu_read(trace_buffered_event))) { | 2283 | (entry = this_cpu_read(trace_buffered_event))) { |
2275 | /* Try to use the per cpu buffer first */ | 2284 | /* Try to use the per cpu buffer first */ |
@@ -4515,6 +4524,9 @@ static const char readme_msg[] = | |||
4515 | #ifdef CONFIG_X86_64 | 4524 | #ifdef CONFIG_X86_64 |
4516 | " x86-tsc: TSC cycle counter\n" | 4525 | " x86-tsc: TSC cycle counter\n" |
4517 | #endif | 4526 | #endif |
4527 | "\n timestamp_mode\t-view the mode used to timestamp events\n" | ||
4528 | " delta: Delta difference against a buffer-wide timestamp\n" | ||
4529 | " absolute: Absolute (standalone) timestamp\n" | ||
4518 | "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" | 4530 | "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" |
4519 | "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" | 4531 | "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" |
4520 | " tracing_cpumask\t- Limit which CPUs to trace\n" | 4532 | " tracing_cpumask\t- Limit which CPUs to trace\n" |
@@ -4691,8 +4703,9 @@ static const char readme_msg[] = | |||
4691 | "\t .sym display an address as a symbol\n" | 4703 | "\t .sym display an address as a symbol\n" |
4692 | "\t .sym-offset display an address as a symbol and offset\n" | 4704 | "\t .sym-offset display an address as a symbol and offset\n" |
4693 | "\t .execname display a common_pid as a program name\n" | 4705 | "\t .execname display a common_pid as a program name\n" |
4694 | "\t .syscall display a syscall id as a syscall name\n\n" | 4706 | "\t .syscall display a syscall id as a syscall name\n" |
4695 | "\t .log2 display log2 value rather than raw number\n\n" | 4707 | "\t .log2 display log2 value rather than raw number\n" |
4708 | "\t .usecs display a common_timestamp in microseconds\n\n" | ||
4696 | "\t The 'pause' parameter can be used to pause an existing hist\n" | 4709 | "\t The 'pause' parameter can be used to pause an existing hist\n" |
4697 | "\t trigger or to start a hist trigger but not log any events\n" | 4710 | "\t trigger or to start a hist trigger but not log any events\n" |
4698 | "\t until told to do so. 'continue' can be used to start or\n" | 4711 | "\t until told to do so. 'continue' can be used to start or\n" |
@@ -6202,7 +6215,7 @@ static int tracing_clock_show(struct seq_file *m, void *v) | |||
6202 | return 0; | 6215 | return 0; |
6203 | } | 6216 | } |
6204 | 6217 | ||
6205 | static int tracing_set_clock(struct trace_array *tr, const char *clockstr) | 6218 | int tracing_set_clock(struct trace_array *tr, const char *clockstr) |
6206 | { | 6219 | { |
6207 | int i; | 6220 | int i; |
6208 | 6221 | ||
@@ -6282,6 +6295,71 @@ static int tracing_clock_open(struct inode *inode, struct file *file) | |||
6282 | return ret; | 6295 | return ret; |
6283 | } | 6296 | } |
6284 | 6297 | ||
6298 | static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) | ||
6299 | { | ||
6300 | struct trace_array *tr = m->private; | ||
6301 | |||
6302 | mutex_lock(&trace_types_lock); | ||
6303 | |||
6304 | if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) | ||
6305 | seq_puts(m, "delta [absolute]\n"); | ||
6306 | else | ||
6307 | seq_puts(m, "[delta] absolute\n"); | ||
6308 | |||
6309 | mutex_unlock(&trace_types_lock); | ||
6310 | |||
6311 | return 0; | ||
6312 | } | ||
6313 | |||
6314 | static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) | ||
6315 | { | ||
6316 | struct trace_array *tr = inode->i_private; | ||
6317 | int ret; | ||
6318 | |||
6319 | if (tracing_disabled) | ||
6320 | return -ENODEV; | ||
6321 | |||
6322 | if (trace_array_get(tr)) | ||
6323 | return -ENODEV; | ||
6324 | |||
6325 | ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); | ||
6326 | if (ret < 0) | ||
6327 | trace_array_put(tr); | ||
6328 | |||
6329 | return ret; | ||
6330 | } | ||
6331 | |||
6332 | int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) | ||
6333 | { | ||
6334 | int ret = 0; | ||
6335 | |||
6336 | mutex_lock(&trace_types_lock); | ||
6337 | |||
6338 | if (abs && tr->time_stamp_abs_ref++) | ||
6339 | goto out; | ||
6340 | |||
6341 | if (!abs) { | ||
6342 | if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { | ||
6343 | ret = -EINVAL; | ||
6344 | goto out; | ||
6345 | } | ||
6346 | |||
6347 | if (--tr->time_stamp_abs_ref) | ||
6348 | goto out; | ||
6349 | } | ||
6350 | |||
6351 | ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); | ||
6352 | |||
6353 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
6354 | if (tr->max_buffer.buffer) | ||
6355 | ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); | ||
6356 | #endif | ||
6357 | out: | ||
6358 | mutex_unlock(&trace_types_lock); | ||
6359 | |||
6360 | return ret; | ||
6361 | } | ||
6362 | |||
6285 | struct ftrace_buffer_info { | 6363 | struct ftrace_buffer_info { |
6286 | struct trace_iterator iter; | 6364 | struct trace_iterator iter; |
6287 | void *spare; | 6365 | void *spare; |
@@ -6529,6 +6607,13 @@ static const struct file_operations trace_clock_fops = { | |||
6529 | .write = tracing_clock_write, | 6607 | .write = tracing_clock_write, |
6530 | }; | 6608 | }; |
6531 | 6609 | ||
6610 | static const struct file_operations trace_time_stamp_mode_fops = { | ||
6611 | .open = tracing_time_stamp_mode_open, | ||
6612 | .read = seq_read, | ||
6613 | .llseek = seq_lseek, | ||
6614 | .release = tracing_single_release_tr, | ||
6615 | }; | ||
6616 | |||
6532 | #ifdef CONFIG_TRACER_SNAPSHOT | 6617 | #ifdef CONFIG_TRACER_SNAPSHOT |
6533 | static const struct file_operations snapshot_fops = { | 6618 | static const struct file_operations snapshot_fops = { |
6534 | .open = tracing_snapshot_open, | 6619 | .open = tracing_snapshot_open, |
@@ -7699,6 +7784,7 @@ static int instance_mkdir(const char *name) | |||
7699 | 7784 | ||
7700 | INIT_LIST_HEAD(&tr->systems); | 7785 | INIT_LIST_HEAD(&tr->systems); |
7701 | INIT_LIST_HEAD(&tr->events); | 7786 | INIT_LIST_HEAD(&tr->events); |
7787 | INIT_LIST_HEAD(&tr->hist_vars); | ||
7702 | 7788 | ||
7703 | if (allocate_trace_buffers(tr, trace_buf_size) < 0) | 7789 | if (allocate_trace_buffers(tr, trace_buf_size) < 0) |
7704 | goto out_free_tr; | 7790 | goto out_free_tr; |
@@ -7851,6 +7937,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) | |||
7851 | trace_create_file("tracing_on", 0644, d_tracer, | 7937 | trace_create_file("tracing_on", 0644, d_tracer, |
7852 | tr, &rb_simple_fops); | 7938 | tr, &rb_simple_fops); |
7853 | 7939 | ||
7940 | trace_create_file("timestamp_mode", 0444, d_tracer, tr, | ||
7941 | &trace_time_stamp_mode_fops); | ||
7942 | |||
7854 | create_trace_options_dir(tr); | 7943 | create_trace_options_dir(tr); |
7855 | 7944 | ||
7856 | #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) | 7945 | #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) |
@@ -8446,6 +8535,7 @@ __init static int tracer_alloc_buffers(void) | |||
8446 | 8535 | ||
8447 | INIT_LIST_HEAD(&global_trace.systems); | 8536 | INIT_LIST_HEAD(&global_trace.systems); |
8448 | INIT_LIST_HEAD(&global_trace.events); | 8537 | INIT_LIST_HEAD(&global_trace.events); |
8538 | INIT_LIST_HEAD(&global_trace.hist_vars); | ||
8449 | list_add(&global_trace.list, &ftrace_trace_arrays); | 8539 | list_add(&global_trace.list, &ftrace_trace_arrays); |
8450 | 8540 | ||
8451 | apply_trace_boot_options(); | 8541 | apply_trace_boot_options(); |
@@ -8507,3 +8597,21 @@ __init static int clear_boot_tracer(void) | |||
8507 | 8597 | ||
8508 | fs_initcall(tracer_init_tracefs); | 8598 | fs_initcall(tracer_init_tracefs); |
8509 | late_initcall_sync(clear_boot_tracer); | 8599 | late_initcall_sync(clear_boot_tracer); |
8600 | |||
8601 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | ||
8602 | __init static int tracing_set_default_clock(void) | ||
8603 | { | ||
8604 | /* sched_clock_stable() is determined in late_initcall */ | ||
8605 | if (!trace_boot_clock && !sched_clock_stable()) { | ||
8606 | printk(KERN_WARNING | ||
8607 | "Unstable clock detected, switching default tracing clock to \"global\"\n" | ||
8608 | "If you want to keep using the local clock, then add:\n" | ||
8609 | " \"trace_clock=local\"\n" | ||
8610 | "on the kernel command line\n"); | ||
8611 | tracing_set_clock(&global_trace, "global"); | ||
8612 | } | ||
8613 | |||
8614 | return 0; | ||
8615 | } | ||
8616 | late_initcall_sync(tracing_set_default_clock); | ||
8617 | #endif | ||
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2a6d0325a761..6fb46a06c9dc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -273,6 +273,8 @@ struct trace_array { | |||
273 | /* function tracing enabled */ | 273 | /* function tracing enabled */ |
274 | int function_enabled; | 274 | int function_enabled; |
275 | #endif | 275 | #endif |
276 | int time_stamp_abs_ref; | ||
277 | struct list_head hist_vars; | ||
276 | }; | 278 | }; |
277 | 279 | ||
278 | enum { | 280 | enum { |
@@ -286,6 +288,11 @@ extern struct mutex trace_types_lock; | |||
286 | extern int trace_array_get(struct trace_array *tr); | 288 | extern int trace_array_get(struct trace_array *tr); |
287 | extern void trace_array_put(struct trace_array *tr); | 289 | extern void trace_array_put(struct trace_array *tr); |
288 | 290 | ||
291 | extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); | ||
292 | extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); | ||
293 | |||
294 | extern bool trace_clock_in_ns(struct trace_array *tr); | ||
295 | |||
289 | /* | 296 | /* |
290 | * The global tracer (top) should be the first trace array added, | 297 | * The global tracer (top) should be the first trace array added, |
291 | * but we check the flag anyway. | 298 | * but we check the flag anyway. |
@@ -1209,12 +1216,11 @@ struct ftrace_event_field { | |||
1209 | int is_signed; | 1216 | int is_signed; |
1210 | }; | 1217 | }; |
1211 | 1218 | ||
1219 | struct prog_entry; | ||
1220 | |||
1212 | struct event_filter { | 1221 | struct event_filter { |
1213 | int n_preds; /* Number assigned */ | 1222 | struct prog_entry __rcu *prog; |
1214 | int a_preds; /* allocated */ | 1223 | char *filter_string; |
1215 | struct filter_pred __rcu *preds; | ||
1216 | struct filter_pred __rcu *root; | ||
1217 | char *filter_string; | ||
1218 | }; | 1224 | }; |
1219 | 1225 | ||
1220 | struct event_subsystem { | 1226 | struct event_subsystem { |
@@ -1291,7 +1297,7 @@ __event_trigger_test_discard(struct trace_event_file *file, | |||
1291 | unsigned long eflags = file->flags; | 1297 | unsigned long eflags = file->flags; |
1292 | 1298 | ||
1293 | if (eflags & EVENT_FILE_FL_TRIGGER_COND) | 1299 | if (eflags & EVENT_FILE_FL_TRIGGER_COND) |
1294 | *tt = event_triggers_call(file, entry); | 1300 | *tt = event_triggers_call(file, entry, event); |
1295 | 1301 | ||
1296 | if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || | 1302 | if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || |
1297 | (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && | 1303 | (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && |
@@ -1328,7 +1334,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, | |||
1328 | trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); | 1334 | trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); |
1329 | 1335 | ||
1330 | if (tt) | 1336 | if (tt) |
1331 | event_triggers_post_call(file, tt, entry); | 1337 | event_triggers_post_call(file, tt, entry, event); |
1332 | } | 1338 | } |
1333 | 1339 | ||
1334 | /** | 1340 | /** |
@@ -1361,7 +1367,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, | |||
1361 | irq_flags, pc, regs); | 1367 | irq_flags, pc, regs); |
1362 | 1368 | ||
1363 | if (tt) | 1369 | if (tt) |
1364 | event_triggers_post_call(file, tt, entry); | 1370 | event_triggers_post_call(file, tt, entry, event); |
1365 | } | 1371 | } |
1366 | 1372 | ||
1367 | #define FILTER_PRED_INVALID ((unsigned short)-1) | 1373 | #define FILTER_PRED_INVALID ((unsigned short)-1) |
@@ -1406,12 +1412,8 @@ struct filter_pred { | |||
1406 | unsigned short *ops; | 1412 | unsigned short *ops; |
1407 | struct ftrace_event_field *field; | 1413 | struct ftrace_event_field *field; |
1408 | int offset; | 1414 | int offset; |
1409 | int not; | 1415 | int not; |
1410 | int op; | 1416 | int op; |
1411 | unsigned short index; | ||
1412 | unsigned short parent; | ||
1413 | unsigned short left; | ||
1414 | unsigned short right; | ||
1415 | }; | 1417 | }; |
1416 | 1418 | ||
1417 | static inline bool is_string_field(struct ftrace_event_field *field) | 1419 | static inline bool is_string_field(struct ftrace_event_field *field) |
@@ -1543,6 +1545,8 @@ extern void pause_named_trigger(struct event_trigger_data *data); | |||
1543 | extern void unpause_named_trigger(struct event_trigger_data *data); | 1545 | extern void unpause_named_trigger(struct event_trigger_data *data); |
1544 | extern void set_named_trigger_data(struct event_trigger_data *data, | 1546 | extern void set_named_trigger_data(struct event_trigger_data *data, |
1545 | struct event_trigger_data *named_data); | 1547 | struct event_trigger_data *named_data); |
1548 | extern struct event_trigger_data * | ||
1549 | get_named_trigger_data(struct event_trigger_data *data); | ||
1546 | extern int register_event_command(struct event_command *cmd); | 1550 | extern int register_event_command(struct event_command *cmd); |
1547 | extern int unregister_event_command(struct event_command *cmd); | 1551 | extern int unregister_event_command(struct event_command *cmd); |
1548 | extern int register_trigger_hist_enable_disable_cmds(void); | 1552 | extern int register_trigger_hist_enable_disable_cmds(void); |
@@ -1586,7 +1590,8 @@ extern int register_trigger_hist_enable_disable_cmds(void); | |||
1586 | */ | 1590 | */ |
1587 | struct event_trigger_ops { | 1591 | struct event_trigger_ops { |
1588 | void (*func)(struct event_trigger_data *data, | 1592 | void (*func)(struct event_trigger_data *data, |
1589 | void *rec); | 1593 | void *rec, |
1594 | struct ring_buffer_event *rbe); | ||
1590 | int (*init)(struct event_trigger_ops *ops, | 1595 | int (*init)(struct event_trigger_ops *ops, |
1591 | struct event_trigger_data *data); | 1596 | struct event_trigger_data *data); |
1592 | void (*free)(struct event_trigger_ops *ops, | 1597 | void (*free)(struct event_trigger_ops *ops, |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 5fdc779f411d..d8a188e0418a 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -96,7 +96,7 @@ u64 notrace trace_clock_global(void) | |||
96 | int this_cpu; | 96 | int this_cpu; |
97 | u64 now; | 97 | u64 now; |
98 | 98 | ||
99 | local_irq_save(flags); | 99 | raw_local_irq_save(flags); |
100 | 100 | ||
101 | this_cpu = raw_smp_processor_id(); | 101 | this_cpu = raw_smp_processor_id(); |
102 | now = sched_clock_cpu(this_cpu); | 102 | now = sched_clock_cpu(this_cpu); |
@@ -122,7 +122,7 @@ u64 notrace trace_clock_global(void) | |||
122 | arch_spin_unlock(&trace_clock_struct.lock); | 122 | arch_spin_unlock(&trace_clock_struct.lock); |
123 | 123 | ||
124 | out: | 124 | out: |
125 | local_irq_restore(flags); | 125 | raw_local_irq_restore(flags); |
126 | 126 | ||
127 | return now; | 127 | return now; |
128 | } | 128 | } |
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index a764aec3c9a1..1bda4ec95e18 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -33,163 +33,595 @@ | |||
33 | "# Only events with the given fields will be affected.\n" \ | 33 | "# Only events with the given fields will be affected.\n" \ |
34 | "# If no events are modified, an error message will be displayed here" | 34 | "# If no events are modified, an error message will be displayed here" |
35 | 35 | ||
36 | enum filter_op_ids | 36 | /* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */ |
37 | { | 37 | #define OPS \ |
38 | OP_OR, | 38 | C( OP_GLOB, "~" ), \ |
39 | OP_AND, | 39 | C( OP_NE, "!=" ), \ |
40 | OP_GLOB, | 40 | C( OP_EQ, "==" ), \ |
41 | OP_NE, | 41 | C( OP_LE, "<=" ), \ |
42 | OP_EQ, | 42 | C( OP_LT, "<" ), \ |
43 | OP_LT, | 43 | C( OP_GE, ">=" ), \ |
44 | OP_LE, | 44 | C( OP_GT, ">" ), \ |
45 | OP_GT, | 45 | C( OP_BAND, "&" ), \ |
46 | OP_GE, | 46 | C( OP_MAX, NULL ) |
47 | OP_BAND, | ||
48 | OP_NOT, | ||
49 | OP_NONE, | ||
50 | OP_OPEN_PAREN, | ||
51 | }; | ||
52 | 47 | ||
53 | struct filter_op { | 48 | #undef C |
54 | int id; | 49 | #define C(a, b) a |
55 | char *string; | ||
56 | int precedence; | ||
57 | }; | ||
58 | 50 | ||
59 | /* Order must be the same as enum filter_op_ids above */ | 51 | enum filter_op_ids { OPS }; |
60 | static struct filter_op filter_ops[] = { | ||
61 | { OP_OR, "||", 1 }, | ||
62 | { OP_AND, "&&", 2 }, | ||
63 | { OP_GLOB, "~", 4 }, | ||
64 | { OP_NE, "!=", 4 }, | ||
65 | { OP_EQ, "==", 4 }, | ||
66 | { OP_LT, "<", 5 }, | ||
67 | { OP_LE, "<=", 5 }, | ||
68 | { OP_GT, ">", 5 }, | ||
69 | { OP_GE, ">=", 5 }, | ||
70 | { OP_BAND, "&", 6 }, | ||
71 | { OP_NOT, "!", 6 }, | ||
72 | { OP_NONE, "OP_NONE", 0 }, | ||
73 | { OP_OPEN_PAREN, "(", 0 }, | ||
74 | }; | ||
75 | 52 | ||
76 | enum { | 53 | #undef C |
77 | FILT_ERR_NONE, | 54 | #define C(a, b) b |
78 | FILT_ERR_INVALID_OP, | ||
79 | FILT_ERR_UNBALANCED_PAREN, | ||
80 | FILT_ERR_TOO_MANY_OPERANDS, | ||
81 | FILT_ERR_OPERAND_TOO_LONG, | ||
82 | FILT_ERR_FIELD_NOT_FOUND, | ||
83 | FILT_ERR_ILLEGAL_FIELD_OP, | ||
84 | FILT_ERR_ILLEGAL_INTVAL, | ||
85 | FILT_ERR_BAD_SUBSYS_FILTER, | ||
86 | FILT_ERR_TOO_MANY_PREDS, | ||
87 | FILT_ERR_MISSING_FIELD, | ||
88 | FILT_ERR_INVALID_FILTER, | ||
89 | FILT_ERR_IP_FIELD_ONLY, | ||
90 | FILT_ERR_ILLEGAL_NOT_OP, | ||
91 | }; | ||
92 | 55 | ||
93 | static char *err_text[] = { | 56 | static const char * ops[] = { OPS }; |
94 | "No error", | ||
95 | "Invalid operator", | ||
96 | "Unbalanced parens", | ||
97 | "Too many operands", | ||
98 | "Operand too long", | ||
99 | "Field not found", | ||
100 | "Illegal operation for field type", | ||
101 | "Illegal integer value", | ||
102 | "Couldn't find or set field in one of a subsystem's events", | ||
103 | "Too many terms in predicate expression", | ||
104 | "Missing field name and/or value", | ||
105 | "Meaningless filter expression", | ||
106 | "Only 'ip' field is supported for function trace", | ||
107 | "Illegal use of '!'", | ||
108 | }; | ||
109 | 57 | ||
110 | struct opstack_op { | 58 | /* |
111 | enum filter_op_ids op; | 59 | * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND |
112 | struct list_head list; | 60 | * pred_funcs_##type below must match the order of them above. |
113 | }; | 61 | */ |
62 | #define PRED_FUNC_START OP_LE | ||
63 | #define PRED_FUNC_MAX (OP_BAND - PRED_FUNC_START) | ||
64 | |||
65 | #define ERRORS \ | ||
66 | C(NONE, "No error"), \ | ||
67 | C(INVALID_OP, "Invalid operator"), \ | ||
68 | C(TOO_MANY_OPEN, "Too many '('"), \ | ||
69 | C(TOO_MANY_CLOSE, "Too few '('"), \ | ||
70 | C(MISSING_QUOTE, "Missing matching quote"), \ | ||
71 | C(OPERAND_TOO_LONG, "Operand too long"), \ | ||
72 | C(EXPECT_STRING, "Expecting string field"), \ | ||
73 | C(EXPECT_DIGIT, "Expecting numeric field"), \ | ||
74 | C(ILLEGAL_FIELD_OP, "Illegal operation for field type"), \ | ||
75 | C(FIELD_NOT_FOUND, "Field not found"), \ | ||
76 | C(ILLEGAL_INTVAL, "Illegal integer value"), \ | ||
77 | C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \ | ||
78 | C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ | ||
79 | C(INVALID_FILTER, "Meaningless filter expression"), \ | ||
80 | C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ | ||
81 | C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), | ||
82 | |||
83 | #undef C | ||
84 | #define C(a, b) FILT_ERR_##a | ||
85 | |||
86 | enum { ERRORS }; | ||
87 | |||
88 | #undef C | ||
89 | #define C(a, b) b | ||
90 | |||
91 | static char *err_text[] = { ERRORS }; | ||
92 | |||
93 | /* Called after a '!' character but "!=" and "!~" are not "not"s */ | ||
94 | static bool is_not(const char *str) | ||
95 | { | ||
96 | switch (str[1]) { | ||
97 | case '=': | ||
98 | case '~': | ||
99 | return false; | ||
100 | } | ||
101 | return true; | ||
102 | } | ||
114 | 103 | ||
115 | struct postfix_elt { | 104 | /** |
116 | enum filter_op_ids op; | 105 | * prog_entry - a singe entry in the filter program |
117 | char *operand; | 106 | * @target: Index to jump to on a branch (actually one minus the index) |
118 | struct list_head list; | 107 | * @when_to_branch: The value of the result of the predicate to do a branch |
108 | * @pred: The predicate to execute. | ||
109 | */ | ||
110 | struct prog_entry { | ||
111 | int target; | ||
112 | int when_to_branch; | ||
113 | struct filter_pred *pred; | ||
119 | }; | 114 | }; |
120 | 115 | ||
121 | struct filter_parse_state { | 116 | /** |
122 | struct filter_op *ops; | 117 | * update_preds- assign a program entry a label target |
123 | struct list_head opstack; | 118 | * @prog: The program array |
124 | struct list_head postfix; | 119 | * @N: The index of the current entry in @prog |
120 | * @when_to_branch: What to assign a program entry for its branch condition | ||
121 | * | ||
122 | * The program entry at @N has a target that points to the index of a program | ||
123 | * entry that can have its target and when_to_branch fields updated. | ||
124 | * Update the current program entry denoted by index @N target field to be | ||
125 | * that of the updated entry. This will denote the entry to update if | ||
126 | * we are processing an "||" after an "&&" | ||
127 | */ | ||
128 | static void update_preds(struct prog_entry *prog, int N, int invert) | ||
129 | { | ||
130 | int t, s; | ||
131 | |||
132 | t = prog[N].target; | ||
133 | s = prog[t].target; | ||
134 | prog[t].when_to_branch = invert; | ||
135 | prog[t].target = N; | ||
136 | prog[N].target = s; | ||
137 | } | ||
138 | |||
139 | struct filter_parse_error { | ||
125 | int lasterr; | 140 | int lasterr; |
126 | int lasterr_pos; | 141 | int lasterr_pos; |
127 | |||
128 | struct { | ||
129 | char *string; | ||
130 | unsigned int cnt; | ||
131 | unsigned int tail; | ||
132 | } infix; | ||
133 | |||
134 | struct { | ||
135 | char string[MAX_FILTER_STR_VAL]; | ||
136 | int pos; | ||
137 | unsigned int tail; | ||
138 | } operand; | ||
139 | }; | 142 | }; |
140 | 143 | ||
141 | struct pred_stack { | 144 | static void parse_error(struct filter_parse_error *pe, int err, int pos) |
142 | struct filter_pred **preds; | 145 | { |
143 | int index; | 146 | pe->lasterr = err; |
147 | pe->lasterr_pos = pos; | ||
148 | } | ||
149 | |||
150 | typedef int (*parse_pred_fn)(const char *str, void *data, int pos, | ||
151 | struct filter_parse_error *pe, | ||
152 | struct filter_pred **pred); | ||
153 | |||
154 | enum { | ||
155 | INVERT = 1, | ||
156 | PROCESS_AND = 2, | ||
157 | PROCESS_OR = 4, | ||
144 | }; | 158 | }; |
145 | 159 | ||
146 | /* If not of not match is equal to not of not, then it is a match */ | 160 | /* |
161 | * Without going into a formal proof, this explains the method that is used in | ||
162 | * parsing the logical expressions. | ||
163 | * | ||
164 | * For example, if we have: "a && !(!b || (c && g)) || d || e && !f" | ||
165 | * The first pass will convert it into the following program: | ||
166 | * | ||
167 | * n1: r=a; l1: if (!r) goto l4; | ||
168 | * n2: r=b; l2: if (!r) goto l4; | ||
169 | * n3: r=c; r=!r; l3: if (r) goto l4; | ||
170 | * n4: r=g; r=!r; l4: if (r) goto l5; | ||
171 | * n5: r=d; l5: if (r) goto T | ||
172 | * n6: r=e; l6: if (!r) goto l7; | ||
173 | * n7: r=f; r=!r; l7: if (!r) goto F | ||
174 | * T: return TRUE | ||
175 | * F: return FALSE | ||
176 | * | ||
177 | * To do this, we use a data structure to represent each of the above | ||
178 | * predicate and conditions that has: | ||
179 | * | ||
180 | * predicate, when_to_branch, invert, target | ||
181 | * | ||
182 | * The "predicate" will hold the function to determine the result "r". | ||
183 | * The "when_to_branch" denotes what "r" should be if a branch is to be taken | ||
184 | * "&&" would contain "!r" or (0) and "||" would contain "r" or (1). | ||
185 | * The "invert" holds whether the value should be reversed before testing. | ||
186 | * The "target" contains the label "l#" to jump to. | ||
187 | * | ||
188 | * A stack is created to hold values when parentheses are used. | ||
189 | * | ||
190 | * To simplify the logic, the labels will start at 0 and not 1. | ||
191 | * | ||
192 | * The possible invert values are 1 and 0. The number of "!"s that are in scope | ||
193 | * before the predicate determines the invert value, if the number is odd then | ||
194 | * the invert value is 1 and 0 otherwise. This means the invert value only | ||
195 | * needs to be toggled when a new "!" is introduced compared to what is stored | ||
196 | * on the stack, where parentheses were used. | ||
197 | * | ||
198 | * The top of the stack and "invert" are initialized to zero. | ||
199 | * | ||
200 | * ** FIRST PASS ** | ||
201 | * | ||
202 | * #1 A loop through all the tokens is done: | ||
203 | * | ||
204 | * #2 If the token is an "(", the stack is push, and the current stack value | ||
205 | * gets the current invert value, and the loop continues to the next token. | ||
206 | * The top of the stack saves the "invert" value to keep track of what | ||
207 | * the current inversion is. As "!(a && !b || c)" would require all | ||
208 | * predicates being affected separately by the "!" before the parentheses. | ||
209 | * And that would end up being equivalent to "(!a || b) && !c" | ||
210 | * | ||
211 | * #3 If the token is an "!", the current "invert" value gets inverted, and | ||
212 | * the loop continues. Note, if the next token is a predicate, then | ||
213 | * this "invert" value is only valid for the current program entry, | ||
214 | * and does not affect other predicates later on. | ||
215 | * | ||
216 | * The only other acceptable token is the predicate string. | ||
217 | * | ||
218 | * #4 A new entry into the program is added saving: the predicate and the | ||
219 | * current value of "invert". The target is currently assigned to the | ||
220 | * previous program index (this will not be its final value). | ||
221 | * | ||
222 | * #5 We now enter another loop and look at the next token. The only valid | ||
223 | * tokens are ")", "&&", "||" or end of the input string "\0". | ||
224 | * | ||
225 | * #6 The invert variable is reset to the current value saved on the top of | ||
226 | * the stack. | ||
227 | * | ||
228 | * #7 The top of the stack holds not only the current invert value, but also | ||
229 | * if a "&&" or "||" needs to be processed. Note, the "&&" takes higher | ||
230 | * precedence than "||". That is "a && b || c && d" is equivalent to | ||
231 | * "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs | ||
232 | * to be processed. This is the case if an "&&" was the last token. If it was | ||
233 | * then we call update_preds(). This takes the program, the current index in | ||
234 | * the program, and the current value of "invert". More will be described | ||
235 | * below about this function. | ||
236 | * | ||
237 | * #8 If the next token is "&&" then we set a flag in the top of the stack | ||
238 | * that denotes that "&&" needs to be processed, break out of this loop | ||
239 | * and continue with the outer loop. | ||
240 | * | ||
241 | * #9 Otherwise, if a "||" needs to be processed then update_preds() is called. | ||
242 | * This is called with the program, the current index in the program, but | ||
243 | * this time with an inverted value of "invert" (that is !invert). This is | ||
244 | * because the value taken will become the "when_to_branch" value of the | ||
245 | * program. | ||
246 | * Note, this is called when the next token is not an "&&". As stated before, | ||
247 | * "&&" takes higher precedence, and "||" should not be processed yet if the | ||
248 | * next logical operation is "&&". | ||
249 | * | ||
250 | * #10 If the next token is "||" then we set a flag in the top of the stack | ||
251 | * that denotes that "||" needs to be processed, break out of this loop | ||
252 | * and continue with the outer loop. | ||
253 | * | ||
254 | * #11 If this is the end of the input string "\0" then we break out of both | ||
255 | * loops. | ||
256 | * | ||
257 | * #12 Otherwise, the next token is ")", where we pop the stack and continue | ||
258 | * this inner loop. | ||
259 | * | ||
260 | * Now to discuss the update_pred() function, as that is key to the setting up | ||
261 | * of the program. Remember the "target" of the program is initialized to the | ||
262 | * previous index and not the "l" label. The target holds the index into the | ||
263 | * program that gets affected by the operand. Thus if we have something like | ||
264 | * "a || b && c", when we process "a" the target will be "-1" (undefined). | ||
265 | * When we process "b", its target is "0", which is the index of "a", as that's | ||
266 | * the predicate that is affected by "||". But because the next token after "b" | ||
267 | * is "&&" we don't call update_preds(). Instead continue to "c". As the | ||
268 | * next token after "c" is not "&&" but the end of input, we first process the | ||
269 | * "&&" by calling update_preds() for the "&&" then we process the "||" by | ||
270 | * callin updates_preds() with the values for processing "||". | ||
271 | * | ||
272 | * What does that mean? What update_preds() does is to first save the "target" | ||
273 | * of the program entry indexed by the current program entry's "target" | ||
274 | * (remember the "target" is initialized to previous program entry), and then | ||
275 | * sets that "target" to the current index which represents the label "l#". | ||
276 | * That entry's "when_to_branch" is set to the value passed in (the "invert" | ||
277 | * or "!invert"). Then it sets the current program entry's target to the saved | ||
278 | * "target" value (the old value of the program that had its "target" updated | ||
279 | * to the label). | ||
280 | * | ||
281 | * Looking back at "a || b && c", we have the following steps: | ||
282 | * "a" - prog[0] = { "a", X, -1 } // pred, when_to_branch, target | ||
283 | * "||" - flag that we need to process "||"; continue outer loop | ||
284 | * "b" - prog[1] = { "b", X, 0 } | ||
285 | * "&&" - flag that we need to process "&&"; continue outer loop | ||
286 | * (Notice we did not process "||") | ||
287 | * "c" - prog[2] = { "c", X, 1 } | ||
288 | * update_preds(prog, 2, 0); // invert = 0 as we are processing "&&" | ||
289 | * t = prog[2].target; // t = 1 | ||
290 | * s = prog[t].target; // s = 0 | ||
291 | * prog[t].target = 2; // Set target to "l2" | ||
292 | * prog[t].when_to_branch = 0; | ||
293 | * prog[2].target = s; | ||
294 | * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||" | ||
295 | * t = prog[2].target; // t = 0 | ||
296 | * s = prog[t].target; // s = -1 | ||
297 | * prog[t].target = 2; // Set target to "l2" | ||
298 | * prog[t].when_to_branch = 1; | ||
299 | * prog[2].target = s; | ||
300 | * | ||
301 | * #13 Which brings us to the final step of the first pass, which is to set | ||
302 | * the last program entry's when_to_branch and target, which will be | ||
303 | * when_to_branch = 0; target = N; ( the label after the program entry after | ||
304 | * the last program entry processed above). | ||
305 | * | ||
306 | * If we denote "TRUE" to be the entry after the last program entry processed, | ||
307 | * and "FALSE" the program entry after that, we are now done with the first | ||
308 | * pass. | ||
309 | * | ||
310 | * Making the above "a || b && c" have a progam of: | ||
311 | * prog[0] = { "a", 1, 2 } | ||
312 | * prog[1] = { "b", 0, 2 } | ||
313 | * prog[2] = { "c", 0, 3 } | ||
314 | * | ||
315 | * Which translates into: | ||
316 | * n0: r = a; l0: if (r) goto l2; | ||
317 | * n1: r = b; l1: if (!r) goto l2; | ||
318 | * n2: r = c; l2: if (!r) goto l3; // Which is the same as "goto F;" | ||
319 | * T: return TRUE; l3: | ||
320 | * F: return FALSE | ||
321 | * | ||
322 | * Although, after the first pass, the program is correct, it is | ||
323 | * inefficient. The simple sample of "a || b && c" could be easily been | ||
324 | * converted into: | ||
325 | * n0: r = a; if (r) goto T | ||
326 | * n1: r = b; if (!r) goto F | ||
327 | * n2: r = c; if (!r) goto F | ||
328 | * T: return TRUE; | ||
329 | * F: return FALSE; | ||
330 | * | ||
331 | * The First Pass is over the input string. The next too passes are over | ||
332 | * the program itself. | ||
333 | * | ||
334 | * ** SECOND PASS ** | ||
335 | * | ||
336 | * Which brings us to the second pass. If a jump to a label has the | ||
337 | * same condition as that label, it can instead jump to its target. | ||
338 | * The original example of "a && !(!b || (c && g)) || d || e && !f" | ||
339 | * where the first pass gives us: | ||
340 | * | ||
341 | * n1: r=a; l1: if (!r) goto l4; | ||
342 | * n2: r=b; l2: if (!r) goto l4; | ||
343 | * n3: r=c; r=!r; l3: if (r) goto l4; | ||
344 | * n4: r=g; r=!r; l4: if (r) goto l5; | ||
345 | * n5: r=d; l5: if (r) goto T | ||
346 | * n6: r=e; l6: if (!r) goto l7; | ||
347 | * n7: r=f; r=!r; l7: if (!r) goto F: | ||
348 | * T: return TRUE; | ||
349 | * F: return FALSE | ||
350 | * | ||
351 | * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;". | ||
352 | * And "l5: if (r) goto T", we could optimize this by converting l3 and l4 | ||
353 | * to go directly to T. To accomplish this, we start from the last | ||
354 | * entry in the program and work our way back. If the target of the entry | ||
355 | * has the same "when_to_branch" then we could use that entry's target. | ||
356 | * Doing this, the above would end up as: | ||
357 | * | ||
358 | * n1: r=a; l1: if (!r) goto l4; | ||
359 | * n2: r=b; l2: if (!r) goto l4; | ||
360 | * n3: r=c; r=!r; l3: if (r) goto T; | ||
361 | * n4: r=g; r=!r; l4: if (r) goto T; | ||
362 | * n5: r=d; l5: if (r) goto T; | ||
363 | * n6: r=e; l6: if (!r) goto F; | ||
364 | * n7: r=f; r=!r; l7: if (!r) goto F; | ||
365 | * T: return TRUE | ||
366 | * F: return FALSE | ||
367 | * | ||
368 | * In that same pass, if the "when_to_branch" doesn't match, we can simply | ||
369 | * go to the program entry after the label. That is, "l2: if (!r) goto l4;" | ||
370 | * where "l4: if (r) goto T;", then we can convert l2 to be: | ||
371 | * "l2: if (!r) goto n5;". | ||
372 | * | ||
373 | * This will have the second pass give us: | ||
374 | * n1: r=a; l1: if (!r) goto n5; | ||
375 | * n2: r=b; l2: if (!r) goto n5; | ||
376 | * n3: r=c; r=!r; l3: if (r) goto T; | ||
377 | * n4: r=g; r=!r; l4: if (r) goto T; | ||
378 | * n5: r=d; l5: if (r) goto T | ||
379 | * n6: r=e; l6: if (!r) goto F; | ||
380 | * n7: r=f; r=!r; l7: if (!r) goto F | ||
381 | * T: return TRUE | ||
382 | * F: return FALSE | ||
383 | * | ||
384 | * Notice, all the "l#" labels are no longer used, and they can now | ||
385 | * be discarded. | ||
386 | * | ||
387 | * ** THIRD PASS ** | ||
388 | * | ||
389 | * For the third pass we deal with the inverts. As they simply just | ||
390 | * make the "when_to_branch" get inverted, a simple loop over the | ||
391 | * program to that does: "when_to_branch ^= invert;" will do the | ||
392 | * job, leaving us with: | ||
393 | * n1: r=a; if (!r) goto n5; | ||
394 | * n2: r=b; if (!r) goto n5; | ||
395 | * n3: r=c: if (!r) goto T; | ||
396 | * n4: r=g; if (!r) goto T; | ||
397 | * n5: r=d; if (r) goto T | ||
398 | * n6: r=e; if (!r) goto F; | ||
399 | * n7: r=f; if (r) goto F | ||
400 | * T: return TRUE | ||
401 | * F: return FALSE | ||
402 | * | ||
403 | * As "r = a; if (!r) goto n5;" is obviously the same as | ||
404 | * "if (!a) goto n5;" without doing anything we can interperate the | ||
405 | * program as: | ||
406 | * n1: if (!a) goto n5; | ||
407 | * n2: if (!b) goto n5; | ||
408 | * n3: if (!c) goto T; | ||
409 | * n4: if (!g) goto T; | ||
410 | * n5: if (d) goto T | ||
411 | * n6: if (!e) goto F; | ||
412 | * n7: if (f) goto F | ||
413 | * T: return TRUE | ||
414 | * F: return FALSE | ||
415 | * | ||
416 | * Since the inverts are discarded at the end, there's no reason to store | ||
417 | * them in the program array (and waste memory). A separate array to hold | ||
418 | * the inverts is used and freed at the end. | ||
419 | */ | ||
420 | static struct prog_entry * | ||
421 | predicate_parse(const char *str, int nr_parens, int nr_preds, | ||
422 | parse_pred_fn parse_pred, void *data, | ||
423 | struct filter_parse_error *pe) | ||
424 | { | ||
425 | struct prog_entry *prog_stack; | ||
426 | struct prog_entry *prog; | ||
427 | const char *ptr = str; | ||
428 | char *inverts = NULL; | ||
429 | int *op_stack; | ||
430 | int *top; | ||
431 | int invert = 0; | ||
432 | int ret = -ENOMEM; | ||
433 | int len; | ||
434 | int N = 0; | ||
435 | int i; | ||
436 | |||
437 | nr_preds += 2; /* For TRUE and FALSE */ | ||
438 | |||
439 | op_stack = kmalloc(sizeof(*op_stack) * nr_parens, GFP_KERNEL); | ||
440 | if (!op_stack) | ||
441 | return ERR_PTR(-ENOMEM); | ||
442 | prog_stack = kmalloc(sizeof(*prog_stack) * nr_preds, GFP_KERNEL); | ||
443 | if (!prog_stack) { | ||
444 | parse_error(pe, -ENOMEM, 0); | ||
445 | goto out_free; | ||
446 | } | ||
447 | inverts = kmalloc(sizeof(*inverts) * nr_preds, GFP_KERNEL); | ||
448 | if (!inverts) { | ||
449 | parse_error(pe, -ENOMEM, 0); | ||
450 | goto out_free; | ||
451 | } | ||
452 | |||
453 | top = op_stack; | ||
454 | prog = prog_stack; | ||
455 | *top = 0; | ||
456 | |||
457 | /* First pass */ | ||
458 | while (*ptr) { /* #1 */ | ||
459 | const char *next = ptr++; | ||
460 | |||
461 | if (isspace(*next)) | ||
462 | continue; | ||
463 | |||
464 | switch (*next) { | ||
465 | case '(': /* #2 */ | ||
466 | if (top - op_stack > nr_parens) | ||
467 | return ERR_PTR(-EINVAL); | ||
468 | *(++top) = invert; | ||
469 | continue; | ||
470 | case '!': /* #3 */ | ||
471 | if (!is_not(next)) | ||
472 | break; | ||
473 | invert = !invert; | ||
474 | continue; | ||
475 | } | ||
476 | |||
477 | if (N >= nr_preds) { | ||
478 | parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str); | ||
479 | goto out_free; | ||
480 | } | ||
481 | |||
482 | inverts[N] = invert; /* #4 */ | ||
483 | prog[N].target = N-1; | ||
484 | |||
485 | len = parse_pred(next, data, ptr - str, pe, &prog[N].pred); | ||
486 | if (len < 0) { | ||
487 | ret = len; | ||
488 | goto out_free; | ||
489 | } | ||
490 | ptr = next + len; | ||
491 | |||
492 | N++; | ||
493 | |||
494 | ret = -1; | ||
495 | while (1) { /* #5 */ | ||
496 | next = ptr++; | ||
497 | if (isspace(*next)) | ||
498 | continue; | ||
499 | |||
500 | switch (*next) { | ||
501 | case ')': | ||
502 | case '\0': | ||
503 | break; | ||
504 | case '&': | ||
505 | case '|': | ||
506 | if (next[1] == next[0]) { | ||
507 | ptr++; | ||
508 | break; | ||
509 | } | ||
510 | default: | ||
511 | parse_error(pe, FILT_ERR_TOO_MANY_PREDS, | ||
512 | next - str); | ||
513 | goto out_free; | ||
514 | } | ||
515 | |||
516 | invert = *top & INVERT; | ||
517 | |||
518 | if (*top & PROCESS_AND) { /* #7 */ | ||
519 | update_preds(prog, N - 1, invert); | ||
520 | *top &= ~PROCESS_AND; | ||
521 | } | ||
522 | if (*next == '&') { /* #8 */ | ||
523 | *top |= PROCESS_AND; | ||
524 | break; | ||
525 | } | ||
526 | if (*top & PROCESS_OR) { /* #9 */ | ||
527 | update_preds(prog, N - 1, !invert); | ||
528 | *top &= ~PROCESS_OR; | ||
529 | } | ||
530 | if (*next == '|') { /* #10 */ | ||
531 | *top |= PROCESS_OR; | ||
532 | break; | ||
533 | } | ||
534 | if (!*next) /* #11 */ | ||
535 | goto out; | ||
536 | |||
537 | if (top == op_stack) { | ||
538 | ret = -1; | ||
539 | /* Too few '(' */ | ||
540 | parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str); | ||
541 | goto out_free; | ||
542 | } | ||
543 | top--; /* #12 */ | ||
544 | } | ||
545 | } | ||
546 | out: | ||
547 | if (top != op_stack) { | ||
548 | /* Too many '(' */ | ||
549 | parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str); | ||
550 | goto out_free; | ||
551 | } | ||
552 | |||
553 | prog[N].pred = NULL; /* #13 */ | ||
554 | prog[N].target = 1; /* TRUE */ | ||
555 | prog[N+1].pred = NULL; | ||
556 | prog[N+1].target = 0; /* FALSE */ | ||
557 | prog[N-1].target = N; | ||
558 | prog[N-1].when_to_branch = false; | ||
559 | |||
560 | /* Second Pass */ | ||
561 | for (i = N-1 ; i--; ) { | ||
562 | int target = prog[i].target; | ||
563 | if (prog[i].when_to_branch == prog[target].when_to_branch) | ||
564 | prog[i].target = prog[target].target; | ||
565 | } | ||
566 | |||
567 | /* Third Pass */ | ||
568 | for (i = 0; i < N; i++) { | ||
569 | invert = inverts[i] ^ prog[i].when_to_branch; | ||
570 | prog[i].when_to_branch = invert; | ||
571 | /* Make sure the program always moves forward */ | ||
572 | if (WARN_ON(prog[i].target <= i)) { | ||
573 | ret = -EINVAL; | ||
574 | goto out_free; | ||
575 | } | ||
576 | } | ||
577 | |||
578 | return prog; | ||
579 | out_free: | ||
580 | kfree(op_stack); | ||
581 | kfree(prog_stack); | ||
582 | kfree(inverts); | ||
583 | return ERR_PTR(ret); | ||
584 | } | ||
585 | |||
147 | #define DEFINE_COMPARISON_PRED(type) \ | 586 | #define DEFINE_COMPARISON_PRED(type) \ |
148 | static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \ | 587 | static int filter_pred_LT_##type(struct filter_pred *pred, void *event) \ |
149 | { \ | 588 | { \ |
150 | type *addr = (type *)(event + pred->offset); \ | 589 | type *addr = (type *)(event + pred->offset); \ |
151 | type val = (type)pred->val; \ | 590 | type val = (type)pred->val; \ |
152 | int match = (*addr < val); \ | 591 | return *addr < val; \ |
153 | return !!match == !pred->not; \ | ||
154 | } \ | 592 | } \ |
155 | static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \ | 593 | static int filter_pred_LE_##type(struct filter_pred *pred, void *event) \ |
156 | { \ | 594 | { \ |
157 | type *addr = (type *)(event + pred->offset); \ | 595 | type *addr = (type *)(event + pred->offset); \ |
158 | type val = (type)pred->val; \ | 596 | type val = (type)pred->val; \ |
159 | int match = (*addr <= val); \ | 597 | return *addr <= val; \ |
160 | return !!match == !pred->not; \ | ||
161 | } \ | 598 | } \ |
162 | static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \ | 599 | static int filter_pred_GT_##type(struct filter_pred *pred, void *event) \ |
163 | { \ | 600 | { \ |
164 | type *addr = (type *)(event + pred->offset); \ | 601 | type *addr = (type *)(event + pred->offset); \ |
165 | type val = (type)pred->val; \ | 602 | type val = (type)pred->val; \ |
166 | int match = (*addr > val); \ | 603 | return *addr > val; \ |
167 | return !!match == !pred->not; \ | ||
168 | } \ | 604 | } \ |
169 | static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \ | 605 | static int filter_pred_GE_##type(struct filter_pred *pred, void *event) \ |
170 | { \ | 606 | { \ |
171 | type *addr = (type *)(event + pred->offset); \ | 607 | type *addr = (type *)(event + pred->offset); \ |
172 | type val = (type)pred->val; \ | 608 | type val = (type)pred->val; \ |
173 | int match = (*addr >= val); \ | 609 | return *addr >= val; \ |
174 | return !!match == !pred->not; \ | ||
175 | } \ | 610 | } \ |
176 | static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \ | 611 | static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \ |
177 | { \ | 612 | { \ |
178 | type *addr = (type *)(event + pred->offset); \ | 613 | type *addr = (type *)(event + pred->offset); \ |
179 | type val = (type)pred->val; \ | 614 | type val = (type)pred->val; \ |
180 | int match = !!(*addr & val); \ | 615 | return !!(*addr & val); \ |
181 | return match == !pred->not; \ | ||
182 | } \ | 616 | } \ |
183 | static const filter_pred_fn_t pred_funcs_##type[] = { \ | 617 | static const filter_pred_fn_t pred_funcs_##type[] = { \ |
184 | filter_pred_LT_##type, \ | ||
185 | filter_pred_LE_##type, \ | 618 | filter_pred_LE_##type, \ |
186 | filter_pred_GT_##type, \ | 619 | filter_pred_LT_##type, \ |
187 | filter_pred_GE_##type, \ | 620 | filter_pred_GE_##type, \ |
621 | filter_pred_GT_##type, \ | ||
188 | filter_pred_BAND_##type, \ | 622 | filter_pred_BAND_##type, \ |
189 | }; | 623 | }; |
190 | 624 | ||
191 | #define PRED_FUNC_START OP_LT | ||
192 | |||
193 | #define DEFINE_EQUALITY_PRED(size) \ | 625 | #define DEFINE_EQUALITY_PRED(size) \ |
194 | static int filter_pred_##size(struct filter_pred *pred, void *event) \ | 626 | static int filter_pred_##size(struct filter_pred *pred, void *event) \ |
195 | { \ | 627 | { \ |
@@ -272,44 +704,36 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event) | |||
272 | static int filter_pred_cpu(struct filter_pred *pred, void *event) | 704 | static int filter_pred_cpu(struct filter_pred *pred, void *event) |
273 | { | 705 | { |
274 | int cpu, cmp; | 706 | int cpu, cmp; |
275 | int match = 0; | ||
276 | 707 | ||
277 | cpu = raw_smp_processor_id(); | 708 | cpu = raw_smp_processor_id(); |
278 | cmp = pred->val; | 709 | cmp = pred->val; |
279 | 710 | ||
280 | switch (pred->op) { | 711 | switch (pred->op) { |
281 | case OP_EQ: | 712 | case OP_EQ: |
282 | match = cpu == cmp; | 713 | return cpu == cmp; |
283 | break; | 714 | case OP_NE: |
715 | return cpu != cmp; | ||
284 | case OP_LT: | 716 | case OP_LT: |
285 | match = cpu < cmp; | 717 | return cpu < cmp; |
286 | break; | ||
287 | case OP_LE: | 718 | case OP_LE: |
288 | match = cpu <= cmp; | 719 | return cpu <= cmp; |
289 | break; | ||
290 | case OP_GT: | 720 | case OP_GT: |
291 | match = cpu > cmp; | 721 | return cpu > cmp; |
292 | break; | ||
293 | case OP_GE: | 722 | case OP_GE: |
294 | match = cpu >= cmp; | 723 | return cpu >= cmp; |
295 | break; | ||
296 | default: | 724 | default: |
297 | break; | 725 | return 0; |
298 | } | 726 | } |
299 | |||
300 | return !!match == !pred->not; | ||
301 | } | 727 | } |
302 | 728 | ||
303 | /* Filter predicate for COMM. */ | 729 | /* Filter predicate for COMM. */ |
304 | static int filter_pred_comm(struct filter_pred *pred, void *event) | 730 | static int filter_pred_comm(struct filter_pred *pred, void *event) |
305 | { | 731 | { |
306 | int cmp, match; | 732 | int cmp; |
307 | 733 | ||
308 | cmp = pred->regex.match(current->comm, &pred->regex, | 734 | cmp = pred->regex.match(current->comm, &pred->regex, |
309 | pred->regex.field_len); | 735 | TASK_COMM_LEN); |
310 | match = cmp ^ pred->not; | 736 | return cmp ^ pred->not; |
311 | |||
312 | return match; | ||
313 | } | 737 | } |
314 | 738 | ||
315 | static int filter_pred_none(struct filter_pred *pred, void *event) | 739 | static int filter_pred_none(struct filter_pred *pred, void *event) |
@@ -366,6 +790,7 @@ static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) | |||
366 | return 1; | 790 | return 1; |
367 | return 0; | 791 | return 0; |
368 | } | 792 | } |
793 | |||
369 | /** | 794 | /** |
370 | * filter_parse_regex - parse a basic regex | 795 | * filter_parse_regex - parse a basic regex |
371 | * @buff: the raw regex | 796 | * @buff: the raw regex |
@@ -426,10 +851,9 @@ static void filter_build_regex(struct filter_pred *pred) | |||
426 | struct regex *r = &pred->regex; | 851 | struct regex *r = &pred->regex; |
427 | char *search; | 852 | char *search; |
428 | enum regex_type type = MATCH_FULL; | 853 | enum regex_type type = MATCH_FULL; |
429 | int not = 0; | ||
430 | 854 | ||
431 | if (pred->op == OP_GLOB) { | 855 | if (pred->op == OP_GLOB) { |
432 | type = filter_parse_regex(r->pattern, r->len, &search, ¬); | 856 | type = filter_parse_regex(r->pattern, r->len, &search, &pred->not); |
433 | r->len = strlen(search); | 857 | r->len = strlen(search); |
434 | memmove(r->pattern, search, r->len+1); | 858 | memmove(r->pattern, search, r->len+1); |
435 | } | 859 | } |
@@ -451,210 +875,32 @@ static void filter_build_regex(struct filter_pred *pred) | |||
451 | r->match = regex_match_glob; | 875 | r->match = regex_match_glob; |
452 | break; | 876 | break; |
453 | } | 877 | } |
454 | |||
455 | pred->not ^= not; | ||
456 | } | ||
457 | |||
458 | enum move_type { | ||
459 | MOVE_DOWN, | ||
460 | MOVE_UP_FROM_LEFT, | ||
461 | MOVE_UP_FROM_RIGHT | ||
462 | }; | ||
463 | |||
464 | static struct filter_pred * | ||
465 | get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, | ||
466 | int index, enum move_type *move) | ||
467 | { | ||
468 | if (pred->parent & FILTER_PRED_IS_RIGHT) | ||
469 | *move = MOVE_UP_FROM_RIGHT; | ||
470 | else | ||
471 | *move = MOVE_UP_FROM_LEFT; | ||
472 | pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT]; | ||
473 | |||
474 | return pred; | ||
475 | } | ||
476 | |||
477 | enum walk_return { | ||
478 | WALK_PRED_ABORT, | ||
479 | WALK_PRED_PARENT, | ||
480 | WALK_PRED_DEFAULT, | ||
481 | }; | ||
482 | |||
483 | typedef int (*filter_pred_walkcb_t) (enum move_type move, | ||
484 | struct filter_pred *pred, | ||
485 | int *err, void *data); | ||
486 | |||
487 | static int walk_pred_tree(struct filter_pred *preds, | ||
488 | struct filter_pred *root, | ||
489 | filter_pred_walkcb_t cb, void *data) | ||
490 | { | ||
491 | struct filter_pred *pred = root; | ||
492 | enum move_type move = MOVE_DOWN; | ||
493 | int done = 0; | ||
494 | |||
495 | if (!preds) | ||
496 | return -EINVAL; | ||
497 | |||
498 | do { | ||
499 | int err = 0, ret; | ||
500 | |||
501 | ret = cb(move, pred, &err, data); | ||
502 | if (ret == WALK_PRED_ABORT) | ||
503 | return err; | ||
504 | if (ret == WALK_PRED_PARENT) | ||
505 | goto get_parent; | ||
506 | |||
507 | switch (move) { | ||
508 | case MOVE_DOWN: | ||
509 | if (pred->left != FILTER_PRED_INVALID) { | ||
510 | pred = &preds[pred->left]; | ||
511 | continue; | ||
512 | } | ||
513 | goto get_parent; | ||
514 | case MOVE_UP_FROM_LEFT: | ||
515 | pred = &preds[pred->right]; | ||
516 | move = MOVE_DOWN; | ||
517 | continue; | ||
518 | case MOVE_UP_FROM_RIGHT: | ||
519 | get_parent: | ||
520 | if (pred == root) | ||
521 | break; | ||
522 | pred = get_pred_parent(pred, preds, | ||
523 | pred->parent, | ||
524 | &move); | ||
525 | continue; | ||
526 | } | ||
527 | done = 1; | ||
528 | } while (!done); | ||
529 | |||
530 | /* We are fine. */ | ||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | /* | ||
535 | * A series of AND or ORs where found together. Instead of | ||
536 | * climbing up and down the tree branches, an array of the | ||
537 | * ops were made in order of checks. We can just move across | ||
538 | * the array and short circuit if needed. | ||
539 | */ | ||
540 | static int process_ops(struct filter_pred *preds, | ||
541 | struct filter_pred *op, void *rec) | ||
542 | { | ||
543 | struct filter_pred *pred; | ||
544 | int match = 0; | ||
545 | int type; | ||
546 | int i; | ||
547 | |||
548 | /* | ||
549 | * Micro-optimization: We set type to true if op | ||
550 | * is an OR and false otherwise (AND). Then we | ||
551 | * just need to test if the match is equal to | ||
552 | * the type, and if it is, we can short circuit the | ||
553 | * rest of the checks: | ||
554 | * | ||
555 | * if ((match && op->op == OP_OR) || | ||
556 | * (!match && op->op == OP_AND)) | ||
557 | * return match; | ||
558 | */ | ||
559 | type = op->op == OP_OR; | ||
560 | |||
561 | for (i = 0; i < op->val; i++) { | ||
562 | pred = &preds[op->ops[i]]; | ||
563 | if (!WARN_ON_ONCE(!pred->fn)) | ||
564 | match = pred->fn(pred, rec); | ||
565 | if (!!match == type) | ||
566 | break; | ||
567 | } | ||
568 | /* If not of not match is equal to not of not, then it is a match */ | ||
569 | return !!match == !op->not; | ||
570 | } | ||
571 | |||
572 | struct filter_match_preds_data { | ||
573 | struct filter_pred *preds; | ||
574 | int match; | ||
575 | void *rec; | ||
576 | }; | ||
577 | |||
578 | static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred, | ||
579 | int *err, void *data) | ||
580 | { | ||
581 | struct filter_match_preds_data *d = data; | ||
582 | |||
583 | *err = 0; | ||
584 | switch (move) { | ||
585 | case MOVE_DOWN: | ||
586 | /* only AND and OR have children */ | ||
587 | if (pred->left != FILTER_PRED_INVALID) { | ||
588 | /* If ops is set, then it was folded. */ | ||
589 | if (!pred->ops) | ||
590 | return WALK_PRED_DEFAULT; | ||
591 | /* We can treat folded ops as a leaf node */ | ||
592 | d->match = process_ops(d->preds, pred, d->rec); | ||
593 | } else { | ||
594 | if (!WARN_ON_ONCE(!pred->fn)) | ||
595 | d->match = pred->fn(pred, d->rec); | ||
596 | } | ||
597 | |||
598 | return WALK_PRED_PARENT; | ||
599 | case MOVE_UP_FROM_LEFT: | ||
600 | /* | ||
601 | * Check for short circuits. | ||
602 | * | ||
603 | * Optimization: !!match == (pred->op == OP_OR) | ||
604 | * is the same as: | ||
605 | * if ((match && pred->op == OP_OR) || | ||
606 | * (!match && pred->op == OP_AND)) | ||
607 | */ | ||
608 | if (!!d->match == (pred->op == OP_OR)) | ||
609 | return WALK_PRED_PARENT; | ||
610 | break; | ||
611 | case MOVE_UP_FROM_RIGHT: | ||
612 | break; | ||
613 | } | ||
614 | |||
615 | return WALK_PRED_DEFAULT; | ||
616 | } | 878 | } |
617 | 879 | ||
618 | /* return 1 if event matches, 0 otherwise (discard) */ | 880 | /* return 1 if event matches, 0 otherwise (discard) */ |
619 | int filter_match_preds(struct event_filter *filter, void *rec) | 881 | int filter_match_preds(struct event_filter *filter, void *rec) |
620 | { | 882 | { |
621 | struct filter_pred *preds; | 883 | struct prog_entry *prog; |
622 | struct filter_pred *root; | 884 | int i; |
623 | struct filter_match_preds_data data = { | ||
624 | /* match is currently meaningless */ | ||
625 | .match = -1, | ||
626 | .rec = rec, | ||
627 | }; | ||
628 | int n_preds, ret; | ||
629 | 885 | ||
630 | /* no filter is considered a match */ | 886 | /* no filter is considered a match */ |
631 | if (!filter) | 887 | if (!filter) |
632 | return 1; | 888 | return 1; |
633 | 889 | ||
634 | n_preds = filter->n_preds; | 890 | prog = rcu_dereference_sched(filter->prog); |
635 | if (!n_preds) | 891 | if (!prog) |
636 | return 1; | 892 | return 1; |
637 | 893 | ||
638 | /* | 894 | for (i = 0; prog[i].pred; i++) { |
639 | * n_preds, root and filter->preds are protect with preemption disabled. | 895 | struct filter_pred *pred = prog[i].pred; |
640 | */ | 896 | int match = pred->fn(pred, rec); |
641 | root = rcu_dereference_sched(filter->root); | 897 | if (match == prog[i].when_to_branch) |
642 | if (!root) | 898 | i = prog[i].target; |
643 | return 1; | 899 | } |
644 | 900 | return prog[i].target; | |
645 | data.preds = preds = rcu_dereference_sched(filter->preds); | ||
646 | ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data); | ||
647 | WARN_ON(ret); | ||
648 | return data.match; | ||
649 | } | 901 | } |
650 | EXPORT_SYMBOL_GPL(filter_match_preds); | 902 | EXPORT_SYMBOL_GPL(filter_match_preds); |
651 | 903 | ||
652 | static void parse_error(struct filter_parse_state *ps, int err, int pos) | ||
653 | { | ||
654 | ps->lasterr = err; | ||
655 | ps->lasterr_pos = pos; | ||
656 | } | ||
657 | |||
658 | static void remove_filter_string(struct event_filter *filter) | 904 | static void remove_filter_string(struct event_filter *filter) |
659 | { | 905 | { |
660 | if (!filter) | 906 | if (!filter) |
@@ -664,57 +910,44 @@ static void remove_filter_string(struct event_filter *filter) | |||
664 | filter->filter_string = NULL; | 910 | filter->filter_string = NULL; |
665 | } | 911 | } |
666 | 912 | ||
667 | static int replace_filter_string(struct event_filter *filter, | 913 | static void append_filter_err(struct filter_parse_error *pe, |
668 | char *filter_string) | ||
669 | { | ||
670 | kfree(filter->filter_string); | ||
671 | filter->filter_string = kstrdup(filter_string, GFP_KERNEL); | ||
672 | if (!filter->filter_string) | ||
673 | return -ENOMEM; | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | static int append_filter_string(struct event_filter *filter, | ||
679 | char *string) | ||
680 | { | ||
681 | int newlen; | ||
682 | char *new_filter_string; | ||
683 | |||
684 | BUG_ON(!filter->filter_string); | ||
685 | newlen = strlen(filter->filter_string) + strlen(string) + 1; | ||
686 | new_filter_string = kmalloc(newlen, GFP_KERNEL); | ||
687 | if (!new_filter_string) | ||
688 | return -ENOMEM; | ||
689 | |||
690 | strcpy(new_filter_string, filter->filter_string); | ||
691 | strcat(new_filter_string, string); | ||
692 | kfree(filter->filter_string); | ||
693 | filter->filter_string = new_filter_string; | ||
694 | |||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | static void append_filter_err(struct filter_parse_state *ps, | ||
699 | struct event_filter *filter) | 914 | struct event_filter *filter) |
700 | { | 915 | { |
701 | int pos = ps->lasterr_pos; | 916 | struct trace_seq *s; |
702 | char *buf, *pbuf; | 917 | int pos = pe->lasterr_pos; |
918 | char *buf; | ||
919 | int len; | ||
920 | |||
921 | if (WARN_ON(!filter->filter_string)) | ||
922 | return; | ||
703 | 923 | ||
704 | buf = (char *)__get_free_page(GFP_KERNEL); | 924 | s = kmalloc(sizeof(*s), GFP_KERNEL); |
705 | if (!buf) | 925 | if (!s) |
706 | return; | 926 | return; |
927 | trace_seq_init(s); | ||
707 | 928 | ||
708 | append_filter_string(filter, "\n"); | 929 | len = strlen(filter->filter_string); |
709 | memset(buf, ' ', PAGE_SIZE); | 930 | if (pos > len) |
710 | if (pos > PAGE_SIZE - 128) | 931 | pos = len; |
711 | pos = 0; | ||
712 | buf[pos] = '^'; | ||
713 | pbuf = &buf[pos] + 1; | ||
714 | 932 | ||
715 | sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]); | 933 | /* indexing is off by one */ |
716 | append_filter_string(filter, buf); | 934 | if (pos) |
717 | free_page((unsigned long) buf); | 935 | pos++; |
936 | |||
937 | trace_seq_puts(s, filter->filter_string); | ||
938 | if (pe->lasterr > 0) { | ||
939 | trace_seq_printf(s, "\n%*s", pos, "^"); | ||
940 | trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]); | ||
941 | } else { | ||
942 | trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr); | ||
943 | } | ||
944 | trace_seq_putc(s, 0); | ||
945 | buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL); | ||
946 | if (buf) { | ||
947 | kfree(filter->filter_string); | ||
948 | filter->filter_string = buf; | ||
949 | } | ||
950 | kfree(s); | ||
718 | } | 951 | } |
719 | 952 | ||
720 | static inline struct event_filter *event_filter(struct trace_event_file *file) | 953 | static inline struct event_filter *event_filter(struct trace_event_file *file) |
@@ -747,108 +980,18 @@ void print_subsystem_event_filter(struct event_subsystem *system, | |||
747 | mutex_unlock(&event_mutex); | 980 | mutex_unlock(&event_mutex); |
748 | } | 981 | } |
749 | 982 | ||
750 | static int __alloc_pred_stack(struct pred_stack *stack, int n_preds) | 983 | static void free_prog(struct event_filter *filter) |
751 | { | ||
752 | stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL); | ||
753 | if (!stack->preds) | ||
754 | return -ENOMEM; | ||
755 | stack->index = n_preds; | ||
756 | return 0; | ||
757 | } | ||
758 | |||
759 | static void __free_pred_stack(struct pred_stack *stack) | ||
760 | { | ||
761 | kfree(stack->preds); | ||
762 | stack->index = 0; | ||
763 | } | ||
764 | |||
765 | static int __push_pred_stack(struct pred_stack *stack, | ||
766 | struct filter_pred *pred) | ||
767 | { | ||
768 | int index = stack->index; | ||
769 | |||
770 | if (WARN_ON(index == 0)) | ||
771 | return -ENOSPC; | ||
772 | |||
773 | stack->preds[--index] = pred; | ||
774 | stack->index = index; | ||
775 | return 0; | ||
776 | } | ||
777 | |||
778 | static struct filter_pred * | ||
779 | __pop_pred_stack(struct pred_stack *stack) | ||
780 | { | ||
781 | struct filter_pred *pred; | ||
782 | int index = stack->index; | ||
783 | |||
784 | pred = stack->preds[index++]; | ||
785 | if (!pred) | ||
786 | return NULL; | ||
787 | |||
788 | stack->index = index; | ||
789 | return pred; | ||
790 | } | ||
791 | |||
792 | static int filter_set_pred(struct event_filter *filter, | ||
793 | int idx, | ||
794 | struct pred_stack *stack, | ||
795 | struct filter_pred *src) | ||
796 | { | ||
797 | struct filter_pred *dest = &filter->preds[idx]; | ||
798 | struct filter_pred *left; | ||
799 | struct filter_pred *right; | ||
800 | |||
801 | *dest = *src; | ||
802 | dest->index = idx; | ||
803 | |||
804 | if (dest->op == OP_OR || dest->op == OP_AND) { | ||
805 | right = __pop_pred_stack(stack); | ||
806 | left = __pop_pred_stack(stack); | ||
807 | if (!left || !right) | ||
808 | return -EINVAL; | ||
809 | /* | ||
810 | * If both children can be folded | ||
811 | * and they are the same op as this op or a leaf, | ||
812 | * then this op can be folded. | ||
813 | */ | ||
814 | if (left->index & FILTER_PRED_FOLD && | ||
815 | ((left->op == dest->op && !left->not) || | ||
816 | left->left == FILTER_PRED_INVALID) && | ||
817 | right->index & FILTER_PRED_FOLD && | ||
818 | ((right->op == dest->op && !right->not) || | ||
819 | right->left == FILTER_PRED_INVALID)) | ||
820 | dest->index |= FILTER_PRED_FOLD; | ||
821 | |||
822 | dest->left = left->index & ~FILTER_PRED_FOLD; | ||
823 | dest->right = right->index & ~FILTER_PRED_FOLD; | ||
824 | left->parent = dest->index & ~FILTER_PRED_FOLD; | ||
825 | right->parent = dest->index | FILTER_PRED_IS_RIGHT; | ||
826 | } else { | ||
827 | /* | ||
828 | * Make dest->left invalid to be used as a quick | ||
829 | * way to know this is a leaf node. | ||
830 | */ | ||
831 | dest->left = FILTER_PRED_INVALID; | ||
832 | |||
833 | /* All leafs allow folding the parent ops. */ | ||
834 | dest->index |= FILTER_PRED_FOLD; | ||
835 | } | ||
836 | |||
837 | return __push_pred_stack(stack, dest); | ||
838 | } | ||
839 | |||
840 | static void __free_preds(struct event_filter *filter) | ||
841 | { | 984 | { |
985 | struct prog_entry *prog; | ||
842 | int i; | 986 | int i; |
843 | 987 | ||
844 | if (filter->preds) { | 988 | prog = rcu_access_pointer(filter->prog); |
845 | for (i = 0; i < filter->n_preds; i++) | 989 | if (!prog) |
846 | kfree(filter->preds[i].ops); | 990 | return; |
847 | kfree(filter->preds); | 991 | |
848 | filter->preds = NULL; | 992 | for (i = 0; prog[i].pred; i++) |
849 | } | 993 | kfree(prog[i].pred); |
850 | filter->a_preds = 0; | 994 | kfree(prog); |
851 | filter->n_preds = 0; | ||
852 | } | 995 | } |
853 | 996 | ||
854 | static void filter_disable(struct trace_event_file *file) | 997 | static void filter_disable(struct trace_event_file *file) |
@@ -866,7 +1009,7 @@ static void __free_filter(struct event_filter *filter) | |||
866 | if (!filter) | 1009 | if (!filter) |
867 | return; | 1010 | return; |
868 | 1011 | ||
869 | __free_preds(filter); | 1012 | free_prog(filter); |
870 | kfree(filter->filter_string); | 1013 | kfree(filter->filter_string); |
871 | kfree(filter); | 1014 | kfree(filter); |
872 | } | 1015 | } |
@@ -876,38 +1019,6 @@ void free_event_filter(struct event_filter *filter) | |||
876 | __free_filter(filter); | 1019 | __free_filter(filter); |
877 | } | 1020 | } |
878 | 1021 | ||
879 | static struct event_filter *__alloc_filter(void) | ||
880 | { | ||
881 | struct event_filter *filter; | ||
882 | |||
883 | filter = kzalloc(sizeof(*filter), GFP_KERNEL); | ||
884 | return filter; | ||
885 | } | ||
886 | |||
887 | static int __alloc_preds(struct event_filter *filter, int n_preds) | ||
888 | { | ||
889 | struct filter_pred *pred; | ||
890 | int i; | ||
891 | |||
892 | if (filter->preds) | ||
893 | __free_preds(filter); | ||
894 | |||
895 | filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL); | ||
896 | |||
897 | if (!filter->preds) | ||
898 | return -ENOMEM; | ||
899 | |||
900 | filter->a_preds = n_preds; | ||
901 | filter->n_preds = 0; | ||
902 | |||
903 | for (i = 0; i < n_preds; i++) { | ||
904 | pred = &filter->preds[i]; | ||
905 | pred->fn = filter_pred_none; | ||
906 | } | ||
907 | |||
908 | return 0; | ||
909 | } | ||
910 | |||
911 | static inline void __remove_filter(struct trace_event_file *file) | 1022 | static inline void __remove_filter(struct trace_event_file *file) |
912 | { | 1023 | { |
913 | filter_disable(file); | 1024 | filter_disable(file); |
@@ -944,27 +1055,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, | |||
944 | } | 1055 | } |
945 | } | 1056 | } |
946 | 1057 | ||
947 | static int filter_add_pred(struct filter_parse_state *ps, | ||
948 | struct event_filter *filter, | ||
949 | struct filter_pred *pred, | ||
950 | struct pred_stack *stack) | ||
951 | { | ||
952 | int err; | ||
953 | |||
954 | if (WARN_ON(filter->n_preds == filter->a_preds)) { | ||
955 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | ||
956 | return -ENOSPC; | ||
957 | } | ||
958 | |||
959 | err = filter_set_pred(filter, filter->n_preds, stack, pred); | ||
960 | if (err) | ||
961 | return err; | ||
962 | |||
963 | filter->n_preds++; | ||
964 | |||
965 | return 0; | ||
966 | } | ||
967 | |||
968 | int filter_assign_type(const char *type) | 1058 | int filter_assign_type(const char *type) |
969 | { | 1059 | { |
970 | if (strstr(type, "__data_loc") && strstr(type, "char")) | 1060 | if (strstr(type, "__data_loc") && strstr(type, "char")) |
@@ -976,761 +1066,449 @@ int filter_assign_type(const char *type) | |||
976 | return FILTER_OTHER; | 1066 | return FILTER_OTHER; |
977 | } | 1067 | } |
978 | 1068 | ||
979 | static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op) | ||
980 | { | ||
981 | if (is_string_field(field) && | ||
982 | (op != OP_EQ && op != OP_NE && op != OP_GLOB)) | ||
983 | return false; | ||
984 | if (!is_string_field(field) && op == OP_GLOB) | ||
985 | return false; | ||
986 | |||
987 | return true; | ||
988 | } | ||
989 | |||
990 | static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op, | 1069 | static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op, |
991 | int field_size, int field_is_signed) | 1070 | int field_size, int field_is_signed) |
992 | { | 1071 | { |
993 | filter_pred_fn_t fn = NULL; | 1072 | filter_pred_fn_t fn = NULL; |
1073 | int pred_func_index = -1; | ||
1074 | |||
1075 | switch (op) { | ||
1076 | case OP_EQ: | ||
1077 | case OP_NE: | ||
1078 | break; | ||
1079 | default: | ||
1080 | if (WARN_ON_ONCE(op < PRED_FUNC_START)) | ||
1081 | return NULL; | ||
1082 | pred_func_index = op - PRED_FUNC_START; | ||
1083 | if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX)) | ||
1084 | return NULL; | ||
1085 | } | ||
994 | 1086 | ||
995 | switch (field_size) { | 1087 | switch (field_size) { |
996 | case 8: | 1088 | case 8: |
997 | if (op == OP_EQ || op == OP_NE) | 1089 | if (pred_func_index < 0) |
998 | fn = filter_pred_64; | 1090 | fn = filter_pred_64; |
999 | else if (field_is_signed) | 1091 | else if (field_is_signed) |
1000 | fn = pred_funcs_s64[op - PRED_FUNC_START]; | 1092 | fn = pred_funcs_s64[pred_func_index]; |
1001 | else | 1093 | else |
1002 | fn = pred_funcs_u64[op - PRED_FUNC_START]; | 1094 | fn = pred_funcs_u64[pred_func_index]; |
1003 | break; | 1095 | break; |
1004 | case 4: | 1096 | case 4: |
1005 | if (op == OP_EQ || op == OP_NE) | 1097 | if (pred_func_index < 0) |
1006 | fn = filter_pred_32; | 1098 | fn = filter_pred_32; |
1007 | else if (field_is_signed) | 1099 | else if (field_is_signed) |
1008 | fn = pred_funcs_s32[op - PRED_FUNC_START]; | 1100 | fn = pred_funcs_s32[pred_func_index]; |
1009 | else | 1101 | else |
1010 | fn = pred_funcs_u32[op - PRED_FUNC_START]; | 1102 | fn = pred_funcs_u32[pred_func_index]; |
1011 | break; | 1103 | break; |
1012 | case 2: | 1104 | case 2: |
1013 | if (op == OP_EQ || op == OP_NE) | 1105 | if (pred_func_index < 0) |
1014 | fn = filter_pred_16; | 1106 | fn = filter_pred_16; |
1015 | else if (field_is_signed) | 1107 | else if (field_is_signed) |
1016 | fn = pred_funcs_s16[op - PRED_FUNC_START]; | 1108 | fn = pred_funcs_s16[pred_func_index]; |
1017 | else | 1109 | else |
1018 | fn = pred_funcs_u16[op - PRED_FUNC_START]; | 1110 | fn = pred_funcs_u16[pred_func_index]; |
1019 | break; | 1111 | break; |
1020 | case 1: | 1112 | case 1: |
1021 | if (op == OP_EQ || op == OP_NE) | 1113 | if (pred_func_index < 0) |
1022 | fn = filter_pred_8; | 1114 | fn = filter_pred_8; |
1023 | else if (field_is_signed) | 1115 | else if (field_is_signed) |
1024 | fn = pred_funcs_s8[op - PRED_FUNC_START]; | 1116 | fn = pred_funcs_s8[pred_func_index]; |
1025 | else | 1117 | else |
1026 | fn = pred_funcs_u8[op - PRED_FUNC_START]; | 1118 | fn = pred_funcs_u8[pred_func_index]; |
1027 | break; | 1119 | break; |
1028 | } | 1120 | } |
1029 | 1121 | ||
1030 | return fn; | 1122 | return fn; |
1031 | } | 1123 | } |
1032 | 1124 | ||
1033 | static int init_pred(struct filter_parse_state *ps, | 1125 | /* Called when a predicate is encountered by predicate_parse() */ |
1034 | struct ftrace_event_field *field, | 1126 | static int parse_pred(const char *str, void *data, |
1035 | struct filter_pred *pred) | 1127 | int pos, struct filter_parse_error *pe, |
1036 | 1128 | struct filter_pred **pred_ptr) | |
1037 | { | 1129 | { |
1038 | filter_pred_fn_t fn = filter_pred_none; | 1130 | struct trace_event_call *call = data; |
1039 | unsigned long long val; | 1131 | struct ftrace_event_field *field; |
1132 | struct filter_pred *pred = NULL; | ||
1133 | char num_buf[24]; /* Big enough to hold an address */ | ||
1134 | char *field_name; | ||
1135 | char q; | ||
1136 | u64 val; | ||
1137 | int len; | ||
1040 | int ret; | 1138 | int ret; |
1139 | int op; | ||
1140 | int s; | ||
1141 | int i = 0; | ||
1041 | 1142 | ||
1042 | pred->offset = field->offset; | 1143 | /* First find the field to associate to */ |
1043 | 1144 | while (isspace(str[i])) | |
1044 | if (!is_legal_op(field, pred->op)) { | 1145 | i++; |
1045 | parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0); | 1146 | s = i; |
1046 | return -EINVAL; | ||
1047 | } | ||
1048 | |||
1049 | if (field->filter_type == FILTER_COMM) { | ||
1050 | filter_build_regex(pred); | ||
1051 | fn = filter_pred_comm; | ||
1052 | pred->regex.field_len = TASK_COMM_LEN; | ||
1053 | } else if (is_string_field(field)) { | ||
1054 | filter_build_regex(pred); | ||
1055 | |||
1056 | if (field->filter_type == FILTER_STATIC_STRING) { | ||
1057 | fn = filter_pred_string; | ||
1058 | pred->regex.field_len = field->size; | ||
1059 | } else if (field->filter_type == FILTER_DYN_STRING) | ||
1060 | fn = filter_pred_strloc; | ||
1061 | else | ||
1062 | fn = filter_pred_pchar; | ||
1063 | } else if (is_function_field(field)) { | ||
1064 | if (strcmp(field->name, "ip")) { | ||
1065 | parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0); | ||
1066 | return -EINVAL; | ||
1067 | } | ||
1068 | } else { | ||
1069 | if (field->is_signed) | ||
1070 | ret = kstrtoll(pred->regex.pattern, 0, &val); | ||
1071 | else | ||
1072 | ret = kstrtoull(pred->regex.pattern, 0, &val); | ||
1073 | if (ret) { | ||
1074 | parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); | ||
1075 | return -EINVAL; | ||
1076 | } | ||
1077 | pred->val = val; | ||
1078 | |||
1079 | if (field->filter_type == FILTER_CPU) | ||
1080 | fn = filter_pred_cpu; | ||
1081 | else | ||
1082 | fn = select_comparison_fn(pred->op, field->size, | ||
1083 | field->is_signed); | ||
1084 | if (!fn) { | ||
1085 | parse_error(ps, FILT_ERR_INVALID_OP, 0); | ||
1086 | return -EINVAL; | ||
1087 | } | ||
1088 | } | ||
1089 | |||
1090 | if (pred->op == OP_NE) | ||
1091 | pred->not ^= 1; | ||
1092 | |||
1093 | pred->fn = fn; | ||
1094 | return 0; | ||
1095 | } | ||
1096 | |||
1097 | static void parse_init(struct filter_parse_state *ps, | ||
1098 | struct filter_op *ops, | ||
1099 | char *infix_string) | ||
1100 | { | ||
1101 | memset(ps, '\0', sizeof(*ps)); | ||
1102 | |||
1103 | ps->infix.string = infix_string; | ||
1104 | ps->infix.cnt = strlen(infix_string); | ||
1105 | ps->ops = ops; | ||
1106 | |||
1107 | INIT_LIST_HEAD(&ps->opstack); | ||
1108 | INIT_LIST_HEAD(&ps->postfix); | ||
1109 | } | ||
1110 | |||
1111 | static char infix_next(struct filter_parse_state *ps) | ||
1112 | { | ||
1113 | if (!ps->infix.cnt) | ||
1114 | return 0; | ||
1115 | |||
1116 | ps->infix.cnt--; | ||
1117 | |||
1118 | return ps->infix.string[ps->infix.tail++]; | ||
1119 | } | ||
1120 | 1147 | ||
1121 | static char infix_peek(struct filter_parse_state *ps) | 1148 | while (isalnum(str[i]) || str[i] == '_') |
1122 | { | 1149 | i++; |
1123 | if (ps->infix.tail == strlen(ps->infix.string)) | ||
1124 | return 0; | ||
1125 | 1150 | ||
1126 | return ps->infix.string[ps->infix.tail]; | 1151 | len = i - s; |
1127 | } | ||
1128 | 1152 | ||
1129 | static void infix_advance(struct filter_parse_state *ps) | 1153 | if (!len) |
1130 | { | 1154 | return -1; |
1131 | if (!ps->infix.cnt) | ||
1132 | return; | ||
1133 | 1155 | ||
1134 | ps->infix.cnt--; | 1156 | field_name = kmemdup_nul(str + s, len, GFP_KERNEL); |
1135 | ps->infix.tail++; | 1157 | if (!field_name) |
1136 | } | 1158 | return -ENOMEM; |
1137 | 1159 | ||
1138 | static inline int is_precedence_lower(struct filter_parse_state *ps, | 1160 | /* Make sure that the field exists */ |
1139 | int a, int b) | ||
1140 | { | ||
1141 | return ps->ops[a].precedence < ps->ops[b].precedence; | ||
1142 | } | ||
1143 | 1161 | ||
1144 | static inline int is_op_char(struct filter_parse_state *ps, char c) | 1162 | field = trace_find_event_field(call, field_name); |
1145 | { | 1163 | kfree(field_name); |
1146 | int i; | 1164 | if (!field) { |
1147 | 1165 | parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i); | |
1148 | for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { | 1166 | return -EINVAL; |
1149 | if (ps->ops[i].string[0] == c) | ||
1150 | return 1; | ||
1151 | } | 1167 | } |
1152 | 1168 | ||
1153 | return 0; | 1169 | while (isspace(str[i])) |
1154 | } | 1170 | i++; |
1155 | 1171 | ||
1156 | static int infix_get_op(struct filter_parse_state *ps, char firstc) | 1172 | /* Make sure this op is supported */ |
1157 | { | 1173 | for (op = 0; ops[op]; op++) { |
1158 | char nextc = infix_peek(ps); | 1174 | /* This is why '<=' must come before '<' in ops[] */ |
1159 | char opstr[3]; | 1175 | if (strncmp(str + i, ops[op], strlen(ops[op])) == 0) |
1160 | int i; | 1176 | break; |
1161 | |||
1162 | opstr[0] = firstc; | ||
1163 | opstr[1] = nextc; | ||
1164 | opstr[2] = '\0'; | ||
1165 | |||
1166 | for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { | ||
1167 | if (!strcmp(opstr, ps->ops[i].string)) { | ||
1168 | infix_advance(ps); | ||
1169 | return ps->ops[i].id; | ||
1170 | } | ||
1171 | } | 1177 | } |
1172 | 1178 | ||
1173 | opstr[1] = '\0'; | 1179 | if (!ops[op]) { |
1174 | 1180 | parse_error(pe, FILT_ERR_INVALID_OP, pos + i); | |
1175 | for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { | 1181 | goto err_free; |
1176 | if (!strcmp(opstr, ps->ops[i].string)) | ||
1177 | return ps->ops[i].id; | ||
1178 | } | 1182 | } |
1179 | 1183 | ||
1180 | return OP_NONE; | 1184 | i += strlen(ops[op]); |
1181 | } | ||
1182 | |||
1183 | static inline void clear_operand_string(struct filter_parse_state *ps) | ||
1184 | { | ||
1185 | memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL); | ||
1186 | ps->operand.tail = 0; | ||
1187 | } | ||
1188 | |||
1189 | static inline int append_operand_char(struct filter_parse_state *ps, char c) | ||
1190 | { | ||
1191 | if (ps->operand.tail == MAX_FILTER_STR_VAL - 1) | ||
1192 | return -EINVAL; | ||
1193 | |||
1194 | ps->operand.string[ps->operand.tail++] = c; | ||
1195 | 1185 | ||
1196 | return 0; | 1186 | while (isspace(str[i])) |
1197 | } | 1187 | i++; |
1198 | 1188 | ||
1199 | static int filter_opstack_push(struct filter_parse_state *ps, | 1189 | s = i; |
1200 | enum filter_op_ids op) | ||
1201 | { | ||
1202 | struct opstack_op *opstack_op; | ||
1203 | 1190 | ||
1204 | opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL); | 1191 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); |
1205 | if (!opstack_op) | 1192 | if (!pred) |
1206 | return -ENOMEM; | 1193 | return -ENOMEM; |
1207 | 1194 | ||
1208 | opstack_op->op = op; | 1195 | pred->field = field; |
1209 | list_add(&opstack_op->list, &ps->opstack); | 1196 | pred->offset = field->offset; |
1197 | pred->op = op; | ||
1210 | 1198 | ||
1211 | return 0; | 1199 | if (ftrace_event_is_function(call)) { |
1212 | } | 1200 | /* |
1201 | * Perf does things different with function events. | ||
1202 | * It only allows an "ip" field, and expects a string. | ||
1203 | * But the string does not need to be surrounded by quotes. | ||
1204 | * If it is a string, the assigned function as a nop, | ||
1205 | * (perf doesn't use it) and grab everything. | ||
1206 | */ | ||
1207 | if (strcmp(field->name, "ip") != 0) { | ||
1208 | parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i); | ||
1209 | goto err_free; | ||
1210 | } | ||
1211 | pred->fn = filter_pred_none; | ||
1212 | |||
1213 | /* | ||
1214 | * Quotes are not required, but if they exist then we need | ||
1215 | * to read them till we hit a matching one. | ||
1216 | */ | ||
1217 | if (str[i] == '\'' || str[i] == '"') | ||
1218 | q = str[i]; | ||
1219 | else | ||
1220 | q = 0; | ||
1221 | |||
1222 | for (i++; str[i]; i++) { | ||
1223 | if (q && str[i] == q) | ||
1224 | break; | ||
1225 | if (!q && (str[i] == ')' || str[i] == '&' || | ||
1226 | str[i] == '|')) | ||
1227 | break; | ||
1228 | } | ||
1229 | /* Skip quotes */ | ||
1230 | if (q) | ||
1231 | s++; | ||
1232 | len = i - s; | ||
1233 | if (len >= MAX_FILTER_STR_VAL) { | ||
1234 | parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); | ||
1235 | goto err_free; | ||
1236 | } | ||
1213 | 1237 | ||
1214 | static int filter_opstack_empty(struct filter_parse_state *ps) | 1238 | pred->regex.len = len; |
1215 | { | 1239 | strncpy(pred->regex.pattern, str + s, len); |
1216 | return list_empty(&ps->opstack); | 1240 | pred->regex.pattern[len] = 0; |
1217 | } | 1241 | |
1242 | /* This is either a string, or an integer */ | ||
1243 | } else if (str[i] == '\'' || str[i] == '"') { | ||
1244 | char q = str[i]; | ||
1245 | |||
1246 | /* Make sure the op is OK for strings */ | ||
1247 | switch (op) { | ||
1248 | case OP_NE: | ||
1249 | pred->not = 1; | ||
1250 | /* Fall through */ | ||
1251 | case OP_GLOB: | ||
1252 | case OP_EQ: | ||
1253 | break; | ||
1254 | default: | ||
1255 | parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); | ||
1256 | goto err_free; | ||
1257 | } | ||
1218 | 1258 | ||
1219 | static int filter_opstack_top(struct filter_parse_state *ps) | 1259 | /* Make sure the field is OK for strings */ |
1220 | { | 1260 | if (!is_string_field(field)) { |
1221 | struct opstack_op *opstack_op; | 1261 | parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i); |
1262 | goto err_free; | ||
1263 | } | ||
1222 | 1264 | ||
1223 | if (filter_opstack_empty(ps)) | 1265 | for (i++; str[i]; i++) { |
1224 | return OP_NONE; | 1266 | if (str[i] == q) |
1267 | break; | ||
1268 | } | ||
1269 | if (!str[i]) { | ||
1270 | parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i); | ||
1271 | goto err_free; | ||
1272 | } | ||
1225 | 1273 | ||
1226 | opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); | 1274 | /* Skip quotes */ |
1275 | s++; | ||
1276 | len = i - s; | ||
1277 | if (len >= MAX_FILTER_STR_VAL) { | ||
1278 | parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); | ||
1279 | goto err_free; | ||
1280 | } | ||
1227 | 1281 | ||
1228 | return opstack_op->op; | 1282 | pred->regex.len = len; |
1229 | } | 1283 | strncpy(pred->regex.pattern, str + s, len); |
1284 | pred->regex.pattern[len] = 0; | ||
1230 | 1285 | ||
1231 | static int filter_opstack_pop(struct filter_parse_state *ps) | 1286 | filter_build_regex(pred); |
1232 | { | ||
1233 | struct opstack_op *opstack_op; | ||
1234 | enum filter_op_ids op; | ||
1235 | 1287 | ||
1236 | if (filter_opstack_empty(ps)) | 1288 | if (field->filter_type == FILTER_COMM) { |
1237 | return OP_NONE; | 1289 | pred->fn = filter_pred_comm; |
1238 | 1290 | ||
1239 | opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); | 1291 | } else if (field->filter_type == FILTER_STATIC_STRING) { |
1240 | op = opstack_op->op; | 1292 | pred->fn = filter_pred_string; |
1241 | list_del(&opstack_op->list); | 1293 | pred->regex.field_len = field->size; |
1242 | 1294 | ||
1243 | kfree(opstack_op); | 1295 | } else if (field->filter_type == FILTER_DYN_STRING) |
1296 | pred->fn = filter_pred_strloc; | ||
1297 | else | ||
1298 | pred->fn = filter_pred_pchar; | ||
1299 | /* go past the last quote */ | ||
1300 | i++; | ||
1244 | 1301 | ||
1245 | return op; | 1302 | } else if (isdigit(str[i])) { |
1246 | } | ||
1247 | 1303 | ||
1248 | static void filter_opstack_clear(struct filter_parse_state *ps) | 1304 | /* Make sure the field is not a string */ |
1249 | { | 1305 | if (is_string_field(field)) { |
1250 | while (!filter_opstack_empty(ps)) | 1306 | parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i); |
1251 | filter_opstack_pop(ps); | 1307 | goto err_free; |
1252 | } | 1308 | } |
1253 | |||
1254 | static char *curr_operand(struct filter_parse_state *ps) | ||
1255 | { | ||
1256 | return ps->operand.string; | ||
1257 | } | ||
1258 | 1309 | ||
1259 | static int postfix_append_operand(struct filter_parse_state *ps, char *operand) | 1310 | if (op == OP_GLOB) { |
1260 | { | 1311 | parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); |
1261 | struct postfix_elt *elt; | 1312 | goto err_free; |
1313 | } | ||
1262 | 1314 | ||
1263 | elt = kmalloc(sizeof(*elt), GFP_KERNEL); | 1315 | /* We allow 0xDEADBEEF */ |
1264 | if (!elt) | 1316 | while (isalnum(str[i])) |
1265 | return -ENOMEM; | 1317 | i++; |
1266 | 1318 | ||
1267 | elt->op = OP_NONE; | 1319 | len = i - s; |
1268 | elt->operand = kstrdup(operand, GFP_KERNEL); | 1320 | /* 0xfeedfacedeadbeef is 18 chars max */ |
1269 | if (!elt->operand) { | 1321 | if (len >= sizeof(num_buf)) { |
1270 | kfree(elt); | 1322 | parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); |
1271 | return -ENOMEM; | 1323 | goto err_free; |
1272 | } | 1324 | } |
1273 | 1325 | ||
1274 | list_add_tail(&elt->list, &ps->postfix); | 1326 | strncpy(num_buf, str + s, len); |
1327 | num_buf[len] = 0; | ||
1275 | 1328 | ||
1276 | return 0; | 1329 | /* Make sure it is a value */ |
1277 | } | 1330 | if (field->is_signed) |
1331 | ret = kstrtoll(num_buf, 0, &val); | ||
1332 | else | ||
1333 | ret = kstrtoull(num_buf, 0, &val); | ||
1334 | if (ret) { | ||
1335 | parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s); | ||
1336 | goto err_free; | ||
1337 | } | ||
1278 | 1338 | ||
1279 | static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op) | 1339 | pred->val = val; |
1280 | { | ||
1281 | struct postfix_elt *elt; | ||
1282 | 1340 | ||
1283 | elt = kmalloc(sizeof(*elt), GFP_KERNEL); | 1341 | if (field->filter_type == FILTER_CPU) |
1284 | if (!elt) | 1342 | pred->fn = filter_pred_cpu; |
1285 | return -ENOMEM; | 1343 | else { |
1344 | pred->fn = select_comparison_fn(pred->op, field->size, | ||
1345 | field->is_signed); | ||
1346 | if (pred->op == OP_NE) | ||
1347 | pred->not = 1; | ||
1348 | } | ||
1286 | 1349 | ||
1287 | elt->op = op; | 1350 | } else { |
1288 | elt->operand = NULL; | 1351 | parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i); |
1352 | goto err_free; | ||
1353 | } | ||
1289 | 1354 | ||
1290 | list_add_tail(&elt->list, &ps->postfix); | 1355 | *pred_ptr = pred; |
1356 | return i; | ||
1291 | 1357 | ||
1292 | return 0; | 1358 | err_free: |
1359 | kfree(pred); | ||
1360 | return -EINVAL; | ||
1293 | } | 1361 | } |
1294 | 1362 | ||
1295 | static void postfix_clear(struct filter_parse_state *ps) | 1363 | enum { |
1296 | { | 1364 | TOO_MANY_CLOSE = -1, |
1297 | struct postfix_elt *elt; | 1365 | TOO_MANY_OPEN = -2, |
1366 | MISSING_QUOTE = -3, | ||
1367 | }; | ||
1298 | 1368 | ||
1299 | while (!list_empty(&ps->postfix)) { | 1369 | /* |
1300 | elt = list_first_entry(&ps->postfix, struct postfix_elt, list); | 1370 | * Read the filter string once to calculate the number of predicates |
1301 | list_del(&elt->list); | 1371 | * as well as how deep the parentheses go. |
1302 | kfree(elt->operand); | 1372 | * |
1303 | kfree(elt); | 1373 | * Returns: |
1304 | } | 1374 | * 0 - everything is fine (err is undefined) |
1305 | } | 1375 | * -1 - too many ')' |
1376 | * -2 - too many '(' | ||
1377 | * -3 - No matching quote | ||
1378 | */ | ||
1379 | static int calc_stack(const char *str, int *parens, int *preds, int *err) | ||
1380 | { | ||
1381 | bool is_pred = false; | ||
1382 | int nr_preds = 0; | ||
1383 | int open = 1; /* Count the expression as "(E)" */ | ||
1384 | int last_quote = 0; | ||
1385 | int max_open = 1; | ||
1386 | int quote = 0; | ||
1387 | int i; | ||
1306 | 1388 | ||
1307 | static int filter_parse(struct filter_parse_state *ps) | 1389 | *err = 0; |
1308 | { | ||
1309 | enum filter_op_ids op, top_op; | ||
1310 | int in_string = 0; | ||
1311 | char ch; | ||
1312 | 1390 | ||
1313 | while ((ch = infix_next(ps))) { | 1391 | for (i = 0; str[i]; i++) { |
1314 | if (ch == '"') { | 1392 | if (isspace(str[i])) |
1315 | in_string ^= 1; | ||
1316 | continue; | 1393 | continue; |
1317 | } | 1394 | if (quote) { |
1318 | 1395 | if (str[i] == quote) | |
1319 | if (in_string) | 1396 | quote = 0; |
1320 | goto parse_operand; | ||
1321 | |||
1322 | if (isspace(ch)) | ||
1323 | continue; | 1397 | continue; |
1398 | } | ||
1324 | 1399 | ||
1325 | if (is_op_char(ps, ch)) { | 1400 | switch (str[i]) { |
1326 | op = infix_get_op(ps, ch); | 1401 | case '\'': |
1327 | if (op == OP_NONE) { | 1402 | case '"': |
1328 | parse_error(ps, FILT_ERR_INVALID_OP, 0); | 1403 | quote = str[i]; |
1329 | return -EINVAL; | 1404 | last_quote = i; |
1330 | } | 1405 | break; |
1331 | 1406 | case '|': | |
1332 | if (strlen(curr_operand(ps))) { | 1407 | case '&': |
1333 | postfix_append_operand(ps, curr_operand(ps)); | 1408 | if (str[i+1] != str[i]) |
1334 | clear_operand_string(ps); | ||
1335 | } | ||
1336 | |||
1337 | while (!filter_opstack_empty(ps)) { | ||
1338 | top_op = filter_opstack_top(ps); | ||
1339 | if (!is_precedence_lower(ps, top_op, op)) { | ||
1340 | top_op = filter_opstack_pop(ps); | ||
1341 | postfix_append_op(ps, top_op); | ||
1342 | continue; | ||
1343 | } | ||
1344 | break; | 1409 | break; |
1345 | } | 1410 | is_pred = false; |
1346 | |||
1347 | filter_opstack_push(ps, op); | ||
1348 | continue; | 1411 | continue; |
1349 | } | 1412 | case '(': |
1350 | 1413 | is_pred = false; | |
1351 | if (ch == '(') { | 1414 | open++; |
1352 | filter_opstack_push(ps, OP_OPEN_PAREN); | 1415 | if (open > max_open) |
1416 | max_open = open; | ||
1353 | continue; | 1417 | continue; |
1354 | } | 1418 | case ')': |
1355 | 1419 | is_pred = false; | |
1356 | if (ch == ')') { | 1420 | if (open == 1) { |
1357 | if (strlen(curr_operand(ps))) { | 1421 | *err = i; |
1358 | postfix_append_operand(ps, curr_operand(ps)); | 1422 | return TOO_MANY_CLOSE; |
1359 | clear_operand_string(ps); | ||
1360 | } | ||
1361 | |||
1362 | top_op = filter_opstack_pop(ps); | ||
1363 | while (top_op != OP_NONE) { | ||
1364 | if (top_op == OP_OPEN_PAREN) | ||
1365 | break; | ||
1366 | postfix_append_op(ps, top_op); | ||
1367 | top_op = filter_opstack_pop(ps); | ||
1368 | } | ||
1369 | if (top_op == OP_NONE) { | ||
1370 | parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); | ||
1371 | return -EINVAL; | ||
1372 | } | 1423 | } |
1424 | open--; | ||
1373 | continue; | 1425 | continue; |
1374 | } | 1426 | } |
1375 | parse_operand: | 1427 | if (!is_pred) { |
1376 | if (append_operand_char(ps, ch)) { | 1428 | nr_preds++; |
1377 | parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0); | 1429 | is_pred = true; |
1378 | return -EINVAL; | ||
1379 | } | 1430 | } |
1380 | } | 1431 | } |
1381 | 1432 | ||
1382 | if (strlen(curr_operand(ps))) | 1433 | if (quote) { |
1383 | postfix_append_operand(ps, curr_operand(ps)); | 1434 | *err = last_quote; |
1384 | 1435 | return MISSING_QUOTE; | |
1385 | while (!filter_opstack_empty(ps)) { | ||
1386 | top_op = filter_opstack_pop(ps); | ||
1387 | if (top_op == OP_NONE) | ||
1388 | break; | ||
1389 | if (top_op == OP_OPEN_PAREN) { | ||
1390 | parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); | ||
1391 | return -EINVAL; | ||
1392 | } | ||
1393 | postfix_append_op(ps, top_op); | ||
1394 | } | 1436 | } |
1395 | 1437 | ||
1396 | return 0; | 1438 | if (open != 1) { |
1397 | } | 1439 | int level = open; |
1398 | |||
1399 | static struct filter_pred *create_pred(struct filter_parse_state *ps, | ||
1400 | struct trace_event_call *call, | ||
1401 | enum filter_op_ids op, | ||
1402 | char *operand1, char *operand2) | ||
1403 | { | ||
1404 | struct ftrace_event_field *field; | ||
1405 | static struct filter_pred pred; | ||
1406 | |||
1407 | memset(&pred, 0, sizeof(pred)); | ||
1408 | pred.op = op; | ||
1409 | |||
1410 | if (op == OP_AND || op == OP_OR) | ||
1411 | return &pred; | ||
1412 | |||
1413 | if (!operand1 || !operand2) { | ||
1414 | parse_error(ps, FILT_ERR_MISSING_FIELD, 0); | ||
1415 | return NULL; | ||
1416 | } | ||
1417 | |||
1418 | field = trace_find_event_field(call, operand1); | ||
1419 | if (!field) { | ||
1420 | parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); | ||
1421 | return NULL; | ||
1422 | } | ||
1423 | |||
1424 | strcpy(pred.regex.pattern, operand2); | ||
1425 | pred.regex.len = strlen(pred.regex.pattern); | ||
1426 | pred.field = field; | ||
1427 | return init_pred(ps, field, &pred) ? NULL : &pred; | ||
1428 | } | ||
1429 | |||
1430 | static int check_preds(struct filter_parse_state *ps) | ||
1431 | { | ||
1432 | int n_normal_preds = 0, n_logical_preds = 0; | ||
1433 | struct postfix_elt *elt; | ||
1434 | int cnt = 0; | ||
1435 | |||
1436 | list_for_each_entry(elt, &ps->postfix, list) { | ||
1437 | if (elt->op == OP_NONE) { | ||
1438 | cnt++; | ||
1439 | continue; | ||
1440 | } | ||
1441 | 1440 | ||
1442 | if (elt->op == OP_AND || elt->op == OP_OR) { | 1441 | /* find the bad open */ |
1443 | n_logical_preds++; | 1442 | for (i--; i; i--) { |
1444 | cnt--; | 1443 | if (quote) { |
1445 | continue; | 1444 | if (str[i] == quote) |
1445 | quote = 0; | ||
1446 | continue; | ||
1447 | } | ||
1448 | switch (str[i]) { | ||
1449 | case '(': | ||
1450 | if (level == open) { | ||
1451 | *err = i; | ||
1452 | return TOO_MANY_OPEN; | ||
1453 | } | ||
1454 | level--; | ||
1455 | break; | ||
1456 | case ')': | ||
1457 | level++; | ||
1458 | break; | ||
1459 | case '\'': | ||
1460 | case '"': | ||
1461 | quote = str[i]; | ||
1462 | break; | ||
1463 | } | ||
1446 | } | 1464 | } |
1447 | if (elt->op != OP_NOT) | 1465 | /* First character is the '(' with missing ')' */ |
1448 | cnt--; | 1466 | *err = 0; |
1449 | n_normal_preds++; | 1467 | return TOO_MANY_OPEN; |
1450 | /* all ops should have operands */ | ||
1451 | if (cnt < 0) | ||
1452 | break; | ||
1453 | } | ||
1454 | |||
1455 | if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { | ||
1456 | parse_error(ps, FILT_ERR_INVALID_FILTER, 0); | ||
1457 | return -EINVAL; | ||
1458 | } | 1468 | } |
1459 | 1469 | ||
1470 | /* Set the size of the required stacks */ | ||
1471 | *parens = max_open; | ||
1472 | *preds = nr_preds; | ||
1460 | return 0; | 1473 | return 0; |
1461 | } | 1474 | } |
1462 | 1475 | ||
1463 | static int count_preds(struct filter_parse_state *ps) | 1476 | static int process_preds(struct trace_event_call *call, |
1464 | { | 1477 | const char *filter_string, |
1465 | struct postfix_elt *elt; | ||
1466 | int n_preds = 0; | ||
1467 | |||
1468 | list_for_each_entry(elt, &ps->postfix, list) { | ||
1469 | if (elt->op == OP_NONE) | ||
1470 | continue; | ||
1471 | n_preds++; | ||
1472 | } | ||
1473 | |||
1474 | return n_preds; | ||
1475 | } | ||
1476 | |||
1477 | struct check_pred_data { | ||
1478 | int count; | ||
1479 | int max; | ||
1480 | }; | ||
1481 | |||
1482 | static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred, | ||
1483 | int *err, void *data) | ||
1484 | { | ||
1485 | struct check_pred_data *d = data; | ||
1486 | |||
1487 | if (WARN_ON(d->count++ > d->max)) { | ||
1488 | *err = -EINVAL; | ||
1489 | return WALK_PRED_ABORT; | ||
1490 | } | ||
1491 | return WALK_PRED_DEFAULT; | ||
1492 | } | ||
1493 | |||
1494 | /* | ||
1495 | * The tree is walked at filtering of an event. If the tree is not correctly | ||
1496 | * built, it may cause an infinite loop. Check here that the tree does | ||
1497 | * indeed terminate. | ||
1498 | */ | ||
1499 | static int check_pred_tree(struct event_filter *filter, | ||
1500 | struct filter_pred *root) | ||
1501 | { | ||
1502 | struct check_pred_data data = { | ||
1503 | /* | ||
1504 | * The max that we can hit a node is three times. | ||
1505 | * Once going down, once coming up from left, and | ||
1506 | * once coming up from right. This is more than enough | ||
1507 | * since leafs are only hit a single time. | ||
1508 | */ | ||
1509 | .max = 3 * filter->n_preds, | ||
1510 | .count = 0, | ||
1511 | }; | ||
1512 | |||
1513 | return walk_pred_tree(filter->preds, root, | ||
1514 | check_pred_tree_cb, &data); | ||
1515 | } | ||
1516 | |||
1517 | static int count_leafs_cb(enum move_type move, struct filter_pred *pred, | ||
1518 | int *err, void *data) | ||
1519 | { | ||
1520 | int *count = data; | ||
1521 | |||
1522 | if ((move == MOVE_DOWN) && | ||
1523 | (pred->left == FILTER_PRED_INVALID)) | ||
1524 | (*count)++; | ||
1525 | |||
1526 | return WALK_PRED_DEFAULT; | ||
1527 | } | ||
1528 | |||
1529 | static int count_leafs(struct filter_pred *preds, struct filter_pred *root) | ||
1530 | { | ||
1531 | int count = 0, ret; | ||
1532 | |||
1533 | ret = walk_pred_tree(preds, root, count_leafs_cb, &count); | ||
1534 | WARN_ON(ret); | ||
1535 | return count; | ||
1536 | } | ||
1537 | |||
1538 | struct fold_pred_data { | ||
1539 | struct filter_pred *root; | ||
1540 | int count; | ||
1541 | int children; | ||
1542 | }; | ||
1543 | |||
1544 | static int fold_pred_cb(enum move_type move, struct filter_pred *pred, | ||
1545 | int *err, void *data) | ||
1546 | { | ||
1547 | struct fold_pred_data *d = data; | ||
1548 | struct filter_pred *root = d->root; | ||
1549 | |||
1550 | if (move != MOVE_DOWN) | ||
1551 | return WALK_PRED_DEFAULT; | ||
1552 | if (pred->left != FILTER_PRED_INVALID) | ||
1553 | return WALK_PRED_DEFAULT; | ||
1554 | |||
1555 | if (WARN_ON(d->count == d->children)) { | ||
1556 | *err = -EINVAL; | ||
1557 | return WALK_PRED_ABORT; | ||
1558 | } | ||
1559 | |||
1560 | pred->index &= ~FILTER_PRED_FOLD; | ||
1561 | root->ops[d->count++] = pred->index; | ||
1562 | return WALK_PRED_DEFAULT; | ||
1563 | } | ||
1564 | |||
1565 | static int fold_pred(struct filter_pred *preds, struct filter_pred *root) | ||
1566 | { | ||
1567 | struct fold_pred_data data = { | ||
1568 | .root = root, | ||
1569 | .count = 0, | ||
1570 | }; | ||
1571 | int children; | ||
1572 | |||
1573 | /* No need to keep the fold flag */ | ||
1574 | root->index &= ~FILTER_PRED_FOLD; | ||
1575 | |||
1576 | /* If the root is a leaf then do nothing */ | ||
1577 | if (root->left == FILTER_PRED_INVALID) | ||
1578 | return 0; | ||
1579 | |||
1580 | /* count the children */ | ||
1581 | children = count_leafs(preds, &preds[root->left]); | ||
1582 | children += count_leafs(preds, &preds[root->right]); | ||
1583 | |||
1584 | root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL); | ||
1585 | if (!root->ops) | ||
1586 | return -ENOMEM; | ||
1587 | |||
1588 | root->val = children; | ||
1589 | data.children = children; | ||
1590 | return walk_pred_tree(preds, root, fold_pred_cb, &data); | ||
1591 | } | ||
1592 | |||
1593 | static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred, | ||
1594 | int *err, void *data) | ||
1595 | { | ||
1596 | struct filter_pred *preds = data; | ||
1597 | |||
1598 | if (move != MOVE_DOWN) | ||
1599 | return WALK_PRED_DEFAULT; | ||
1600 | if (!(pred->index & FILTER_PRED_FOLD)) | ||
1601 | return WALK_PRED_DEFAULT; | ||
1602 | |||
1603 | *err = fold_pred(preds, pred); | ||
1604 | if (*err) | ||
1605 | return WALK_PRED_ABORT; | ||
1606 | |||
1607 | /* eveyrhing below is folded, continue with parent */ | ||
1608 | return WALK_PRED_PARENT; | ||
1609 | } | ||
1610 | |||
1611 | /* | ||
1612 | * To optimize the processing of the ops, if we have several "ors" or | ||
1613 | * "ands" together, we can put them in an array and process them all | ||
1614 | * together speeding up the filter logic. | ||
1615 | */ | ||
1616 | static int fold_pred_tree(struct event_filter *filter, | ||
1617 | struct filter_pred *root) | ||
1618 | { | ||
1619 | return walk_pred_tree(filter->preds, root, fold_pred_tree_cb, | ||
1620 | filter->preds); | ||
1621 | } | ||
1622 | |||
1623 | static int replace_preds(struct trace_event_call *call, | ||
1624 | struct event_filter *filter, | 1478 | struct event_filter *filter, |
1625 | struct filter_parse_state *ps, | 1479 | struct filter_parse_error *pe) |
1626 | bool dry_run) | ||
1627 | { | 1480 | { |
1628 | char *operand1 = NULL, *operand2 = NULL; | 1481 | struct prog_entry *prog; |
1629 | struct filter_pred *pred; | 1482 | int nr_parens; |
1630 | struct filter_pred *root; | 1483 | int nr_preds; |
1631 | struct postfix_elt *elt; | 1484 | int index; |
1632 | struct pred_stack stack = { }; /* init to NULL */ | 1485 | int ret; |
1633 | int err; | ||
1634 | int n_preds = 0; | ||
1635 | |||
1636 | n_preds = count_preds(ps); | ||
1637 | if (n_preds >= MAX_FILTER_PRED) { | ||
1638 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | ||
1639 | return -ENOSPC; | ||
1640 | } | ||
1641 | |||
1642 | err = check_preds(ps); | ||
1643 | if (err) | ||
1644 | return err; | ||
1645 | |||
1646 | if (!dry_run) { | ||
1647 | err = __alloc_pred_stack(&stack, n_preds); | ||
1648 | if (err) | ||
1649 | return err; | ||
1650 | err = __alloc_preds(filter, n_preds); | ||
1651 | if (err) | ||
1652 | goto fail; | ||
1653 | } | ||
1654 | |||
1655 | n_preds = 0; | ||
1656 | list_for_each_entry(elt, &ps->postfix, list) { | ||
1657 | if (elt->op == OP_NONE) { | ||
1658 | if (!operand1) | ||
1659 | operand1 = elt->operand; | ||
1660 | else if (!operand2) | ||
1661 | operand2 = elt->operand; | ||
1662 | else { | ||
1663 | parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); | ||
1664 | err = -EINVAL; | ||
1665 | goto fail; | ||
1666 | } | ||
1667 | continue; | ||
1668 | } | ||
1669 | |||
1670 | if (elt->op == OP_NOT) { | ||
1671 | if (!n_preds || operand1 || operand2) { | ||
1672 | parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0); | ||
1673 | err = -EINVAL; | ||
1674 | goto fail; | ||
1675 | } | ||
1676 | if (!dry_run) | ||
1677 | filter->preds[n_preds - 1].not ^= 1; | ||
1678 | continue; | ||
1679 | } | ||
1680 | |||
1681 | if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { | ||
1682 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | ||
1683 | err = -ENOSPC; | ||
1684 | goto fail; | ||
1685 | } | ||
1686 | |||
1687 | pred = create_pred(ps, call, elt->op, operand1, operand2); | ||
1688 | if (!pred) { | ||
1689 | err = -EINVAL; | ||
1690 | goto fail; | ||
1691 | } | ||
1692 | 1486 | ||
1693 | if (!dry_run) { | 1487 | ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index); |
1694 | err = filter_add_pred(ps, filter, pred, &stack); | 1488 | if (ret < 0) { |
1695 | if (err) | 1489 | switch (ret) { |
1696 | goto fail; | 1490 | case MISSING_QUOTE: |
1491 | parse_error(pe, FILT_ERR_MISSING_QUOTE, index); | ||
1492 | break; | ||
1493 | case TOO_MANY_OPEN: | ||
1494 | parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index); | ||
1495 | break; | ||
1496 | default: | ||
1497 | parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index); | ||
1697 | } | 1498 | } |
1698 | 1499 | return ret; | |
1699 | operand1 = operand2 = NULL; | ||
1700 | } | 1500 | } |
1701 | 1501 | ||
1702 | if (!dry_run) { | 1502 | if (!nr_preds) { |
1703 | /* We should have one item left on the stack */ | 1503 | prog = NULL; |
1704 | pred = __pop_pred_stack(&stack); | 1504 | } else { |
1705 | if (!pred) | 1505 | prog = predicate_parse(filter_string, nr_parens, nr_preds, |
1706 | return -EINVAL; | 1506 | parse_pred, call, pe); |
1707 | /* This item is where we start from in matching */ | 1507 | if (IS_ERR(prog)) |
1708 | root = pred; | 1508 | return PTR_ERR(prog); |
1709 | /* Make sure the stack is empty */ | ||
1710 | pred = __pop_pred_stack(&stack); | ||
1711 | if (WARN_ON(pred)) { | ||
1712 | err = -EINVAL; | ||
1713 | filter->root = NULL; | ||
1714 | goto fail; | ||
1715 | } | ||
1716 | err = check_pred_tree(filter, root); | ||
1717 | if (err) | ||
1718 | goto fail; | ||
1719 | |||
1720 | /* Optimize the tree */ | ||
1721 | err = fold_pred_tree(filter, root); | ||
1722 | if (err) | ||
1723 | goto fail; | ||
1724 | |||
1725 | /* We don't set root until we know it works */ | ||
1726 | barrier(); | ||
1727 | filter->root = root; | ||
1728 | } | 1509 | } |
1729 | 1510 | rcu_assign_pointer(filter->prog, prog); | |
1730 | err = 0; | 1511 | return 0; |
1731 | fail: | ||
1732 | __free_pred_stack(&stack); | ||
1733 | return err; | ||
1734 | } | 1512 | } |
1735 | 1513 | ||
1736 | static inline void event_set_filtered_flag(struct trace_event_file *file) | 1514 | static inline void event_set_filtered_flag(struct trace_event_file *file) |
@@ -1780,72 +1558,53 @@ struct filter_list { | |||
1780 | struct event_filter *filter; | 1558 | struct event_filter *filter; |
1781 | }; | 1559 | }; |
1782 | 1560 | ||
1783 | static int replace_system_preds(struct trace_subsystem_dir *dir, | 1561 | static int process_system_preds(struct trace_subsystem_dir *dir, |
1784 | struct trace_array *tr, | 1562 | struct trace_array *tr, |
1785 | struct filter_parse_state *ps, | 1563 | struct filter_parse_error *pe, |
1786 | char *filter_string) | 1564 | char *filter_string) |
1787 | { | 1565 | { |
1788 | struct trace_event_file *file; | 1566 | struct trace_event_file *file; |
1789 | struct filter_list *filter_item; | 1567 | struct filter_list *filter_item; |
1568 | struct event_filter *filter = NULL; | ||
1790 | struct filter_list *tmp; | 1569 | struct filter_list *tmp; |
1791 | LIST_HEAD(filter_list); | 1570 | LIST_HEAD(filter_list); |
1792 | bool fail = true; | 1571 | bool fail = true; |
1793 | int err; | 1572 | int err; |
1794 | 1573 | ||
1795 | list_for_each_entry(file, &tr->events, list) { | 1574 | list_for_each_entry(file, &tr->events, list) { |
1796 | if (file->system != dir) | ||
1797 | continue; | ||
1798 | |||
1799 | /* | ||
1800 | * Try to see if the filter can be applied | ||
1801 | * (filter arg is ignored on dry_run) | ||
1802 | */ | ||
1803 | err = replace_preds(file->event_call, NULL, ps, true); | ||
1804 | if (err) | ||
1805 | event_set_no_set_filter_flag(file); | ||
1806 | else | ||
1807 | event_clear_no_set_filter_flag(file); | ||
1808 | } | ||
1809 | |||
1810 | list_for_each_entry(file, &tr->events, list) { | ||
1811 | struct event_filter *filter; | ||
1812 | 1575 | ||
1813 | if (file->system != dir) | 1576 | if (file->system != dir) |
1814 | continue; | 1577 | continue; |
1815 | 1578 | ||
1816 | if (event_no_set_filter_flag(file)) | 1579 | filter = kzalloc(sizeof(*filter), GFP_KERNEL); |
1817 | continue; | 1580 | if (!filter) |
1818 | |||
1819 | filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); | ||
1820 | if (!filter_item) | ||
1821 | goto fail_mem; | ||
1822 | |||
1823 | list_add_tail(&filter_item->list, &filter_list); | ||
1824 | |||
1825 | filter_item->filter = __alloc_filter(); | ||
1826 | if (!filter_item->filter) | ||
1827 | goto fail_mem; | 1581 | goto fail_mem; |
1828 | filter = filter_item->filter; | ||
1829 | 1582 | ||
1830 | /* Can only fail on no memory */ | 1583 | filter->filter_string = kstrdup(filter_string, GFP_KERNEL); |
1831 | err = replace_filter_string(filter, filter_string); | 1584 | if (!filter->filter_string) |
1832 | if (err) | ||
1833 | goto fail_mem; | 1585 | goto fail_mem; |
1834 | 1586 | ||
1835 | err = replace_preds(file->event_call, filter, ps, false); | 1587 | err = process_preds(file->event_call, filter_string, filter, pe); |
1836 | if (err) { | 1588 | if (err) { |
1837 | filter_disable(file); | 1589 | filter_disable(file); |
1838 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | 1590 | parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); |
1839 | append_filter_err(ps, filter); | 1591 | append_filter_err(pe, filter); |
1840 | } else | 1592 | } else |
1841 | event_set_filtered_flag(file); | 1593 | event_set_filtered_flag(file); |
1594 | |||
1595 | |||
1596 | filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); | ||
1597 | if (!filter_item) | ||
1598 | goto fail_mem; | ||
1599 | |||
1600 | list_add_tail(&filter_item->list, &filter_list); | ||
1842 | /* | 1601 | /* |
1843 | * Regardless of if this returned an error, we still | 1602 | * Regardless of if this returned an error, we still |
1844 | * replace the filter for the call. | 1603 | * replace the filter for the call. |
1845 | */ | 1604 | */ |
1846 | filter = event_filter(file); | 1605 | filter_item->filter = event_filter(file); |
1847 | event_set_filter(file, filter_item->filter); | 1606 | event_set_filter(file, filter); |
1848 | filter_item->filter = filter; | 1607 | filter = NULL; |
1849 | 1608 | ||
1850 | fail = false; | 1609 | fail = false; |
1851 | } | 1610 | } |
@@ -1871,9 +1630,10 @@ static int replace_system_preds(struct trace_subsystem_dir *dir, | |||
1871 | list_del(&filter_item->list); | 1630 | list_del(&filter_item->list); |
1872 | kfree(filter_item); | 1631 | kfree(filter_item); |
1873 | } | 1632 | } |
1874 | parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); | 1633 | parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); |
1875 | return -EINVAL; | 1634 | return -EINVAL; |
1876 | fail_mem: | 1635 | fail_mem: |
1636 | kfree(filter); | ||
1877 | /* If any call succeeded, we still need to sync */ | 1637 | /* If any call succeeded, we still need to sync */ |
1878 | if (!fail) | 1638 | if (!fail) |
1879 | synchronize_sched(); | 1639 | synchronize_sched(); |
@@ -1885,47 +1645,42 @@ static int replace_system_preds(struct trace_subsystem_dir *dir, | |||
1885 | return -ENOMEM; | 1645 | return -ENOMEM; |
1886 | } | 1646 | } |
1887 | 1647 | ||
1888 | static int create_filter_start(char *filter_str, bool set_str, | 1648 | static int create_filter_start(char *filter_string, bool set_str, |
1889 | struct filter_parse_state **psp, | 1649 | struct filter_parse_error **pse, |
1890 | struct event_filter **filterp) | 1650 | struct event_filter **filterp) |
1891 | { | 1651 | { |
1892 | struct event_filter *filter; | 1652 | struct event_filter *filter; |
1893 | struct filter_parse_state *ps = NULL; | 1653 | struct filter_parse_error *pe = NULL; |
1894 | int err = 0; | 1654 | int err = 0; |
1895 | 1655 | ||
1896 | WARN_ON_ONCE(*psp || *filterp); | 1656 | if (WARN_ON_ONCE(*pse || *filterp)) |
1657 | return -EINVAL; | ||
1897 | 1658 | ||
1898 | /* allocate everything, and if any fails, free all and fail */ | 1659 | filter = kzalloc(sizeof(*filter), GFP_KERNEL); |
1899 | filter = __alloc_filter(); | 1660 | if (filter && set_str) { |
1900 | if (filter && set_str) | 1661 | filter->filter_string = kstrdup(filter_string, GFP_KERNEL); |
1901 | err = replace_filter_string(filter, filter_str); | 1662 | if (!filter->filter_string) |
1663 | err = -ENOMEM; | ||
1664 | } | ||
1902 | 1665 | ||
1903 | ps = kzalloc(sizeof(*ps), GFP_KERNEL); | 1666 | pe = kzalloc(sizeof(*pe), GFP_KERNEL); |
1904 | 1667 | ||
1905 | if (!filter || !ps || err) { | 1668 | if (!filter || !pe || err) { |
1906 | kfree(ps); | 1669 | kfree(pe); |
1907 | __free_filter(filter); | 1670 | __free_filter(filter); |
1908 | return -ENOMEM; | 1671 | return -ENOMEM; |
1909 | } | 1672 | } |
1910 | 1673 | ||
1911 | /* we're committed to creating a new filter */ | 1674 | /* we're committed to creating a new filter */ |
1912 | *filterp = filter; | 1675 | *filterp = filter; |
1913 | *psp = ps; | 1676 | *pse = pe; |
1914 | 1677 | ||
1915 | parse_init(ps, filter_ops, filter_str); | 1678 | return 0; |
1916 | err = filter_parse(ps); | ||
1917 | if (err && set_str) | ||
1918 | append_filter_err(ps, filter); | ||
1919 | return err; | ||
1920 | } | 1679 | } |
1921 | 1680 | ||
1922 | static void create_filter_finish(struct filter_parse_state *ps) | 1681 | static void create_filter_finish(struct filter_parse_error *pe) |
1923 | { | 1682 | { |
1924 | if (ps) { | 1683 | kfree(pe); |
1925 | filter_opstack_clear(ps); | ||
1926 | postfix_clear(ps); | ||
1927 | kfree(ps); | ||
1928 | } | ||
1929 | } | 1684 | } |
1930 | 1685 | ||
1931 | /** | 1686 | /** |
@@ -1945,24 +1700,20 @@ static void create_filter_finish(struct filter_parse_state *ps) | |||
1945 | * freeing it. | 1700 | * freeing it. |
1946 | */ | 1701 | */ |
1947 | static int create_filter(struct trace_event_call *call, | 1702 | static int create_filter(struct trace_event_call *call, |
1948 | char *filter_str, bool set_str, | 1703 | char *filter_string, bool set_str, |
1949 | struct event_filter **filterp) | 1704 | struct event_filter **filterp) |
1950 | { | 1705 | { |
1706 | struct filter_parse_error *pe = NULL; | ||
1951 | struct event_filter *filter = NULL; | 1707 | struct event_filter *filter = NULL; |
1952 | struct filter_parse_state *ps = NULL; | ||
1953 | int err; | 1708 | int err; |
1954 | 1709 | ||
1955 | err = create_filter_start(filter_str, set_str, &ps, &filter); | 1710 | err = create_filter_start(filter_string, set_str, &pe, &filter); |
1956 | if (!err) { | 1711 | if (err) |
1957 | err = replace_preds(call, filter, ps, false); | 1712 | return err; |
1958 | if (err && set_str) | 1713 | |
1959 | append_filter_err(ps, filter); | 1714 | err = process_preds(call, filter_string, filter, pe); |
1960 | } | 1715 | if (err && set_str) |
1961 | if (err && !set_str) { | 1716 | append_filter_err(pe, filter); |
1962 | free_event_filter(filter); | ||
1963 | filter = NULL; | ||
1964 | } | ||
1965 | create_filter_finish(ps); | ||
1966 | 1717 | ||
1967 | *filterp = filter; | 1718 | *filterp = filter; |
1968 | return err; | 1719 | return err; |
@@ -1989,21 +1740,21 @@ static int create_system_filter(struct trace_subsystem_dir *dir, | |||
1989 | char *filter_str, struct event_filter **filterp) | 1740 | char *filter_str, struct event_filter **filterp) |
1990 | { | 1741 | { |
1991 | struct event_filter *filter = NULL; | 1742 | struct event_filter *filter = NULL; |
1992 | struct filter_parse_state *ps = NULL; | 1743 | struct filter_parse_error *pe = NULL; |
1993 | int err; | 1744 | int err; |
1994 | 1745 | ||
1995 | err = create_filter_start(filter_str, true, &ps, &filter); | 1746 | err = create_filter_start(filter_str, true, &pe, &filter); |
1996 | if (!err) { | 1747 | if (!err) { |
1997 | err = replace_system_preds(dir, tr, ps, filter_str); | 1748 | err = process_system_preds(dir, tr, pe, filter_str); |
1998 | if (!err) { | 1749 | if (!err) { |
1999 | /* System filters just show a default message */ | 1750 | /* System filters just show a default message */ |
2000 | kfree(filter->filter_string); | 1751 | kfree(filter->filter_string); |
2001 | filter->filter_string = NULL; | 1752 | filter->filter_string = NULL; |
2002 | } else { | 1753 | } else { |
2003 | append_filter_err(ps, filter); | 1754 | append_filter_err(pe, filter); |
2004 | } | 1755 | } |
2005 | } | 1756 | } |
2006 | create_filter_finish(ps); | 1757 | create_filter_finish(pe); |
2007 | 1758 | ||
2008 | *filterp = filter; | 1759 | *filterp = filter; |
2009 | return err; | 1760 | return err; |
@@ -2186,66 +1937,80 @@ static int __ftrace_function_set_filter(int filter, char *buf, int len, | |||
2186 | return ret; | 1937 | return ret; |
2187 | } | 1938 | } |
2188 | 1939 | ||
2189 | static int ftrace_function_check_pred(struct filter_pred *pred, int leaf) | 1940 | static int ftrace_function_check_pred(struct filter_pred *pred) |
2190 | { | 1941 | { |
2191 | struct ftrace_event_field *field = pred->field; | 1942 | struct ftrace_event_field *field = pred->field; |
2192 | 1943 | ||
2193 | if (leaf) { | 1944 | /* |
2194 | /* | 1945 | * Check the predicate for function trace, verify: |
2195 | * Check the leaf predicate for function trace, verify: | 1946 | * - only '==' and '!=' is used |
2196 | * - only '==' and '!=' is used | 1947 | * - the 'ip' field is used |
2197 | * - the 'ip' field is used | 1948 | */ |
2198 | */ | 1949 | if ((pred->op != OP_EQ) && (pred->op != OP_NE)) |
2199 | if ((pred->op != OP_EQ) && (pred->op != OP_NE)) | 1950 | return -EINVAL; |
2200 | return -EINVAL; | ||
2201 | 1951 | ||
2202 | if (strcmp(field->name, "ip")) | 1952 | if (strcmp(field->name, "ip")) |
2203 | return -EINVAL; | 1953 | return -EINVAL; |
2204 | } else { | ||
2205 | /* | ||
2206 | * Check the non leaf predicate for function trace, verify: | ||
2207 | * - only '||' is used | ||
2208 | */ | ||
2209 | if (pred->op != OP_OR) | ||
2210 | return -EINVAL; | ||
2211 | } | ||
2212 | 1954 | ||
2213 | return 0; | 1955 | return 0; |
2214 | } | 1956 | } |
2215 | 1957 | ||
2216 | static int ftrace_function_set_filter_cb(enum move_type move, | 1958 | static int ftrace_function_set_filter_pred(struct filter_pred *pred, |
2217 | struct filter_pred *pred, | 1959 | struct function_filter_data *data) |
2218 | int *err, void *data) | ||
2219 | { | 1960 | { |
1961 | int ret; | ||
1962 | |||
2220 | /* Checking the node is valid for function trace. */ | 1963 | /* Checking the node is valid for function trace. */ |
2221 | if ((move != MOVE_DOWN) || | 1964 | ret = ftrace_function_check_pred(pred); |
2222 | (pred->left != FILTER_PRED_INVALID)) { | 1965 | if (ret) |
2223 | *err = ftrace_function_check_pred(pred, 0); | 1966 | return ret; |
2224 | } else { | ||
2225 | *err = ftrace_function_check_pred(pred, 1); | ||
2226 | if (*err) | ||
2227 | return WALK_PRED_ABORT; | ||
2228 | |||
2229 | *err = __ftrace_function_set_filter(pred->op == OP_EQ, | ||
2230 | pred->regex.pattern, | ||
2231 | pred->regex.len, | ||
2232 | data); | ||
2233 | } | ||
2234 | 1967 | ||
2235 | return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT; | 1968 | return __ftrace_function_set_filter(pred->op == OP_EQ, |
1969 | pred->regex.pattern, | ||
1970 | pred->regex.len, | ||
1971 | data); | ||
1972 | } | ||
1973 | |||
1974 | static bool is_or(struct prog_entry *prog, int i) | ||
1975 | { | ||
1976 | int target; | ||
1977 | |||
1978 | /* | ||
1979 | * Only "||" is allowed for function events, thus, | ||
1980 | * all true branches should jump to true, and any | ||
1981 | * false branch should jump to false. | ||
1982 | */ | ||
1983 | target = prog[i].target + 1; | ||
1984 | /* True and false have NULL preds (all prog entries should jump to one */ | ||
1985 | if (prog[target].pred) | ||
1986 | return false; | ||
1987 | |||
1988 | /* prog[target].target is 1 for TRUE, 0 for FALSE */ | ||
1989 | return prog[i].when_to_branch == prog[target].target; | ||
2236 | } | 1990 | } |
2237 | 1991 | ||
2238 | static int ftrace_function_set_filter(struct perf_event *event, | 1992 | static int ftrace_function_set_filter(struct perf_event *event, |
2239 | struct event_filter *filter) | 1993 | struct event_filter *filter) |
2240 | { | 1994 | { |
1995 | struct prog_entry *prog = rcu_dereference_protected(filter->prog, | ||
1996 | lockdep_is_held(&event_mutex)); | ||
2241 | struct function_filter_data data = { | 1997 | struct function_filter_data data = { |
2242 | .first_filter = 1, | 1998 | .first_filter = 1, |
2243 | .first_notrace = 1, | 1999 | .first_notrace = 1, |
2244 | .ops = &event->ftrace_ops, | 2000 | .ops = &event->ftrace_ops, |
2245 | }; | 2001 | }; |
2002 | int i; | ||
2246 | 2003 | ||
2247 | return walk_pred_tree(filter->preds, filter->root, | 2004 | for (i = 0; prog[i].pred; i++) { |
2248 | ftrace_function_set_filter_cb, &data); | 2005 | struct filter_pred *pred = prog[i].pred; |
2006 | |||
2007 | if (!is_or(prog, i)) | ||
2008 | return -EINVAL; | ||
2009 | |||
2010 | if (ftrace_function_set_filter_pred(pred, &data) < 0) | ||
2011 | return -EINVAL; | ||
2012 | } | ||
2013 | return 0; | ||
2249 | } | 2014 | } |
2250 | #else | 2015 | #else |
2251 | static int ftrace_function_set_filter(struct perf_event *event, | 2016 | static int ftrace_function_set_filter(struct perf_event *event, |
@@ -2388,26 +2153,28 @@ static int test_pred_visited_fn(struct filter_pred *pred, void *event) | |||
2388 | return 1; | 2153 | return 1; |
2389 | } | 2154 | } |
2390 | 2155 | ||
2391 | static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred, | 2156 | static void update_pred_fn(struct event_filter *filter, char *fields) |
2392 | int *err, void *data) | ||
2393 | { | 2157 | { |
2394 | char *fields = data; | 2158 | struct prog_entry *prog = rcu_dereference_protected(filter->prog, |
2159 | lockdep_is_held(&event_mutex)); | ||
2160 | int i; | ||
2395 | 2161 | ||
2396 | if ((move == MOVE_DOWN) && | 2162 | for (i = 0; prog[i].pred; i++) { |
2397 | (pred->left == FILTER_PRED_INVALID)) { | 2163 | struct filter_pred *pred = prog[i].pred; |
2398 | struct ftrace_event_field *field = pred->field; | 2164 | struct ftrace_event_field *field = pred->field; |
2399 | 2165 | ||
2166 | WARN_ON_ONCE(!pred->fn); | ||
2167 | |||
2400 | if (!field) { | 2168 | if (!field) { |
2401 | WARN(1, "all leafs should have field defined"); | 2169 | WARN_ONCE(1, "all leafs should have field defined %d", i); |
2402 | return WALK_PRED_DEFAULT; | 2170 | continue; |
2403 | } | 2171 | } |
2172 | |||
2404 | if (!strchr(fields, *field->name)) | 2173 | if (!strchr(fields, *field->name)) |
2405 | return WALK_PRED_DEFAULT; | 2174 | continue; |
2406 | 2175 | ||
2407 | WARN_ON(!pred->fn); | ||
2408 | pred->fn = test_pred_visited_fn; | 2176 | pred->fn = test_pred_visited_fn; |
2409 | } | 2177 | } |
2410 | return WALK_PRED_DEFAULT; | ||
2411 | } | 2178 | } |
2412 | 2179 | ||
2413 | static __init int ftrace_test_event_filter(void) | 2180 | static __init int ftrace_test_event_filter(void) |
@@ -2431,20 +2198,22 @@ static __init int ftrace_test_event_filter(void) | |||
2431 | break; | 2198 | break; |
2432 | } | 2199 | } |
2433 | 2200 | ||
2201 | /* Needed to dereference filter->prog */ | ||
2202 | mutex_lock(&event_mutex); | ||
2434 | /* | 2203 | /* |
2435 | * The preemption disabling is not really needed for self | 2204 | * The preemption disabling is not really needed for self |
2436 | * tests, but the rcu dereference will complain without it. | 2205 | * tests, but the rcu dereference will complain without it. |
2437 | */ | 2206 | */ |
2438 | preempt_disable(); | 2207 | preempt_disable(); |
2439 | if (*d->not_visited) | 2208 | if (*d->not_visited) |
2440 | walk_pred_tree(filter->preds, filter->root, | 2209 | update_pred_fn(filter, d->not_visited); |
2441 | test_walk_pred_cb, | ||
2442 | d->not_visited); | ||
2443 | 2210 | ||
2444 | test_pred_visited = 0; | 2211 | test_pred_visited = 0; |
2445 | err = filter_match_preds(filter, &d->rec); | 2212 | err = filter_match_preds(filter, &d->rec); |
2446 | preempt_enable(); | 2213 | preempt_enable(); |
2447 | 2214 | ||
2215 | mutex_unlock(&event_mutex); | ||
2216 | |||
2448 | __free_filter(filter); | 2217 | __free_filter(filter); |
2449 | 2218 | ||
2450 | if (test_pred_visited) { | 2219 | if (test_pred_visited) { |
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1e1558c99d56..0d7b3ffbecc2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c | |||
@@ -20,15 +20,39 @@ | |||
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/stacktrace.h> | 21 | #include <linux/stacktrace.h> |
22 | #include <linux/rculist.h> | 22 | #include <linux/rculist.h> |
23 | #include <linux/tracefs.h> | ||
23 | 24 | ||
24 | #include "tracing_map.h" | 25 | #include "tracing_map.h" |
25 | #include "trace.h" | 26 | #include "trace.h" |
26 | 27 | ||
28 | #define SYNTH_SYSTEM "synthetic" | ||
29 | #define SYNTH_FIELDS_MAX 16 | ||
30 | |||
31 | #define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ | ||
32 | |||
27 | struct hist_field; | 33 | struct hist_field; |
28 | 34 | ||
29 | typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event); | 35 | typedef u64 (*hist_field_fn_t) (struct hist_field *field, |
36 | struct tracing_map_elt *elt, | ||
37 | struct ring_buffer_event *rbe, | ||
38 | void *event); | ||
30 | 39 | ||
31 | #define HIST_FIELD_OPERANDS_MAX 2 | 40 | #define HIST_FIELD_OPERANDS_MAX 2 |
41 | #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) | ||
42 | #define HIST_ACTIONS_MAX 8 | ||
43 | |||
44 | enum field_op_id { | ||
45 | FIELD_OP_NONE, | ||
46 | FIELD_OP_PLUS, | ||
47 | FIELD_OP_MINUS, | ||
48 | FIELD_OP_UNARY_MINUS, | ||
49 | }; | ||
50 | |||
51 | struct hist_var { | ||
52 | char *name; | ||
53 | struct hist_trigger_data *hist_data; | ||
54 | unsigned int idx; | ||
55 | }; | ||
32 | 56 | ||
33 | struct hist_field { | 57 | struct hist_field { |
34 | struct ftrace_event_field *field; | 58 | struct ftrace_event_field *field; |
@@ -37,27 +61,49 @@ struct hist_field { | |||
37 | unsigned int size; | 61 | unsigned int size; |
38 | unsigned int offset; | 62 | unsigned int offset; |
39 | unsigned int is_signed; | 63 | unsigned int is_signed; |
64 | const char *type; | ||
40 | struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; | 65 | struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; |
66 | struct hist_trigger_data *hist_data; | ||
67 | struct hist_var var; | ||
68 | enum field_op_id operator; | ||
69 | char *system; | ||
70 | char *event_name; | ||
71 | char *name; | ||
72 | unsigned int var_idx; | ||
73 | unsigned int var_ref_idx; | ||
74 | bool read_once; | ||
41 | }; | 75 | }; |
42 | 76 | ||
43 | static u64 hist_field_none(struct hist_field *field, void *event) | 77 | static u64 hist_field_none(struct hist_field *field, |
78 | struct tracing_map_elt *elt, | ||
79 | struct ring_buffer_event *rbe, | ||
80 | void *event) | ||
44 | { | 81 | { |
45 | return 0; | 82 | return 0; |
46 | } | 83 | } |
47 | 84 | ||
48 | static u64 hist_field_counter(struct hist_field *field, void *event) | 85 | static u64 hist_field_counter(struct hist_field *field, |
86 | struct tracing_map_elt *elt, | ||
87 | struct ring_buffer_event *rbe, | ||
88 | void *event) | ||
49 | { | 89 | { |
50 | return 1; | 90 | return 1; |
51 | } | 91 | } |
52 | 92 | ||
53 | static u64 hist_field_string(struct hist_field *hist_field, void *event) | 93 | static u64 hist_field_string(struct hist_field *hist_field, |
94 | struct tracing_map_elt *elt, | ||
95 | struct ring_buffer_event *rbe, | ||
96 | void *event) | ||
54 | { | 97 | { |
55 | char *addr = (char *)(event + hist_field->field->offset); | 98 | char *addr = (char *)(event + hist_field->field->offset); |
56 | 99 | ||
57 | return (u64)(unsigned long)addr; | 100 | return (u64)(unsigned long)addr; |
58 | } | 101 | } |
59 | 102 | ||
60 | static u64 hist_field_dynstring(struct hist_field *hist_field, void *event) | 103 | static u64 hist_field_dynstring(struct hist_field *hist_field, |
104 | struct tracing_map_elt *elt, | ||
105 | struct ring_buffer_event *rbe, | ||
106 | void *event) | ||
61 | { | 107 | { |
62 | u32 str_item = *(u32 *)(event + hist_field->field->offset); | 108 | u32 str_item = *(u32 *)(event + hist_field->field->offset); |
63 | int str_loc = str_item & 0xffff; | 109 | int str_loc = str_item & 0xffff; |
@@ -66,24 +112,74 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, void *event) | |||
66 | return (u64)(unsigned long)addr; | 112 | return (u64)(unsigned long)addr; |
67 | } | 113 | } |
68 | 114 | ||
69 | static u64 hist_field_pstring(struct hist_field *hist_field, void *event) | 115 | static u64 hist_field_pstring(struct hist_field *hist_field, |
116 | struct tracing_map_elt *elt, | ||
117 | struct ring_buffer_event *rbe, | ||
118 | void *event) | ||
70 | { | 119 | { |
71 | char **addr = (char **)(event + hist_field->field->offset); | 120 | char **addr = (char **)(event + hist_field->field->offset); |
72 | 121 | ||
73 | return (u64)(unsigned long)*addr; | 122 | return (u64)(unsigned long)*addr; |
74 | } | 123 | } |
75 | 124 | ||
76 | static u64 hist_field_log2(struct hist_field *hist_field, void *event) | 125 | static u64 hist_field_log2(struct hist_field *hist_field, |
126 | struct tracing_map_elt *elt, | ||
127 | struct ring_buffer_event *rbe, | ||
128 | void *event) | ||
77 | { | 129 | { |
78 | struct hist_field *operand = hist_field->operands[0]; | 130 | struct hist_field *operand = hist_field->operands[0]; |
79 | 131 | ||
80 | u64 val = operand->fn(operand, event); | 132 | u64 val = operand->fn(operand, elt, rbe, event); |
81 | 133 | ||
82 | return (u64) ilog2(roundup_pow_of_two(val)); | 134 | return (u64) ilog2(roundup_pow_of_two(val)); |
83 | } | 135 | } |
84 | 136 | ||
137 | static u64 hist_field_plus(struct hist_field *hist_field, | ||
138 | struct tracing_map_elt *elt, | ||
139 | struct ring_buffer_event *rbe, | ||
140 | void *event) | ||
141 | { | ||
142 | struct hist_field *operand1 = hist_field->operands[0]; | ||
143 | struct hist_field *operand2 = hist_field->operands[1]; | ||
144 | |||
145 | u64 val1 = operand1->fn(operand1, elt, rbe, event); | ||
146 | u64 val2 = operand2->fn(operand2, elt, rbe, event); | ||
147 | |||
148 | return val1 + val2; | ||
149 | } | ||
150 | |||
151 | static u64 hist_field_minus(struct hist_field *hist_field, | ||
152 | struct tracing_map_elt *elt, | ||
153 | struct ring_buffer_event *rbe, | ||
154 | void *event) | ||
155 | { | ||
156 | struct hist_field *operand1 = hist_field->operands[0]; | ||
157 | struct hist_field *operand2 = hist_field->operands[1]; | ||
158 | |||
159 | u64 val1 = operand1->fn(operand1, elt, rbe, event); | ||
160 | u64 val2 = operand2->fn(operand2, elt, rbe, event); | ||
161 | |||
162 | return val1 - val2; | ||
163 | } | ||
164 | |||
165 | static u64 hist_field_unary_minus(struct hist_field *hist_field, | ||
166 | struct tracing_map_elt *elt, | ||
167 | struct ring_buffer_event *rbe, | ||
168 | void *event) | ||
169 | { | ||
170 | struct hist_field *operand = hist_field->operands[0]; | ||
171 | |||
172 | s64 sval = (s64)operand->fn(operand, elt, rbe, event); | ||
173 | u64 val = (u64)-sval; | ||
174 | |||
175 | return val; | ||
176 | } | ||
177 | |||
85 | #define DEFINE_HIST_FIELD_FN(type) \ | 178 | #define DEFINE_HIST_FIELD_FN(type) \ |
86 | static u64 hist_field_##type(struct hist_field *hist_field, void *event)\ | 179 | static u64 hist_field_##type(struct hist_field *hist_field, \ |
180 | struct tracing_map_elt *elt, \ | ||
181 | struct ring_buffer_event *rbe, \ | ||
182 | void *event) \ | ||
87 | { \ | 183 | { \ |
88 | type *addr = (type *)(event + hist_field->field->offset); \ | 184 | type *addr = (type *)(event + hist_field->field->offset); \ |
89 | \ | 185 | \ |
@@ -126,6 +222,19 @@ enum hist_field_flags { | |||
126 | HIST_FIELD_FL_SYSCALL = 1 << 7, | 222 | HIST_FIELD_FL_SYSCALL = 1 << 7, |
127 | HIST_FIELD_FL_STACKTRACE = 1 << 8, | 223 | HIST_FIELD_FL_STACKTRACE = 1 << 8, |
128 | HIST_FIELD_FL_LOG2 = 1 << 9, | 224 | HIST_FIELD_FL_LOG2 = 1 << 9, |
225 | HIST_FIELD_FL_TIMESTAMP = 1 << 10, | ||
226 | HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, | ||
227 | HIST_FIELD_FL_VAR = 1 << 12, | ||
228 | HIST_FIELD_FL_EXPR = 1 << 13, | ||
229 | HIST_FIELD_FL_VAR_REF = 1 << 14, | ||
230 | HIST_FIELD_FL_CPU = 1 << 15, | ||
231 | HIST_FIELD_FL_ALIAS = 1 << 16, | ||
232 | }; | ||
233 | |||
234 | struct var_defs { | ||
235 | unsigned int n_vars; | ||
236 | char *name[TRACING_MAP_VARS_MAX]; | ||
237 | char *expr[TRACING_MAP_VARS_MAX]; | ||
129 | }; | 238 | }; |
130 | 239 | ||
131 | struct hist_trigger_attrs { | 240 | struct hist_trigger_attrs { |
@@ -133,25 +242,1437 @@ struct hist_trigger_attrs { | |||
133 | char *vals_str; | 242 | char *vals_str; |
134 | char *sort_key_str; | 243 | char *sort_key_str; |
135 | char *name; | 244 | char *name; |
245 | char *clock; | ||
136 | bool pause; | 246 | bool pause; |
137 | bool cont; | 247 | bool cont; |
138 | bool clear; | 248 | bool clear; |
249 | bool ts_in_usecs; | ||
139 | unsigned int map_bits; | 250 | unsigned int map_bits; |
251 | |||
252 | char *assignment_str[TRACING_MAP_VARS_MAX]; | ||
253 | unsigned int n_assignments; | ||
254 | |||
255 | char *action_str[HIST_ACTIONS_MAX]; | ||
256 | unsigned int n_actions; | ||
257 | |||
258 | struct var_defs var_defs; | ||
259 | }; | ||
260 | |||
261 | struct field_var { | ||
262 | struct hist_field *var; | ||
263 | struct hist_field *val; | ||
264 | }; | ||
265 | |||
266 | struct field_var_hist { | ||
267 | struct hist_trigger_data *hist_data; | ||
268 | char *cmd; | ||
140 | }; | 269 | }; |
141 | 270 | ||
142 | struct hist_trigger_data { | 271 | struct hist_trigger_data { |
143 | struct hist_field *fields[TRACING_MAP_FIELDS_MAX]; | 272 | struct hist_field *fields[HIST_FIELDS_MAX]; |
144 | unsigned int n_vals; | 273 | unsigned int n_vals; |
145 | unsigned int n_keys; | 274 | unsigned int n_keys; |
146 | unsigned int n_fields; | 275 | unsigned int n_fields; |
276 | unsigned int n_vars; | ||
147 | unsigned int key_size; | 277 | unsigned int key_size; |
148 | struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; | 278 | struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; |
149 | unsigned int n_sort_keys; | 279 | unsigned int n_sort_keys; |
150 | struct trace_event_file *event_file; | 280 | struct trace_event_file *event_file; |
151 | struct hist_trigger_attrs *attrs; | 281 | struct hist_trigger_attrs *attrs; |
152 | struct tracing_map *map; | 282 | struct tracing_map *map; |
283 | bool enable_timestamps; | ||
284 | bool remove; | ||
285 | struct hist_field *var_refs[TRACING_MAP_VARS_MAX]; | ||
286 | unsigned int n_var_refs; | ||
287 | |||
288 | struct action_data *actions[HIST_ACTIONS_MAX]; | ||
289 | unsigned int n_actions; | ||
290 | |||
291 | struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX]; | ||
292 | unsigned int n_synth_var_refs; | ||
293 | struct field_var *field_vars[SYNTH_FIELDS_MAX]; | ||
294 | unsigned int n_field_vars; | ||
295 | unsigned int n_field_var_str; | ||
296 | struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; | ||
297 | unsigned int n_field_var_hists; | ||
298 | |||
299 | struct field_var *max_vars[SYNTH_FIELDS_MAX]; | ||
300 | unsigned int n_max_vars; | ||
301 | unsigned int n_max_var_str; | ||
302 | }; | ||
303 | |||
304 | struct synth_field { | ||
305 | char *type; | ||
306 | char *name; | ||
307 | size_t size; | ||
308 | bool is_signed; | ||
309 | bool is_string; | ||
310 | }; | ||
311 | |||
312 | struct synth_event { | ||
313 | struct list_head list; | ||
314 | int ref; | ||
315 | char *name; | ||
316 | struct synth_field **fields; | ||
317 | unsigned int n_fields; | ||
318 | unsigned int n_u64; | ||
319 | struct trace_event_class class; | ||
320 | struct trace_event_call call; | ||
321 | struct tracepoint *tp; | ||
322 | }; | ||
323 | |||
324 | struct action_data; | ||
325 | |||
326 | typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, | ||
327 | struct tracing_map_elt *elt, void *rec, | ||
328 | struct ring_buffer_event *rbe, | ||
329 | struct action_data *data, u64 *var_ref_vals); | ||
330 | |||
331 | struct action_data { | ||
332 | action_fn_t fn; | ||
333 | unsigned int n_params; | ||
334 | char *params[SYNTH_FIELDS_MAX]; | ||
335 | |||
336 | union { | ||
337 | struct { | ||
338 | unsigned int var_ref_idx; | ||
339 | char *match_event; | ||
340 | char *match_event_system; | ||
341 | char *synth_event_name; | ||
342 | struct synth_event *synth_event; | ||
343 | } onmatch; | ||
344 | |||
345 | struct { | ||
346 | char *var_str; | ||
347 | char *fn_name; | ||
348 | unsigned int max_var_ref_idx; | ||
349 | struct hist_field *max_var; | ||
350 | struct hist_field *var; | ||
351 | } onmax; | ||
352 | }; | ||
353 | }; | ||
354 | |||
355 | |||
356 | static char last_hist_cmd[MAX_FILTER_STR_VAL]; | ||
357 | static char hist_err_str[MAX_FILTER_STR_VAL]; | ||
358 | |||
359 | static void last_cmd_set(char *str) | ||
360 | { | ||
361 | if (!str) | ||
362 | return; | ||
363 | |||
364 | strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); | ||
365 | } | ||
366 | |||
367 | static void hist_err(char *str, char *var) | ||
368 | { | ||
369 | int maxlen = MAX_FILTER_STR_VAL - 1; | ||
370 | |||
371 | if (!str) | ||
372 | return; | ||
373 | |||
374 | if (strlen(hist_err_str)) | ||
375 | return; | ||
376 | |||
377 | if (!var) | ||
378 | var = ""; | ||
379 | |||
380 | if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) | ||
381 | return; | ||
382 | |||
383 | strcat(hist_err_str, str); | ||
384 | strcat(hist_err_str, var); | ||
385 | } | ||
386 | |||
387 | static void hist_err_event(char *str, char *system, char *event, char *var) | ||
388 | { | ||
389 | char err[MAX_FILTER_STR_VAL]; | ||
390 | |||
391 | if (system && var) | ||
392 | snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); | ||
393 | else if (system) | ||
394 | snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); | ||
395 | else | ||
396 | strncpy(err, var, MAX_FILTER_STR_VAL); | ||
397 | |||
398 | hist_err(str, err); | ||
399 | } | ||
400 | |||
401 | static void hist_err_clear(void) | ||
402 | { | ||
403 | hist_err_str[0] = '\0'; | ||
404 | } | ||
405 | |||
406 | static bool have_hist_err(void) | ||
407 | { | ||
408 | if (strlen(hist_err_str)) | ||
409 | return true; | ||
410 | |||
411 | return false; | ||
412 | } | ||
413 | |||
414 | static LIST_HEAD(synth_event_list); | ||
415 | static DEFINE_MUTEX(synth_event_mutex); | ||
416 | |||
417 | struct synth_trace_event { | ||
418 | struct trace_entry ent; | ||
419 | u64 fields[]; | ||
420 | }; | ||
421 | |||
422 | static int synth_event_define_fields(struct trace_event_call *call) | ||
423 | { | ||
424 | struct synth_trace_event trace; | ||
425 | int offset = offsetof(typeof(trace), fields); | ||
426 | struct synth_event *event = call->data; | ||
427 | unsigned int i, size, n_u64; | ||
428 | char *name, *type; | ||
429 | bool is_signed; | ||
430 | int ret = 0; | ||
431 | |||
432 | for (i = 0, n_u64 = 0; i < event->n_fields; i++) { | ||
433 | size = event->fields[i]->size; | ||
434 | is_signed = event->fields[i]->is_signed; | ||
435 | type = event->fields[i]->type; | ||
436 | name = event->fields[i]->name; | ||
437 | ret = trace_define_field(call, type, name, offset, size, | ||
438 | is_signed, FILTER_OTHER); | ||
439 | if (ret) | ||
440 | break; | ||
441 | |||
442 | if (event->fields[i]->is_string) { | ||
443 | offset += STR_VAR_LEN_MAX; | ||
444 | n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | ||
445 | } else { | ||
446 | offset += sizeof(u64); | ||
447 | n_u64++; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | event->n_u64 = n_u64; | ||
452 | |||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | static bool synth_field_signed(char *type) | ||
457 | { | ||
458 | if (strncmp(type, "u", 1) == 0) | ||
459 | return false; | ||
460 | |||
461 | return true; | ||
462 | } | ||
463 | |||
464 | static int synth_field_is_string(char *type) | ||
465 | { | ||
466 | if (strstr(type, "char[") != NULL) | ||
467 | return true; | ||
468 | |||
469 | return false; | ||
470 | } | ||
471 | |||
472 | static int synth_field_string_size(char *type) | ||
473 | { | ||
474 | char buf[4], *end, *start; | ||
475 | unsigned int len; | ||
476 | int size, err; | ||
477 | |||
478 | start = strstr(type, "char["); | ||
479 | if (start == NULL) | ||
480 | return -EINVAL; | ||
481 | start += strlen("char["); | ||
482 | |||
483 | end = strchr(type, ']'); | ||
484 | if (!end || end < start) | ||
485 | return -EINVAL; | ||
486 | |||
487 | len = end - start; | ||
488 | if (len > 3) | ||
489 | return -EINVAL; | ||
490 | |||
491 | strncpy(buf, start, len); | ||
492 | buf[len] = '\0'; | ||
493 | |||
494 | err = kstrtouint(buf, 0, &size); | ||
495 | if (err) | ||
496 | return err; | ||
497 | |||
498 | if (size > STR_VAR_LEN_MAX) | ||
499 | return -EINVAL; | ||
500 | |||
501 | return size; | ||
502 | } | ||
503 | |||
504 | static int synth_field_size(char *type) | ||
505 | { | ||
506 | int size = 0; | ||
507 | |||
508 | if (strcmp(type, "s64") == 0) | ||
509 | size = sizeof(s64); | ||
510 | else if (strcmp(type, "u64") == 0) | ||
511 | size = sizeof(u64); | ||
512 | else if (strcmp(type, "s32") == 0) | ||
513 | size = sizeof(s32); | ||
514 | else if (strcmp(type, "u32") == 0) | ||
515 | size = sizeof(u32); | ||
516 | else if (strcmp(type, "s16") == 0) | ||
517 | size = sizeof(s16); | ||
518 | else if (strcmp(type, "u16") == 0) | ||
519 | size = sizeof(u16); | ||
520 | else if (strcmp(type, "s8") == 0) | ||
521 | size = sizeof(s8); | ||
522 | else if (strcmp(type, "u8") == 0) | ||
523 | size = sizeof(u8); | ||
524 | else if (strcmp(type, "char") == 0) | ||
525 | size = sizeof(char); | ||
526 | else if (strcmp(type, "unsigned char") == 0) | ||
527 | size = sizeof(unsigned char); | ||
528 | else if (strcmp(type, "int") == 0) | ||
529 | size = sizeof(int); | ||
530 | else if (strcmp(type, "unsigned int") == 0) | ||
531 | size = sizeof(unsigned int); | ||
532 | else if (strcmp(type, "long") == 0) | ||
533 | size = sizeof(long); | ||
534 | else if (strcmp(type, "unsigned long") == 0) | ||
535 | size = sizeof(unsigned long); | ||
536 | else if (strcmp(type, "pid_t") == 0) | ||
537 | size = sizeof(pid_t); | ||
538 | else if (synth_field_is_string(type)) | ||
539 | size = synth_field_string_size(type); | ||
540 | |||
541 | return size; | ||
542 | } | ||
543 | |||
544 | static const char *synth_field_fmt(char *type) | ||
545 | { | ||
546 | const char *fmt = "%llu"; | ||
547 | |||
548 | if (strcmp(type, "s64") == 0) | ||
549 | fmt = "%lld"; | ||
550 | else if (strcmp(type, "u64") == 0) | ||
551 | fmt = "%llu"; | ||
552 | else if (strcmp(type, "s32") == 0) | ||
553 | fmt = "%d"; | ||
554 | else if (strcmp(type, "u32") == 0) | ||
555 | fmt = "%u"; | ||
556 | else if (strcmp(type, "s16") == 0) | ||
557 | fmt = "%d"; | ||
558 | else if (strcmp(type, "u16") == 0) | ||
559 | fmt = "%u"; | ||
560 | else if (strcmp(type, "s8") == 0) | ||
561 | fmt = "%d"; | ||
562 | else if (strcmp(type, "u8") == 0) | ||
563 | fmt = "%u"; | ||
564 | else if (strcmp(type, "char") == 0) | ||
565 | fmt = "%d"; | ||
566 | else if (strcmp(type, "unsigned char") == 0) | ||
567 | fmt = "%u"; | ||
568 | else if (strcmp(type, "int") == 0) | ||
569 | fmt = "%d"; | ||
570 | else if (strcmp(type, "unsigned int") == 0) | ||
571 | fmt = "%u"; | ||
572 | else if (strcmp(type, "long") == 0) | ||
573 | fmt = "%ld"; | ||
574 | else if (strcmp(type, "unsigned long") == 0) | ||
575 | fmt = "%lu"; | ||
576 | else if (strcmp(type, "pid_t") == 0) | ||
577 | fmt = "%d"; | ||
578 | else if (synth_field_is_string(type)) | ||
579 | fmt = "%s"; | ||
580 | |||
581 | return fmt; | ||
582 | } | ||
583 | |||
584 | static enum print_line_t print_synth_event(struct trace_iterator *iter, | ||
585 | int flags, | ||
586 | struct trace_event *event) | ||
587 | { | ||
588 | struct trace_array *tr = iter->tr; | ||
589 | struct trace_seq *s = &iter->seq; | ||
590 | struct synth_trace_event *entry; | ||
591 | struct synth_event *se; | ||
592 | unsigned int i, n_u64; | ||
593 | char print_fmt[32]; | ||
594 | const char *fmt; | ||
595 | |||
596 | entry = (struct synth_trace_event *)iter->ent; | ||
597 | se = container_of(event, struct synth_event, call.event); | ||
598 | |||
599 | trace_seq_printf(s, "%s: ", se->name); | ||
600 | |||
601 | for (i = 0, n_u64 = 0; i < se->n_fields; i++) { | ||
602 | if (trace_seq_has_overflowed(s)) | ||
603 | goto end; | ||
604 | |||
605 | fmt = synth_field_fmt(se->fields[i]->type); | ||
606 | |||
607 | /* parameter types */ | ||
608 | if (tr->trace_flags & TRACE_ITER_VERBOSE) | ||
609 | trace_seq_printf(s, "%s ", fmt); | ||
610 | |||
611 | snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); | ||
612 | |||
613 | /* parameter values */ | ||
614 | if (se->fields[i]->is_string) { | ||
615 | trace_seq_printf(s, print_fmt, se->fields[i]->name, | ||
616 | (char *)&entry->fields[n_u64], | ||
617 | i == se->n_fields - 1 ? "" : " "); | ||
618 | n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | ||
619 | } else { | ||
620 | trace_seq_printf(s, print_fmt, se->fields[i]->name, | ||
621 | entry->fields[n_u64], | ||
622 | i == se->n_fields - 1 ? "" : " "); | ||
623 | n_u64++; | ||
624 | } | ||
625 | } | ||
626 | end: | ||
627 | trace_seq_putc(s, '\n'); | ||
628 | |||
629 | return trace_handle_return(s); | ||
630 | } | ||
631 | |||
632 | static struct trace_event_functions synth_event_funcs = { | ||
633 | .trace = print_synth_event | ||
634 | }; | ||
635 | |||
636 | static notrace void trace_event_raw_event_synth(void *__data, | ||
637 | u64 *var_ref_vals, | ||
638 | unsigned int var_ref_idx) | ||
639 | { | ||
640 | struct trace_event_file *trace_file = __data; | ||
641 | struct synth_trace_event *entry; | ||
642 | struct trace_event_buffer fbuffer; | ||
643 | struct ring_buffer *buffer; | ||
644 | struct synth_event *event; | ||
645 | unsigned int i, n_u64; | ||
646 | int fields_size = 0; | ||
647 | |||
648 | event = trace_file->event_call->data; | ||
649 | |||
650 | if (trace_trigger_soft_disabled(trace_file)) | ||
651 | return; | ||
652 | |||
653 | fields_size = event->n_u64 * sizeof(u64); | ||
654 | |||
655 | /* | ||
656 | * Avoid ring buffer recursion detection, as this event | ||
657 | * is being performed within another event. | ||
658 | */ | ||
659 | buffer = trace_file->tr->trace_buffer.buffer; | ||
660 | ring_buffer_nest_start(buffer); | ||
661 | |||
662 | entry = trace_event_buffer_reserve(&fbuffer, trace_file, | ||
663 | sizeof(*entry) + fields_size); | ||
664 | if (!entry) | ||
665 | goto out; | ||
666 | |||
667 | for (i = 0, n_u64 = 0; i < event->n_fields; i++) { | ||
668 | if (event->fields[i]->is_string) { | ||
669 | char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; | ||
670 | char *str_field = (char *)&entry->fields[n_u64]; | ||
671 | |||
672 | strscpy(str_field, str_val, STR_VAR_LEN_MAX); | ||
673 | n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | ||
674 | } else { | ||
675 | entry->fields[n_u64] = var_ref_vals[var_ref_idx + i]; | ||
676 | n_u64++; | ||
677 | } | ||
678 | } | ||
679 | |||
680 | trace_event_buffer_commit(&fbuffer); | ||
681 | out: | ||
682 | ring_buffer_nest_end(buffer); | ||
683 | } | ||
684 | |||
685 | static void free_synth_event_print_fmt(struct trace_event_call *call) | ||
686 | { | ||
687 | if (call) { | ||
688 | kfree(call->print_fmt); | ||
689 | call->print_fmt = NULL; | ||
690 | } | ||
691 | } | ||
692 | |||
693 | static int __set_synth_event_print_fmt(struct synth_event *event, | ||
694 | char *buf, int len) | ||
695 | { | ||
696 | const char *fmt; | ||
697 | int pos = 0; | ||
698 | int i; | ||
699 | |||
700 | /* When len=0, we just calculate the needed length */ | ||
701 | #define LEN_OR_ZERO (len ? len - pos : 0) | ||
702 | |||
703 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); | ||
704 | for (i = 0; i < event->n_fields; i++) { | ||
705 | fmt = synth_field_fmt(event->fields[i]->type); | ||
706 | pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", | ||
707 | event->fields[i]->name, fmt, | ||
708 | i == event->n_fields - 1 ? "" : ", "); | ||
709 | } | ||
710 | pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); | ||
711 | |||
712 | for (i = 0; i < event->n_fields; i++) { | ||
713 | pos += snprintf(buf + pos, LEN_OR_ZERO, | ||
714 | ", REC->%s", event->fields[i]->name); | ||
715 | } | ||
716 | |||
717 | #undef LEN_OR_ZERO | ||
718 | |||
719 | /* return the length of print_fmt */ | ||
720 | return pos; | ||
721 | } | ||
722 | |||
723 | static int set_synth_event_print_fmt(struct trace_event_call *call) | ||
724 | { | ||
725 | struct synth_event *event = call->data; | ||
726 | char *print_fmt; | ||
727 | int len; | ||
728 | |||
729 | /* First: called with 0 length to calculate the needed length */ | ||
730 | len = __set_synth_event_print_fmt(event, NULL, 0); | ||
731 | |||
732 | print_fmt = kmalloc(len + 1, GFP_KERNEL); | ||
733 | if (!print_fmt) | ||
734 | return -ENOMEM; | ||
735 | |||
736 | /* Second: actually write the @print_fmt */ | ||
737 | __set_synth_event_print_fmt(event, print_fmt, len + 1); | ||
738 | call->print_fmt = print_fmt; | ||
739 | |||
740 | return 0; | ||
741 | } | ||
742 | |||
743 | static void free_synth_field(struct synth_field *field) | ||
744 | { | ||
745 | kfree(field->type); | ||
746 | kfree(field->name); | ||
747 | kfree(field); | ||
748 | } | ||
749 | |||
750 | static struct synth_field *parse_synth_field(char *field_type, | ||
751 | char *field_name) | ||
752 | { | ||
753 | struct synth_field *field; | ||
754 | int len, ret = 0; | ||
755 | char *array; | ||
756 | |||
757 | if (field_type[0] == ';') | ||
758 | field_type++; | ||
759 | |||
760 | len = strlen(field_name); | ||
761 | if (field_name[len - 1] == ';') | ||
762 | field_name[len - 1] = '\0'; | ||
763 | |||
764 | field = kzalloc(sizeof(*field), GFP_KERNEL); | ||
765 | if (!field) | ||
766 | return ERR_PTR(-ENOMEM); | ||
767 | |||
768 | len = strlen(field_type) + 1; | ||
769 | array = strchr(field_name, '['); | ||
770 | if (array) | ||
771 | len += strlen(array); | ||
772 | field->type = kzalloc(len, GFP_KERNEL); | ||
773 | if (!field->type) { | ||
774 | ret = -ENOMEM; | ||
775 | goto free; | ||
776 | } | ||
777 | strcat(field->type, field_type); | ||
778 | if (array) { | ||
779 | strcat(field->type, array); | ||
780 | *array = '\0'; | ||
781 | } | ||
782 | |||
783 | field->size = synth_field_size(field->type); | ||
784 | if (!field->size) { | ||
785 | ret = -EINVAL; | ||
786 | goto free; | ||
787 | } | ||
788 | |||
789 | if (synth_field_is_string(field->type)) | ||
790 | field->is_string = true; | ||
791 | |||
792 | field->is_signed = synth_field_signed(field->type); | ||
793 | |||
794 | field->name = kstrdup(field_name, GFP_KERNEL); | ||
795 | if (!field->name) { | ||
796 | ret = -ENOMEM; | ||
797 | goto free; | ||
798 | } | ||
799 | out: | ||
800 | return field; | ||
801 | free: | ||
802 | free_synth_field(field); | ||
803 | field = ERR_PTR(ret); | ||
804 | goto out; | ||
805 | } | ||
806 | |||
807 | static void free_synth_tracepoint(struct tracepoint *tp) | ||
808 | { | ||
809 | if (!tp) | ||
810 | return; | ||
811 | |||
812 | kfree(tp->name); | ||
813 | kfree(tp); | ||
814 | } | ||
815 | |||
816 | static struct tracepoint *alloc_synth_tracepoint(char *name) | ||
817 | { | ||
818 | struct tracepoint *tp; | ||
819 | |||
820 | tp = kzalloc(sizeof(*tp), GFP_KERNEL); | ||
821 | if (!tp) | ||
822 | return ERR_PTR(-ENOMEM); | ||
823 | |||
824 | tp->name = kstrdup(name, GFP_KERNEL); | ||
825 | if (!tp->name) { | ||
826 | kfree(tp); | ||
827 | return ERR_PTR(-ENOMEM); | ||
828 | } | ||
829 | |||
830 | return tp; | ||
831 | } | ||
832 | |||
833 | typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, | ||
834 | unsigned int var_ref_idx); | ||
835 | |||
836 | static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, | ||
837 | unsigned int var_ref_idx) | ||
838 | { | ||
839 | struct tracepoint *tp = event->tp; | ||
840 | |||
841 | if (unlikely(atomic_read(&tp->key.enabled) > 0)) { | ||
842 | struct tracepoint_func *probe_func_ptr; | ||
843 | synth_probe_func_t probe_func; | ||
844 | void *__data; | ||
845 | |||
846 | if (!(cpu_online(raw_smp_processor_id()))) | ||
847 | return; | ||
848 | |||
849 | probe_func_ptr = rcu_dereference_sched((tp)->funcs); | ||
850 | if (probe_func_ptr) { | ||
851 | do { | ||
852 | probe_func = probe_func_ptr->func; | ||
853 | __data = probe_func_ptr->data; | ||
854 | probe_func(__data, var_ref_vals, var_ref_idx); | ||
855 | } while ((++probe_func_ptr)->func); | ||
856 | } | ||
857 | } | ||
858 | } | ||
859 | |||
860 | static struct synth_event *find_synth_event(const char *name) | ||
861 | { | ||
862 | struct synth_event *event; | ||
863 | |||
864 | list_for_each_entry(event, &synth_event_list, list) { | ||
865 | if (strcmp(event->name, name) == 0) | ||
866 | return event; | ||
867 | } | ||
868 | |||
869 | return NULL; | ||
870 | } | ||
871 | |||
872 | static int register_synth_event(struct synth_event *event) | ||
873 | { | ||
874 | struct trace_event_call *call = &event->call; | ||
875 | int ret = 0; | ||
876 | |||
877 | event->call.class = &event->class; | ||
878 | event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL); | ||
879 | if (!event->class.system) { | ||
880 | ret = -ENOMEM; | ||
881 | goto out; | ||
882 | } | ||
883 | |||
884 | event->tp = alloc_synth_tracepoint(event->name); | ||
885 | if (IS_ERR(event->tp)) { | ||
886 | ret = PTR_ERR(event->tp); | ||
887 | event->tp = NULL; | ||
888 | goto out; | ||
889 | } | ||
890 | |||
891 | INIT_LIST_HEAD(&call->class->fields); | ||
892 | call->event.funcs = &synth_event_funcs; | ||
893 | call->class->define_fields = synth_event_define_fields; | ||
894 | |||
895 | ret = register_trace_event(&call->event); | ||
896 | if (!ret) { | ||
897 | ret = -ENODEV; | ||
898 | goto out; | ||
899 | } | ||
900 | call->flags = TRACE_EVENT_FL_TRACEPOINT; | ||
901 | call->class->reg = trace_event_reg; | ||
902 | call->class->probe = trace_event_raw_event_synth; | ||
903 | call->data = event; | ||
904 | call->tp = event->tp; | ||
905 | |||
906 | ret = trace_add_event_call(call); | ||
907 | if (ret) { | ||
908 | pr_warn("Failed to register synthetic event: %s\n", | ||
909 | trace_event_name(call)); | ||
910 | goto err; | ||
911 | } | ||
912 | |||
913 | ret = set_synth_event_print_fmt(call); | ||
914 | if (ret < 0) { | ||
915 | trace_remove_event_call(call); | ||
916 | goto err; | ||
917 | } | ||
918 | out: | ||
919 | return ret; | ||
920 | err: | ||
921 | unregister_trace_event(&call->event); | ||
922 | goto out; | ||
923 | } | ||
924 | |||
925 | static int unregister_synth_event(struct synth_event *event) | ||
926 | { | ||
927 | struct trace_event_call *call = &event->call; | ||
928 | int ret; | ||
929 | |||
930 | ret = trace_remove_event_call(call); | ||
931 | |||
932 | return ret; | ||
933 | } | ||
934 | |||
935 | static void free_synth_event(struct synth_event *event) | ||
936 | { | ||
937 | unsigned int i; | ||
938 | |||
939 | if (!event) | ||
940 | return; | ||
941 | |||
942 | for (i = 0; i < event->n_fields; i++) | ||
943 | free_synth_field(event->fields[i]); | ||
944 | |||
945 | kfree(event->fields); | ||
946 | kfree(event->name); | ||
947 | kfree(event->class.system); | ||
948 | free_synth_tracepoint(event->tp); | ||
949 | free_synth_event_print_fmt(&event->call); | ||
950 | kfree(event); | ||
951 | } | ||
952 | |||
953 | static struct synth_event *alloc_synth_event(char *event_name, int n_fields, | ||
954 | struct synth_field **fields) | ||
955 | { | ||
956 | struct synth_event *event; | ||
957 | unsigned int i; | ||
958 | |||
959 | event = kzalloc(sizeof(*event), GFP_KERNEL); | ||
960 | if (!event) { | ||
961 | event = ERR_PTR(-ENOMEM); | ||
962 | goto out; | ||
963 | } | ||
964 | |||
965 | event->name = kstrdup(event_name, GFP_KERNEL); | ||
966 | if (!event->name) { | ||
967 | kfree(event); | ||
968 | event = ERR_PTR(-ENOMEM); | ||
969 | goto out; | ||
970 | } | ||
971 | |||
972 | event->fields = kcalloc(n_fields, sizeof(*event->fields), GFP_KERNEL); | ||
973 | if (!event->fields) { | ||
974 | free_synth_event(event); | ||
975 | event = ERR_PTR(-ENOMEM); | ||
976 | goto out; | ||
977 | } | ||
978 | |||
979 | for (i = 0; i < n_fields; i++) | ||
980 | event->fields[i] = fields[i]; | ||
981 | |||
982 | event->n_fields = n_fields; | ||
983 | out: | ||
984 | return event; | ||
985 | } | ||
986 | |||
987 | static void action_trace(struct hist_trigger_data *hist_data, | ||
988 | struct tracing_map_elt *elt, void *rec, | ||
989 | struct ring_buffer_event *rbe, | ||
990 | struct action_data *data, u64 *var_ref_vals) | ||
991 | { | ||
992 | struct synth_event *event = data->onmatch.synth_event; | ||
993 | |||
994 | trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx); | ||
995 | } | ||
996 | |||
997 | struct hist_var_data { | ||
998 | struct list_head list; | ||
999 | struct hist_trigger_data *hist_data; | ||
1000 | }; | ||
1001 | |||
1002 | static void add_or_delete_synth_event(struct synth_event *event, int delete) | ||
1003 | { | ||
1004 | if (delete) | ||
1005 | free_synth_event(event); | ||
1006 | else { | ||
1007 | mutex_lock(&synth_event_mutex); | ||
1008 | if (!find_synth_event(event->name)) | ||
1009 | list_add(&event->list, &synth_event_list); | ||
1010 | else | ||
1011 | free_synth_event(event); | ||
1012 | mutex_unlock(&synth_event_mutex); | ||
1013 | } | ||
1014 | } | ||
1015 | |||
1016 | static int create_synth_event(int argc, char **argv) | ||
1017 | { | ||
1018 | struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; | ||
1019 | struct synth_event *event = NULL; | ||
1020 | bool delete_event = false; | ||
1021 | int i, n_fields = 0, ret = 0; | ||
1022 | char *name; | ||
1023 | |||
1024 | mutex_lock(&synth_event_mutex); | ||
1025 | |||
1026 | /* | ||
1027 | * Argument syntax: | ||
1028 | * - Add synthetic event: <event_name> field[;field] ... | ||
1029 | * - Remove synthetic event: !<event_name> field[;field] ... | ||
1030 | * where 'field' = type field_name | ||
1031 | */ | ||
1032 | if (argc < 1) { | ||
1033 | ret = -EINVAL; | ||
1034 | goto out; | ||
1035 | } | ||
1036 | |||
1037 | name = argv[0]; | ||
1038 | if (name[0] == '!') { | ||
1039 | delete_event = true; | ||
1040 | name++; | ||
1041 | } | ||
1042 | |||
1043 | event = find_synth_event(name); | ||
1044 | if (event) { | ||
1045 | if (delete_event) { | ||
1046 | if (event->ref) { | ||
1047 | event = NULL; | ||
1048 | ret = -EBUSY; | ||
1049 | goto out; | ||
1050 | } | ||
1051 | list_del(&event->list); | ||
1052 | goto out; | ||
1053 | } | ||
1054 | event = NULL; | ||
1055 | ret = -EEXIST; | ||
1056 | goto out; | ||
1057 | } else if (delete_event) | ||
1058 | goto out; | ||
1059 | |||
1060 | if (argc < 2) { | ||
1061 | ret = -EINVAL; | ||
1062 | goto out; | ||
1063 | } | ||
1064 | |||
1065 | for (i = 1; i < argc - 1; i++) { | ||
1066 | if (strcmp(argv[i], ";") == 0) | ||
1067 | continue; | ||
1068 | if (n_fields == SYNTH_FIELDS_MAX) { | ||
1069 | ret = -EINVAL; | ||
1070 | goto err; | ||
1071 | } | ||
1072 | |||
1073 | field = parse_synth_field(argv[i], argv[i + 1]); | ||
1074 | if (IS_ERR(field)) { | ||
1075 | ret = PTR_ERR(field); | ||
1076 | goto err; | ||
1077 | } | ||
1078 | fields[n_fields] = field; | ||
1079 | i++; n_fields++; | ||
1080 | } | ||
1081 | |||
1082 | if (i < argc) { | ||
1083 | ret = -EINVAL; | ||
1084 | goto err; | ||
1085 | } | ||
1086 | |||
1087 | event = alloc_synth_event(name, n_fields, fields); | ||
1088 | if (IS_ERR(event)) { | ||
1089 | ret = PTR_ERR(event); | ||
1090 | event = NULL; | ||
1091 | goto err; | ||
1092 | } | ||
1093 | out: | ||
1094 | mutex_unlock(&synth_event_mutex); | ||
1095 | |||
1096 | if (event) { | ||
1097 | if (delete_event) { | ||
1098 | ret = unregister_synth_event(event); | ||
1099 | add_or_delete_synth_event(event, !ret); | ||
1100 | } else { | ||
1101 | ret = register_synth_event(event); | ||
1102 | add_or_delete_synth_event(event, ret); | ||
1103 | } | ||
1104 | } | ||
1105 | |||
1106 | return ret; | ||
1107 | err: | ||
1108 | mutex_unlock(&synth_event_mutex); | ||
1109 | |||
1110 | for (i = 0; i < n_fields; i++) | ||
1111 | free_synth_field(fields[i]); | ||
1112 | free_synth_event(event); | ||
1113 | |||
1114 | return ret; | ||
1115 | } | ||
1116 | |||
1117 | static int release_all_synth_events(void) | ||
1118 | { | ||
1119 | struct list_head release_events; | ||
1120 | struct synth_event *event, *e; | ||
1121 | int ret = 0; | ||
1122 | |||
1123 | INIT_LIST_HEAD(&release_events); | ||
1124 | |||
1125 | mutex_lock(&synth_event_mutex); | ||
1126 | |||
1127 | list_for_each_entry(event, &synth_event_list, list) { | ||
1128 | if (event->ref) { | ||
1129 | mutex_unlock(&synth_event_mutex); | ||
1130 | return -EBUSY; | ||
1131 | } | ||
1132 | } | ||
1133 | |||
1134 | list_splice_init(&event->list, &release_events); | ||
1135 | |||
1136 | mutex_unlock(&synth_event_mutex); | ||
1137 | |||
1138 | list_for_each_entry_safe(event, e, &release_events, list) { | ||
1139 | list_del(&event->list); | ||
1140 | |||
1141 | ret = unregister_synth_event(event); | ||
1142 | add_or_delete_synth_event(event, !ret); | ||
1143 | } | ||
1144 | |||
1145 | return ret; | ||
1146 | } | ||
1147 | |||
1148 | |||
1149 | static void *synth_events_seq_start(struct seq_file *m, loff_t *pos) | ||
1150 | { | ||
1151 | mutex_lock(&synth_event_mutex); | ||
1152 | |||
1153 | return seq_list_start(&synth_event_list, *pos); | ||
1154 | } | ||
1155 | |||
1156 | static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
1157 | { | ||
1158 | return seq_list_next(v, &synth_event_list, pos); | ||
1159 | } | ||
1160 | |||
1161 | static void synth_events_seq_stop(struct seq_file *m, void *v) | ||
1162 | { | ||
1163 | mutex_unlock(&synth_event_mutex); | ||
1164 | } | ||
1165 | |||
1166 | static int synth_events_seq_show(struct seq_file *m, void *v) | ||
1167 | { | ||
1168 | struct synth_field *field; | ||
1169 | struct synth_event *event = v; | ||
1170 | unsigned int i; | ||
1171 | |||
1172 | seq_printf(m, "%s\t", event->name); | ||
1173 | |||
1174 | for (i = 0; i < event->n_fields; i++) { | ||
1175 | field = event->fields[i]; | ||
1176 | |||
1177 | /* parameter values */ | ||
1178 | seq_printf(m, "%s %s%s", field->type, field->name, | ||
1179 | i == event->n_fields - 1 ? "" : "; "); | ||
1180 | } | ||
1181 | |||
1182 | seq_putc(m, '\n'); | ||
1183 | |||
1184 | return 0; | ||
1185 | } | ||
1186 | |||
1187 | static const struct seq_operations synth_events_seq_op = { | ||
1188 | .start = synth_events_seq_start, | ||
1189 | .next = synth_events_seq_next, | ||
1190 | .stop = synth_events_seq_stop, | ||
1191 | .show = synth_events_seq_show | ||
1192 | }; | ||
1193 | |||
1194 | static int synth_events_open(struct inode *inode, struct file *file) | ||
1195 | { | ||
1196 | int ret; | ||
1197 | |||
1198 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { | ||
1199 | ret = release_all_synth_events(); | ||
1200 | if (ret < 0) | ||
1201 | return ret; | ||
1202 | } | ||
1203 | |||
1204 | return seq_open(file, &synth_events_seq_op); | ||
1205 | } | ||
1206 | |||
1207 | static ssize_t synth_events_write(struct file *file, | ||
1208 | const char __user *buffer, | ||
1209 | size_t count, loff_t *ppos) | ||
1210 | { | ||
1211 | return trace_parse_run_command(file, buffer, count, ppos, | ||
1212 | create_synth_event); | ||
1213 | } | ||
1214 | |||
1215 | static const struct file_operations synth_events_fops = { | ||
1216 | .open = synth_events_open, | ||
1217 | .write = synth_events_write, | ||
1218 | .read = seq_read, | ||
1219 | .llseek = seq_lseek, | ||
1220 | .release = seq_release, | ||
1221 | }; | ||
1222 | |||
1223 | static u64 hist_field_timestamp(struct hist_field *hist_field, | ||
1224 | struct tracing_map_elt *elt, | ||
1225 | struct ring_buffer_event *rbe, | ||
1226 | void *event) | ||
1227 | { | ||
1228 | struct hist_trigger_data *hist_data = hist_field->hist_data; | ||
1229 | struct trace_array *tr = hist_data->event_file->tr; | ||
1230 | |||
1231 | u64 ts = ring_buffer_event_time_stamp(rbe); | ||
1232 | |||
1233 | if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr)) | ||
1234 | ts = ns2usecs(ts); | ||
1235 | |||
1236 | return ts; | ||
1237 | } | ||
1238 | |||
1239 | static u64 hist_field_cpu(struct hist_field *hist_field, | ||
1240 | struct tracing_map_elt *elt, | ||
1241 | struct ring_buffer_event *rbe, | ||
1242 | void *event) | ||
1243 | { | ||
1244 | int cpu = smp_processor_id(); | ||
1245 | |||
1246 | return cpu; | ||
1247 | } | ||
1248 | |||
1249 | static struct hist_field * | ||
1250 | check_field_for_var_ref(struct hist_field *hist_field, | ||
1251 | struct hist_trigger_data *var_data, | ||
1252 | unsigned int var_idx) | ||
1253 | { | ||
1254 | struct hist_field *found = NULL; | ||
1255 | |||
1256 | if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) { | ||
1257 | if (hist_field->var.idx == var_idx && | ||
1258 | hist_field->var.hist_data == var_data) { | ||
1259 | found = hist_field; | ||
1260 | } | ||
1261 | } | ||
1262 | |||
1263 | return found; | ||
1264 | } | ||
1265 | |||
1266 | static struct hist_field * | ||
1267 | check_field_for_var_refs(struct hist_trigger_data *hist_data, | ||
1268 | struct hist_field *hist_field, | ||
1269 | struct hist_trigger_data *var_data, | ||
1270 | unsigned int var_idx, | ||
1271 | unsigned int level) | ||
1272 | { | ||
1273 | struct hist_field *found = NULL; | ||
1274 | unsigned int i; | ||
1275 | |||
1276 | if (level > 3) | ||
1277 | return found; | ||
1278 | |||
1279 | if (!hist_field) | ||
1280 | return found; | ||
1281 | |||
1282 | found = check_field_for_var_ref(hist_field, var_data, var_idx); | ||
1283 | if (found) | ||
1284 | return found; | ||
1285 | |||
1286 | for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { | ||
1287 | struct hist_field *operand; | ||
1288 | |||
1289 | operand = hist_field->operands[i]; | ||
1290 | found = check_field_for_var_refs(hist_data, operand, var_data, | ||
1291 | var_idx, level + 1); | ||
1292 | if (found) | ||
1293 | return found; | ||
1294 | } | ||
1295 | |||
1296 | return found; | ||
1297 | } | ||
1298 | |||
1299 | static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data, | ||
1300 | struct hist_trigger_data *var_data, | ||
1301 | unsigned int var_idx) | ||
1302 | { | ||
1303 | struct hist_field *hist_field, *found = NULL; | ||
1304 | unsigned int i; | ||
1305 | |||
1306 | for_each_hist_field(i, hist_data) { | ||
1307 | hist_field = hist_data->fields[i]; | ||
1308 | found = check_field_for_var_refs(hist_data, hist_field, | ||
1309 | var_data, var_idx, 0); | ||
1310 | if (found) | ||
1311 | return found; | ||
1312 | } | ||
1313 | |||
1314 | for (i = 0; i < hist_data->n_synth_var_refs; i++) { | ||
1315 | hist_field = hist_data->synth_var_refs[i]; | ||
1316 | found = check_field_for_var_refs(hist_data, hist_field, | ||
1317 | var_data, var_idx, 0); | ||
1318 | if (found) | ||
1319 | return found; | ||
1320 | } | ||
1321 | |||
1322 | return found; | ||
1323 | } | ||
1324 | |||
1325 | static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, | ||
1326 | unsigned int var_idx) | ||
1327 | { | ||
1328 | struct trace_array *tr = hist_data->event_file->tr; | ||
1329 | struct hist_field *found = NULL; | ||
1330 | struct hist_var_data *var_data; | ||
1331 | |||
1332 | list_for_each_entry(var_data, &tr->hist_vars, list) { | ||
1333 | if (var_data->hist_data == hist_data) | ||
1334 | continue; | ||
1335 | found = find_var_ref(var_data->hist_data, hist_data, var_idx); | ||
1336 | if (found) | ||
1337 | break; | ||
1338 | } | ||
1339 | |||
1340 | return found; | ||
1341 | } | ||
1342 | |||
1343 | static bool check_var_refs(struct hist_trigger_data *hist_data) | ||
1344 | { | ||
1345 | struct hist_field *field; | ||
1346 | bool found = false; | ||
1347 | int i; | ||
1348 | |||
1349 | for_each_hist_field(i, hist_data) { | ||
1350 | field = hist_data->fields[i]; | ||
1351 | if (field && field->flags & HIST_FIELD_FL_VAR) { | ||
1352 | if (find_any_var_ref(hist_data, field->var.idx)) { | ||
1353 | found = true; | ||
1354 | break; | ||
1355 | } | ||
1356 | } | ||
1357 | } | ||
1358 | |||
1359 | return found; | ||
1360 | } | ||
1361 | |||
1362 | static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data) | ||
1363 | { | ||
1364 | struct trace_array *tr = hist_data->event_file->tr; | ||
1365 | struct hist_var_data *var_data, *found = NULL; | ||
1366 | |||
1367 | list_for_each_entry(var_data, &tr->hist_vars, list) { | ||
1368 | if (var_data->hist_data == hist_data) { | ||
1369 | found = var_data; | ||
1370 | break; | ||
1371 | } | ||
1372 | } | ||
1373 | |||
1374 | return found; | ||
1375 | } | ||
1376 | |||
1377 | static bool field_has_hist_vars(struct hist_field *hist_field, | ||
1378 | unsigned int level) | ||
1379 | { | ||
1380 | int i; | ||
1381 | |||
1382 | if (level > 3) | ||
1383 | return false; | ||
1384 | |||
1385 | if (!hist_field) | ||
1386 | return false; | ||
1387 | |||
1388 | if (hist_field->flags & HIST_FIELD_FL_VAR || | ||
1389 | hist_field->flags & HIST_FIELD_FL_VAR_REF) | ||
1390 | return true; | ||
1391 | |||
1392 | for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { | ||
1393 | struct hist_field *operand; | ||
1394 | |||
1395 | operand = hist_field->operands[i]; | ||
1396 | if (field_has_hist_vars(operand, level + 1)) | ||
1397 | return true; | ||
1398 | } | ||
1399 | |||
1400 | return false; | ||
1401 | } | ||
1402 | |||
1403 | static bool has_hist_vars(struct hist_trigger_data *hist_data) | ||
1404 | { | ||
1405 | struct hist_field *hist_field; | ||
1406 | int i; | ||
1407 | |||
1408 | for_each_hist_field(i, hist_data) { | ||
1409 | hist_field = hist_data->fields[i]; | ||
1410 | if (field_has_hist_vars(hist_field, 0)) | ||
1411 | return true; | ||
1412 | } | ||
1413 | |||
1414 | return false; | ||
1415 | } | ||
1416 | |||
1417 | static int save_hist_vars(struct hist_trigger_data *hist_data) | ||
1418 | { | ||
1419 | struct trace_array *tr = hist_data->event_file->tr; | ||
1420 | struct hist_var_data *var_data; | ||
1421 | |||
1422 | var_data = find_hist_vars(hist_data); | ||
1423 | if (var_data) | ||
1424 | return 0; | ||
1425 | |||
1426 | if (trace_array_get(tr) < 0) | ||
1427 | return -ENODEV; | ||
1428 | |||
1429 | var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); | ||
1430 | if (!var_data) { | ||
1431 | trace_array_put(tr); | ||
1432 | return -ENOMEM; | ||
1433 | } | ||
1434 | |||
1435 | var_data->hist_data = hist_data; | ||
1436 | list_add(&var_data->list, &tr->hist_vars); | ||
1437 | |||
1438 | return 0; | ||
1439 | } | ||
1440 | |||
1441 | static void remove_hist_vars(struct hist_trigger_data *hist_data) | ||
1442 | { | ||
1443 | struct trace_array *tr = hist_data->event_file->tr; | ||
1444 | struct hist_var_data *var_data; | ||
1445 | |||
1446 | var_data = find_hist_vars(hist_data); | ||
1447 | if (!var_data) | ||
1448 | return; | ||
1449 | |||
1450 | if (WARN_ON(check_var_refs(hist_data))) | ||
1451 | return; | ||
1452 | |||
1453 | list_del(&var_data->list); | ||
1454 | |||
1455 | kfree(var_data); | ||
1456 | |||
1457 | trace_array_put(tr); | ||
1458 | } | ||
1459 | |||
1460 | static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, | ||
1461 | const char *var_name) | ||
1462 | { | ||
1463 | struct hist_field *hist_field, *found = NULL; | ||
1464 | int i; | ||
1465 | |||
1466 | for_each_hist_field(i, hist_data) { | ||
1467 | hist_field = hist_data->fields[i]; | ||
1468 | if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR && | ||
1469 | strcmp(hist_field->var.name, var_name) == 0) { | ||
1470 | found = hist_field; | ||
1471 | break; | ||
1472 | } | ||
1473 | } | ||
1474 | |||
1475 | return found; | ||
1476 | } | ||
1477 | |||
1478 | static struct hist_field *find_var(struct hist_trigger_data *hist_data, | ||
1479 | struct trace_event_file *file, | ||
1480 | const char *var_name) | ||
1481 | { | ||
1482 | struct hist_trigger_data *test_data; | ||
1483 | struct event_trigger_data *test; | ||
1484 | struct hist_field *hist_field; | ||
1485 | |||
1486 | hist_field = find_var_field(hist_data, var_name); | ||
1487 | if (hist_field) | ||
1488 | return hist_field; | ||
1489 | |||
1490 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
1491 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
1492 | test_data = test->private_data; | ||
1493 | hist_field = find_var_field(test_data, var_name); | ||
1494 | if (hist_field) | ||
1495 | return hist_field; | ||
1496 | } | ||
1497 | } | ||
1498 | |||
1499 | return NULL; | ||
1500 | } | ||
1501 | |||
1502 | static struct trace_event_file *find_var_file(struct trace_array *tr, | ||
1503 | char *system, | ||
1504 | char *event_name, | ||
1505 | char *var_name) | ||
1506 | { | ||
1507 | struct hist_trigger_data *var_hist_data; | ||
1508 | struct hist_var_data *var_data; | ||
1509 | struct trace_event_file *file, *found = NULL; | ||
1510 | |||
1511 | if (system) | ||
1512 | return find_event_file(tr, system, event_name); | ||
1513 | |||
1514 | list_for_each_entry(var_data, &tr->hist_vars, list) { | ||
1515 | var_hist_data = var_data->hist_data; | ||
1516 | file = var_hist_data->event_file; | ||
1517 | if (file == found) | ||
1518 | continue; | ||
1519 | |||
1520 | if (find_var_field(var_hist_data, var_name)) { | ||
1521 | if (found) { | ||
1522 | hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); | ||
1523 | return NULL; | ||
1524 | } | ||
1525 | |||
1526 | found = file; | ||
1527 | } | ||
1528 | } | ||
1529 | |||
1530 | return found; | ||
1531 | } | ||
1532 | |||
1533 | static struct hist_field *find_file_var(struct trace_event_file *file, | ||
1534 | const char *var_name) | ||
1535 | { | ||
1536 | struct hist_trigger_data *test_data; | ||
1537 | struct event_trigger_data *test; | ||
1538 | struct hist_field *hist_field; | ||
1539 | |||
1540 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
1541 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
1542 | test_data = test->private_data; | ||
1543 | hist_field = find_var_field(test_data, var_name); | ||
1544 | if (hist_field) | ||
1545 | return hist_field; | ||
1546 | } | ||
1547 | } | ||
1548 | |||
1549 | return NULL; | ||
1550 | } | ||
1551 | |||
1552 | static struct hist_field * | ||
1553 | find_match_var(struct hist_trigger_data *hist_data, char *var_name) | ||
1554 | { | ||
1555 | struct trace_array *tr = hist_data->event_file->tr; | ||
1556 | struct hist_field *hist_field, *found = NULL; | ||
1557 | struct trace_event_file *file; | ||
1558 | unsigned int i; | ||
1559 | |||
1560 | for (i = 0; i < hist_data->n_actions; i++) { | ||
1561 | struct action_data *data = hist_data->actions[i]; | ||
1562 | |||
1563 | if (data->fn == action_trace) { | ||
1564 | char *system = data->onmatch.match_event_system; | ||
1565 | char *event_name = data->onmatch.match_event; | ||
1566 | |||
1567 | file = find_var_file(tr, system, event_name, var_name); | ||
1568 | if (!file) | ||
1569 | continue; | ||
1570 | hist_field = find_file_var(file, var_name); | ||
1571 | if (hist_field) { | ||
1572 | if (found) { | ||
1573 | hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); | ||
1574 | return ERR_PTR(-EINVAL); | ||
1575 | } | ||
1576 | |||
1577 | found = hist_field; | ||
1578 | } | ||
1579 | } | ||
1580 | } | ||
1581 | return found; | ||
1582 | } | ||
1583 | |||
1584 | static struct hist_field *find_event_var(struct hist_trigger_data *hist_data, | ||
1585 | char *system, | ||
1586 | char *event_name, | ||
1587 | char *var_name) | ||
1588 | { | ||
1589 | struct trace_array *tr = hist_data->event_file->tr; | ||
1590 | struct hist_field *hist_field = NULL; | ||
1591 | struct trace_event_file *file; | ||
1592 | |||
1593 | if (!system || !event_name) { | ||
1594 | hist_field = find_match_var(hist_data, var_name); | ||
1595 | if (IS_ERR(hist_field)) | ||
1596 | return NULL; | ||
1597 | if (hist_field) | ||
1598 | return hist_field; | ||
1599 | } | ||
1600 | |||
1601 | file = find_var_file(tr, system, event_name, var_name); | ||
1602 | if (!file) | ||
1603 | return NULL; | ||
1604 | |||
1605 | hist_field = find_file_var(file, var_name); | ||
1606 | |||
1607 | return hist_field; | ||
1608 | } | ||
1609 | |||
1610 | struct hist_elt_data { | ||
1611 | char *comm; | ||
1612 | u64 *var_ref_vals; | ||
1613 | char *field_var_str[SYNTH_FIELDS_MAX]; | ||
153 | }; | 1614 | }; |
154 | 1615 | ||
1616 | static u64 hist_field_var_ref(struct hist_field *hist_field, | ||
1617 | struct tracing_map_elt *elt, | ||
1618 | struct ring_buffer_event *rbe, | ||
1619 | void *event) | ||
1620 | { | ||
1621 | struct hist_elt_data *elt_data; | ||
1622 | u64 var_val = 0; | ||
1623 | |||
1624 | elt_data = elt->private_data; | ||
1625 | var_val = elt_data->var_ref_vals[hist_field->var_ref_idx]; | ||
1626 | |||
1627 | return var_val; | ||
1628 | } | ||
1629 | |||
1630 | static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key, | ||
1631 | u64 *var_ref_vals, bool self) | ||
1632 | { | ||
1633 | struct hist_trigger_data *var_data; | ||
1634 | struct tracing_map_elt *var_elt; | ||
1635 | struct hist_field *hist_field; | ||
1636 | unsigned int i, var_idx; | ||
1637 | bool resolved = true; | ||
1638 | u64 var_val = 0; | ||
1639 | |||
1640 | for (i = 0; i < hist_data->n_var_refs; i++) { | ||
1641 | hist_field = hist_data->var_refs[i]; | ||
1642 | var_idx = hist_field->var.idx; | ||
1643 | var_data = hist_field->var.hist_data; | ||
1644 | |||
1645 | if (var_data == NULL) { | ||
1646 | resolved = false; | ||
1647 | break; | ||
1648 | } | ||
1649 | |||
1650 | if ((self && var_data != hist_data) || | ||
1651 | (!self && var_data == hist_data)) | ||
1652 | continue; | ||
1653 | |||
1654 | var_elt = tracing_map_lookup(var_data->map, key); | ||
1655 | if (!var_elt) { | ||
1656 | resolved = false; | ||
1657 | break; | ||
1658 | } | ||
1659 | |||
1660 | if (!tracing_map_var_set(var_elt, var_idx)) { | ||
1661 | resolved = false; | ||
1662 | break; | ||
1663 | } | ||
1664 | |||
1665 | if (self || !hist_field->read_once) | ||
1666 | var_val = tracing_map_read_var(var_elt, var_idx); | ||
1667 | else | ||
1668 | var_val = tracing_map_read_var_once(var_elt, var_idx); | ||
1669 | |||
1670 | var_ref_vals[i] = var_val; | ||
1671 | } | ||
1672 | |||
1673 | return resolved; | ||
1674 | } | ||
1675 | |||
155 | static const char *hist_field_name(struct hist_field *field, | 1676 | static const char *hist_field_name(struct hist_field *field, |
156 | unsigned int level) | 1677 | unsigned int level) |
157 | { | 1678 | { |
@@ -162,8 +1683,26 @@ static const char *hist_field_name(struct hist_field *field, | |||
162 | 1683 | ||
163 | if (field->field) | 1684 | if (field->field) |
164 | field_name = field->field->name; | 1685 | field_name = field->field->name; |
165 | else if (field->flags & HIST_FIELD_FL_LOG2) | 1686 | else if (field->flags & HIST_FIELD_FL_LOG2 || |
1687 | field->flags & HIST_FIELD_FL_ALIAS) | ||
166 | field_name = hist_field_name(field->operands[0], ++level); | 1688 | field_name = hist_field_name(field->operands[0], ++level); |
1689 | else if (field->flags & HIST_FIELD_FL_CPU) | ||
1690 | field_name = "cpu"; | ||
1691 | else if (field->flags & HIST_FIELD_FL_EXPR || | ||
1692 | field->flags & HIST_FIELD_FL_VAR_REF) { | ||
1693 | if (field->system) { | ||
1694 | static char full_name[MAX_FILTER_STR_VAL]; | ||
1695 | |||
1696 | strcat(full_name, field->system); | ||
1697 | strcat(full_name, "."); | ||
1698 | strcat(full_name, field->event_name); | ||
1699 | strcat(full_name, "."); | ||
1700 | strcat(full_name, field->name); | ||
1701 | field_name = full_name; | ||
1702 | } else | ||
1703 | field_name = field->name; | ||
1704 | } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) | ||
1705 | field_name = "common_timestamp"; | ||
167 | 1706 | ||
168 | if (field_name == NULL) | 1707 | if (field_name == NULL) |
169 | field_name = ""; | 1708 | field_name = ""; |
@@ -232,16 +1771,119 @@ static int parse_map_size(char *str) | |||
232 | 1771 | ||
233 | static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) | 1772 | static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) |
234 | { | 1773 | { |
1774 | unsigned int i; | ||
1775 | |||
235 | if (!attrs) | 1776 | if (!attrs) |
236 | return; | 1777 | return; |
237 | 1778 | ||
1779 | for (i = 0; i < attrs->n_assignments; i++) | ||
1780 | kfree(attrs->assignment_str[i]); | ||
1781 | |||
1782 | for (i = 0; i < attrs->n_actions; i++) | ||
1783 | kfree(attrs->action_str[i]); | ||
1784 | |||
238 | kfree(attrs->name); | 1785 | kfree(attrs->name); |
239 | kfree(attrs->sort_key_str); | 1786 | kfree(attrs->sort_key_str); |
240 | kfree(attrs->keys_str); | 1787 | kfree(attrs->keys_str); |
241 | kfree(attrs->vals_str); | 1788 | kfree(attrs->vals_str); |
1789 | kfree(attrs->clock); | ||
242 | kfree(attrs); | 1790 | kfree(attrs); |
243 | } | 1791 | } |
244 | 1792 | ||
1793 | static int parse_action(char *str, struct hist_trigger_attrs *attrs) | ||
1794 | { | ||
1795 | int ret = -EINVAL; | ||
1796 | |||
1797 | if (attrs->n_actions >= HIST_ACTIONS_MAX) | ||
1798 | return ret; | ||
1799 | |||
1800 | if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) || | ||
1801 | (strncmp(str, "onmax(", strlen("onmax(")) == 0)) { | ||
1802 | attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); | ||
1803 | if (!attrs->action_str[attrs->n_actions]) { | ||
1804 | ret = -ENOMEM; | ||
1805 | return ret; | ||
1806 | } | ||
1807 | attrs->n_actions++; | ||
1808 | ret = 0; | ||
1809 | } | ||
1810 | |||
1811 | return ret; | ||
1812 | } | ||
1813 | |||
1814 | static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) | ||
1815 | { | ||
1816 | int ret = 0; | ||
1817 | |||
1818 | if ((strncmp(str, "key=", strlen("key=")) == 0) || | ||
1819 | (strncmp(str, "keys=", strlen("keys=")) == 0)) { | ||
1820 | attrs->keys_str = kstrdup(str, GFP_KERNEL); | ||
1821 | if (!attrs->keys_str) { | ||
1822 | ret = -ENOMEM; | ||
1823 | goto out; | ||
1824 | } | ||
1825 | } else if ((strncmp(str, "val=", strlen("val=")) == 0) || | ||
1826 | (strncmp(str, "vals=", strlen("vals=")) == 0) || | ||
1827 | (strncmp(str, "values=", strlen("values=")) == 0)) { | ||
1828 | attrs->vals_str = kstrdup(str, GFP_KERNEL); | ||
1829 | if (!attrs->vals_str) { | ||
1830 | ret = -ENOMEM; | ||
1831 | goto out; | ||
1832 | } | ||
1833 | } else if (strncmp(str, "sort=", strlen("sort=")) == 0) { | ||
1834 | attrs->sort_key_str = kstrdup(str, GFP_KERNEL); | ||
1835 | if (!attrs->sort_key_str) { | ||
1836 | ret = -ENOMEM; | ||
1837 | goto out; | ||
1838 | } | ||
1839 | } else if (strncmp(str, "name=", strlen("name=")) == 0) { | ||
1840 | attrs->name = kstrdup(str, GFP_KERNEL); | ||
1841 | if (!attrs->name) { | ||
1842 | ret = -ENOMEM; | ||
1843 | goto out; | ||
1844 | } | ||
1845 | } else if (strncmp(str, "clock=", strlen("clock=")) == 0) { | ||
1846 | strsep(&str, "="); | ||
1847 | if (!str) { | ||
1848 | ret = -EINVAL; | ||
1849 | goto out; | ||
1850 | } | ||
1851 | |||
1852 | str = strstrip(str); | ||
1853 | attrs->clock = kstrdup(str, GFP_KERNEL); | ||
1854 | if (!attrs->clock) { | ||
1855 | ret = -ENOMEM; | ||
1856 | goto out; | ||
1857 | } | ||
1858 | } else if (strncmp(str, "size=", strlen("size=")) == 0) { | ||
1859 | int map_bits = parse_map_size(str); | ||
1860 | |||
1861 | if (map_bits < 0) { | ||
1862 | ret = map_bits; | ||
1863 | goto out; | ||
1864 | } | ||
1865 | attrs->map_bits = map_bits; | ||
1866 | } else { | ||
1867 | char *assignment; | ||
1868 | |||
1869 | if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { | ||
1870 | hist_err("Too many variables defined: ", str); | ||
1871 | ret = -EINVAL; | ||
1872 | goto out; | ||
1873 | } | ||
1874 | |||
1875 | assignment = kstrdup(str, GFP_KERNEL); | ||
1876 | if (!assignment) { | ||
1877 | ret = -ENOMEM; | ||
1878 | goto out; | ||
1879 | } | ||
1880 | |||
1881 | attrs->assignment_str[attrs->n_assignments++] = assignment; | ||
1882 | } | ||
1883 | out: | ||
1884 | return ret; | ||
1885 | } | ||
1886 | |||
245 | static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) | 1887 | static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) |
246 | { | 1888 | { |
247 | struct hist_trigger_attrs *attrs; | 1889 | struct hist_trigger_attrs *attrs; |
@@ -254,35 +1896,21 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) | |||
254 | while (trigger_str) { | 1896 | while (trigger_str) { |
255 | char *str = strsep(&trigger_str, ":"); | 1897 | char *str = strsep(&trigger_str, ":"); |
256 | 1898 | ||
257 | if ((strncmp(str, "key=", strlen("key=")) == 0) || | 1899 | if (strchr(str, '=')) { |
258 | (strncmp(str, "keys=", strlen("keys=")) == 0)) | 1900 | ret = parse_assignment(str, attrs); |
259 | attrs->keys_str = kstrdup(str, GFP_KERNEL); | 1901 | if (ret) |
260 | else if ((strncmp(str, "val=", strlen("val=")) == 0) || | 1902 | goto free; |
261 | (strncmp(str, "vals=", strlen("vals=")) == 0) || | 1903 | } else if (strcmp(str, "pause") == 0) |
262 | (strncmp(str, "values=", strlen("values=")) == 0)) | ||
263 | attrs->vals_str = kstrdup(str, GFP_KERNEL); | ||
264 | else if (strncmp(str, "sort=", strlen("sort=")) == 0) | ||
265 | attrs->sort_key_str = kstrdup(str, GFP_KERNEL); | ||
266 | else if (strncmp(str, "name=", strlen("name=")) == 0) | ||
267 | attrs->name = kstrdup(str, GFP_KERNEL); | ||
268 | else if (strcmp(str, "pause") == 0) | ||
269 | attrs->pause = true; | 1904 | attrs->pause = true; |
270 | else if ((strcmp(str, "cont") == 0) || | 1905 | else if ((strcmp(str, "cont") == 0) || |
271 | (strcmp(str, "continue") == 0)) | 1906 | (strcmp(str, "continue") == 0)) |
272 | attrs->cont = true; | 1907 | attrs->cont = true; |
273 | else if (strcmp(str, "clear") == 0) | 1908 | else if (strcmp(str, "clear") == 0) |
274 | attrs->clear = true; | 1909 | attrs->clear = true; |
275 | else if (strncmp(str, "size=", strlen("size=")) == 0) { | 1910 | else { |
276 | int map_bits = parse_map_size(str); | 1911 | ret = parse_action(str, attrs); |
277 | 1912 | if (ret) | |
278 | if (map_bits < 0) { | ||
279 | ret = map_bits; | ||
280 | goto free; | 1913 | goto free; |
281 | } | ||
282 | attrs->map_bits = map_bits; | ||
283 | } else { | ||
284 | ret = -EINVAL; | ||
285 | goto free; | ||
286 | } | 1914 | } |
287 | } | 1915 | } |
288 | 1916 | ||
@@ -291,6 +1919,14 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) | |||
291 | goto free; | 1919 | goto free; |
292 | } | 1920 | } |
293 | 1921 | ||
1922 | if (!attrs->clock) { | ||
1923 | attrs->clock = kstrdup("global", GFP_KERNEL); | ||
1924 | if (!attrs->clock) { | ||
1925 | ret = -ENOMEM; | ||
1926 | goto free; | ||
1927 | } | ||
1928 | } | ||
1929 | |||
294 | return attrs; | 1930 | return attrs; |
295 | free: | 1931 | free: |
296 | destroy_hist_trigger_attrs(attrs); | 1932 | destroy_hist_trigger_attrs(attrs); |
@@ -313,64 +1949,203 @@ static inline void save_comm(char *comm, struct task_struct *task) | |||
313 | memcpy(comm, task->comm, TASK_COMM_LEN); | 1949 | memcpy(comm, task->comm, TASK_COMM_LEN); |
314 | } | 1950 | } |
315 | 1951 | ||
316 | static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt) | 1952 | static void hist_elt_data_free(struct hist_elt_data *elt_data) |
317 | { | 1953 | { |
318 | kfree((char *)elt->private_data); | 1954 | unsigned int i; |
1955 | |||
1956 | for (i = 0; i < SYNTH_FIELDS_MAX; i++) | ||
1957 | kfree(elt_data->field_var_str[i]); | ||
1958 | |||
1959 | kfree(elt_data->comm); | ||
1960 | kfree(elt_data); | ||
319 | } | 1961 | } |
320 | 1962 | ||
321 | static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt) | 1963 | static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) |
1964 | { | ||
1965 | struct hist_elt_data *elt_data = elt->private_data; | ||
1966 | |||
1967 | hist_elt_data_free(elt_data); | ||
1968 | } | ||
1969 | |||
1970 | static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) | ||
322 | { | 1971 | { |
323 | struct hist_trigger_data *hist_data = elt->map->private_data; | 1972 | struct hist_trigger_data *hist_data = elt->map->private_data; |
1973 | unsigned int size = TASK_COMM_LEN; | ||
1974 | struct hist_elt_data *elt_data; | ||
324 | struct hist_field *key_field; | 1975 | struct hist_field *key_field; |
325 | unsigned int i; | 1976 | unsigned int i, n_str; |
1977 | |||
1978 | elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); | ||
1979 | if (!elt_data) | ||
1980 | return -ENOMEM; | ||
326 | 1981 | ||
327 | for_each_hist_key_field(i, hist_data) { | 1982 | for_each_hist_key_field(i, hist_data) { |
328 | key_field = hist_data->fields[i]; | 1983 | key_field = hist_data->fields[i]; |
329 | 1984 | ||
330 | if (key_field->flags & HIST_FIELD_FL_EXECNAME) { | 1985 | if (key_field->flags & HIST_FIELD_FL_EXECNAME) { |
331 | unsigned int size = TASK_COMM_LEN + 1; | 1986 | elt_data->comm = kzalloc(size, GFP_KERNEL); |
332 | 1987 | if (!elt_data->comm) { | |
333 | elt->private_data = kzalloc(size, GFP_KERNEL); | 1988 | kfree(elt_data); |
334 | if (!elt->private_data) | ||
335 | return -ENOMEM; | 1989 | return -ENOMEM; |
1990 | } | ||
336 | break; | 1991 | break; |
337 | } | 1992 | } |
338 | } | 1993 | } |
339 | 1994 | ||
1995 | n_str = hist_data->n_field_var_str + hist_data->n_max_var_str; | ||
1996 | |||
1997 | size = STR_VAR_LEN_MAX; | ||
1998 | |||
1999 | for (i = 0; i < n_str; i++) { | ||
2000 | elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); | ||
2001 | if (!elt_data->field_var_str[i]) { | ||
2002 | hist_elt_data_free(elt_data); | ||
2003 | return -ENOMEM; | ||
2004 | } | ||
2005 | } | ||
2006 | |||
2007 | elt->private_data = elt_data; | ||
2008 | |||
340 | return 0; | 2009 | return 0; |
341 | } | 2010 | } |
342 | 2011 | ||
343 | static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to, | 2012 | static void hist_trigger_elt_data_init(struct tracing_map_elt *elt) |
344 | struct tracing_map_elt *from) | 2013 | { |
2014 | struct hist_elt_data *elt_data = elt->private_data; | ||
2015 | |||
2016 | if (elt_data->comm) | ||
2017 | save_comm(elt_data->comm, current); | ||
2018 | } | ||
2019 | |||
2020 | static const struct tracing_map_ops hist_trigger_elt_data_ops = { | ||
2021 | .elt_alloc = hist_trigger_elt_data_alloc, | ||
2022 | .elt_free = hist_trigger_elt_data_free, | ||
2023 | .elt_init = hist_trigger_elt_data_init, | ||
2024 | }; | ||
2025 | |||
2026 | static const char *get_hist_field_flags(struct hist_field *hist_field) | ||
2027 | { | ||
2028 | const char *flags_str = NULL; | ||
2029 | |||
2030 | if (hist_field->flags & HIST_FIELD_FL_HEX) | ||
2031 | flags_str = "hex"; | ||
2032 | else if (hist_field->flags & HIST_FIELD_FL_SYM) | ||
2033 | flags_str = "sym"; | ||
2034 | else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) | ||
2035 | flags_str = "sym-offset"; | ||
2036 | else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) | ||
2037 | flags_str = "execname"; | ||
2038 | else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) | ||
2039 | flags_str = "syscall"; | ||
2040 | else if (hist_field->flags & HIST_FIELD_FL_LOG2) | ||
2041 | flags_str = "log2"; | ||
2042 | else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) | ||
2043 | flags_str = "usecs"; | ||
2044 | |||
2045 | return flags_str; | ||
2046 | } | ||
2047 | |||
2048 | static void expr_field_str(struct hist_field *field, char *expr) | ||
345 | { | 2049 | { |
346 | char *comm_from = from->private_data; | 2050 | if (field->flags & HIST_FIELD_FL_VAR_REF) |
347 | char *comm_to = to->private_data; | 2051 | strcat(expr, "$"); |
2052 | |||
2053 | strcat(expr, hist_field_name(field, 0)); | ||
348 | 2054 | ||
349 | if (comm_from) | 2055 | if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) { |
350 | memcpy(comm_to, comm_from, TASK_COMM_LEN + 1); | 2056 | const char *flags_str = get_hist_field_flags(field); |
2057 | |||
2058 | if (flags_str) { | ||
2059 | strcat(expr, "."); | ||
2060 | strcat(expr, flags_str); | ||
2061 | } | ||
2062 | } | ||
351 | } | 2063 | } |
352 | 2064 | ||
353 | static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) | 2065 | static char *expr_str(struct hist_field *field, unsigned int level) |
354 | { | 2066 | { |
355 | char *comm = elt->private_data; | 2067 | char *expr; |
2068 | |||
2069 | if (level > 1) | ||
2070 | return NULL; | ||
2071 | |||
2072 | expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); | ||
2073 | if (!expr) | ||
2074 | return NULL; | ||
2075 | |||
2076 | if (!field->operands[0]) { | ||
2077 | expr_field_str(field, expr); | ||
2078 | return expr; | ||
2079 | } | ||
2080 | |||
2081 | if (field->operator == FIELD_OP_UNARY_MINUS) { | ||
2082 | char *subexpr; | ||
356 | 2083 | ||
357 | if (comm) | 2084 | strcat(expr, "-("); |
358 | save_comm(comm, current); | 2085 | subexpr = expr_str(field->operands[0], ++level); |
2086 | if (!subexpr) { | ||
2087 | kfree(expr); | ||
2088 | return NULL; | ||
2089 | } | ||
2090 | strcat(expr, subexpr); | ||
2091 | strcat(expr, ")"); | ||
2092 | |||
2093 | kfree(subexpr); | ||
2094 | |||
2095 | return expr; | ||
2096 | } | ||
2097 | |||
2098 | expr_field_str(field->operands[0], expr); | ||
2099 | |||
2100 | switch (field->operator) { | ||
2101 | case FIELD_OP_MINUS: | ||
2102 | strcat(expr, "-"); | ||
2103 | break; | ||
2104 | case FIELD_OP_PLUS: | ||
2105 | strcat(expr, "+"); | ||
2106 | break; | ||
2107 | default: | ||
2108 | kfree(expr); | ||
2109 | return NULL; | ||
2110 | } | ||
2111 | |||
2112 | expr_field_str(field->operands[1], expr); | ||
2113 | |||
2114 | return expr; | ||
359 | } | 2115 | } |
360 | 2116 | ||
361 | static const struct tracing_map_ops hist_trigger_elt_comm_ops = { | 2117 | static int contains_operator(char *str) |
362 | .elt_alloc = hist_trigger_elt_comm_alloc, | 2118 | { |
363 | .elt_copy = hist_trigger_elt_comm_copy, | 2119 | enum field_op_id field_op = FIELD_OP_NONE; |
364 | .elt_free = hist_trigger_elt_comm_free, | 2120 | char *op; |
365 | .elt_init = hist_trigger_elt_comm_init, | 2121 | |
366 | }; | 2122 | op = strpbrk(str, "+-"); |
2123 | if (!op) | ||
2124 | return FIELD_OP_NONE; | ||
2125 | |||
2126 | switch (*op) { | ||
2127 | case '-': | ||
2128 | if (*str == '-') | ||
2129 | field_op = FIELD_OP_UNARY_MINUS; | ||
2130 | else | ||
2131 | field_op = FIELD_OP_MINUS; | ||
2132 | break; | ||
2133 | case '+': | ||
2134 | field_op = FIELD_OP_PLUS; | ||
2135 | break; | ||
2136 | default: | ||
2137 | break; | ||
2138 | } | ||
2139 | |||
2140 | return field_op; | ||
2141 | } | ||
367 | 2142 | ||
368 | static void destroy_hist_field(struct hist_field *hist_field, | 2143 | static void destroy_hist_field(struct hist_field *hist_field, |
369 | unsigned int level) | 2144 | unsigned int level) |
370 | { | 2145 | { |
371 | unsigned int i; | 2146 | unsigned int i; |
372 | 2147 | ||
373 | if (level > 2) | 2148 | if (level > 3) |
374 | return; | 2149 | return; |
375 | 2150 | ||
376 | if (!hist_field) | 2151 | if (!hist_field) |
@@ -379,11 +2154,17 @@ static void destroy_hist_field(struct hist_field *hist_field, | |||
379 | for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) | 2154 | for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) |
380 | destroy_hist_field(hist_field->operands[i], level + 1); | 2155 | destroy_hist_field(hist_field->operands[i], level + 1); |
381 | 2156 | ||
2157 | kfree(hist_field->var.name); | ||
2158 | kfree(hist_field->name); | ||
2159 | kfree(hist_field->type); | ||
2160 | |||
382 | kfree(hist_field); | 2161 | kfree(hist_field); |
383 | } | 2162 | } |
384 | 2163 | ||
385 | static struct hist_field *create_hist_field(struct ftrace_event_field *field, | 2164 | static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, |
386 | unsigned long flags) | 2165 | struct ftrace_event_field *field, |
2166 | unsigned long flags, | ||
2167 | char *var_name) | ||
387 | { | 2168 | { |
388 | struct hist_field *hist_field; | 2169 | struct hist_field *hist_field; |
389 | 2170 | ||
@@ -394,8 +2175,22 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |||
394 | if (!hist_field) | 2175 | if (!hist_field) |
395 | return NULL; | 2176 | return NULL; |
396 | 2177 | ||
2178 | hist_field->hist_data = hist_data; | ||
2179 | |||
2180 | if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) | ||
2181 | goto out; /* caller will populate */ | ||
2182 | |||
2183 | if (flags & HIST_FIELD_FL_VAR_REF) { | ||
2184 | hist_field->fn = hist_field_var_ref; | ||
2185 | goto out; | ||
2186 | } | ||
2187 | |||
397 | if (flags & HIST_FIELD_FL_HITCOUNT) { | 2188 | if (flags & HIST_FIELD_FL_HITCOUNT) { |
398 | hist_field->fn = hist_field_counter; | 2189 | hist_field->fn = hist_field_counter; |
2190 | hist_field->size = sizeof(u64); | ||
2191 | hist_field->type = kstrdup("u64", GFP_KERNEL); | ||
2192 | if (!hist_field->type) | ||
2193 | goto free; | ||
399 | goto out; | 2194 | goto out; |
400 | } | 2195 | } |
401 | 2196 | ||
@@ -407,8 +2202,29 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |||
407 | if (flags & HIST_FIELD_FL_LOG2) { | 2202 | if (flags & HIST_FIELD_FL_LOG2) { |
408 | unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; | 2203 | unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; |
409 | hist_field->fn = hist_field_log2; | 2204 | hist_field->fn = hist_field_log2; |
410 | hist_field->operands[0] = create_hist_field(field, fl); | 2205 | hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); |
411 | hist_field->size = hist_field->operands[0]->size; | 2206 | hist_field->size = hist_field->operands[0]->size; |
2207 | hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); | ||
2208 | if (!hist_field->type) | ||
2209 | goto free; | ||
2210 | goto out; | ||
2211 | } | ||
2212 | |||
2213 | if (flags & HIST_FIELD_FL_TIMESTAMP) { | ||
2214 | hist_field->fn = hist_field_timestamp; | ||
2215 | hist_field->size = sizeof(u64); | ||
2216 | hist_field->type = kstrdup("u64", GFP_KERNEL); | ||
2217 | if (!hist_field->type) | ||
2218 | goto free; | ||
2219 | goto out; | ||
2220 | } | ||
2221 | |||
2222 | if (flags & HIST_FIELD_FL_CPU) { | ||
2223 | hist_field->fn = hist_field_cpu; | ||
2224 | hist_field->size = sizeof(int); | ||
2225 | hist_field->type = kstrdup("unsigned int", GFP_KERNEL); | ||
2226 | if (!hist_field->type) | ||
2227 | goto free; | ||
412 | goto out; | 2228 | goto out; |
413 | } | 2229 | } |
414 | 2230 | ||
@@ -418,6 +2234,11 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |||
418 | if (is_string_field(field)) { | 2234 | if (is_string_field(field)) { |
419 | flags |= HIST_FIELD_FL_STRING; | 2235 | flags |= HIST_FIELD_FL_STRING; |
420 | 2236 | ||
2237 | hist_field->size = MAX_FILTER_STR_VAL; | ||
2238 | hist_field->type = kstrdup(field->type, GFP_KERNEL); | ||
2239 | if (!hist_field->type) | ||
2240 | goto free; | ||
2241 | |||
421 | if (field->filter_type == FILTER_STATIC_STRING) | 2242 | if (field->filter_type == FILTER_STATIC_STRING) |
422 | hist_field->fn = hist_field_string; | 2243 | hist_field->fn = hist_field_string; |
423 | else if (field->filter_type == FILTER_DYN_STRING) | 2244 | else if (field->filter_type == FILTER_DYN_STRING) |
@@ -425,6 +2246,12 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |||
425 | else | 2246 | else |
426 | hist_field->fn = hist_field_pstring; | 2247 | hist_field->fn = hist_field_pstring; |
427 | } else { | 2248 | } else { |
2249 | hist_field->size = field->size; | ||
2250 | hist_field->is_signed = field->is_signed; | ||
2251 | hist_field->type = kstrdup(field->type, GFP_KERNEL); | ||
2252 | if (!hist_field->type) | ||
2253 | goto free; | ||
2254 | |||
428 | hist_field->fn = select_value_fn(field->size, | 2255 | hist_field->fn = select_value_fn(field->size, |
429 | field->is_signed); | 2256 | field->is_signed); |
430 | if (!hist_field->fn) { | 2257 | if (!hist_field->fn) { |
@@ -436,14 +2263,23 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |||
436 | hist_field->field = field; | 2263 | hist_field->field = field; |
437 | hist_field->flags = flags; | 2264 | hist_field->flags = flags; |
438 | 2265 | ||
2266 | if (var_name) { | ||
2267 | hist_field->var.name = kstrdup(var_name, GFP_KERNEL); | ||
2268 | if (!hist_field->var.name) | ||
2269 | goto free; | ||
2270 | } | ||
2271 | |||
439 | return hist_field; | 2272 | return hist_field; |
2273 | free: | ||
2274 | destroy_hist_field(hist_field, 0); | ||
2275 | return NULL; | ||
440 | } | 2276 | } |
441 | 2277 | ||
442 | static void destroy_hist_fields(struct hist_trigger_data *hist_data) | 2278 | static void destroy_hist_fields(struct hist_trigger_data *hist_data) |
443 | { | 2279 | { |
444 | unsigned int i; | 2280 | unsigned int i; |
445 | 2281 | ||
446 | for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) { | 2282 | for (i = 0; i < HIST_FIELDS_MAX; i++) { |
447 | if (hist_data->fields[i]) { | 2283 | if (hist_data->fields[i]) { |
448 | destroy_hist_field(hist_data->fields[i], 0); | 2284 | destroy_hist_field(hist_data->fields[i], 0); |
449 | hist_data->fields[i] = NULL; | 2285 | hist_data->fields[i] = NULL; |
@@ -451,69 +2287,1610 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data) | |||
451 | } | 2287 | } |
452 | } | 2288 | } |
453 | 2289 | ||
454 | static int create_hitcount_val(struct hist_trigger_data *hist_data) | 2290 | static int init_var_ref(struct hist_field *ref_field, |
2291 | struct hist_field *var_field, | ||
2292 | char *system, char *event_name) | ||
455 | { | 2293 | { |
456 | hist_data->fields[HITCOUNT_IDX] = | 2294 | int err = 0; |
457 | create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT); | 2295 | |
458 | if (!hist_data->fields[HITCOUNT_IDX]) | 2296 | ref_field->var.idx = var_field->var.idx; |
459 | return -ENOMEM; | 2297 | ref_field->var.hist_data = var_field->hist_data; |
2298 | ref_field->size = var_field->size; | ||
2299 | ref_field->is_signed = var_field->is_signed; | ||
2300 | ref_field->flags |= var_field->flags & | ||
2301 | (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | ||
2302 | |||
2303 | if (system) { | ||
2304 | ref_field->system = kstrdup(system, GFP_KERNEL); | ||
2305 | if (!ref_field->system) | ||
2306 | return -ENOMEM; | ||
2307 | } | ||
460 | 2308 | ||
461 | hist_data->n_vals++; | 2309 | if (event_name) { |
2310 | ref_field->event_name = kstrdup(event_name, GFP_KERNEL); | ||
2311 | if (!ref_field->event_name) { | ||
2312 | err = -ENOMEM; | ||
2313 | goto free; | ||
2314 | } | ||
2315 | } | ||
462 | 2316 | ||
463 | if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) | 2317 | if (var_field->var.name) { |
2318 | ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); | ||
2319 | if (!ref_field->name) { | ||
2320 | err = -ENOMEM; | ||
2321 | goto free; | ||
2322 | } | ||
2323 | } else if (var_field->name) { | ||
2324 | ref_field->name = kstrdup(var_field->name, GFP_KERNEL); | ||
2325 | if (!ref_field->name) { | ||
2326 | err = -ENOMEM; | ||
2327 | goto free; | ||
2328 | } | ||
2329 | } | ||
2330 | |||
2331 | ref_field->type = kstrdup(var_field->type, GFP_KERNEL); | ||
2332 | if (!ref_field->type) { | ||
2333 | err = -ENOMEM; | ||
2334 | goto free; | ||
2335 | } | ||
2336 | out: | ||
2337 | return err; | ||
2338 | free: | ||
2339 | kfree(ref_field->system); | ||
2340 | kfree(ref_field->event_name); | ||
2341 | kfree(ref_field->name); | ||
2342 | |||
2343 | goto out; | ||
2344 | } | ||
2345 | |||
2346 | static struct hist_field *create_var_ref(struct hist_field *var_field, | ||
2347 | char *system, char *event_name) | ||
2348 | { | ||
2349 | unsigned long flags = HIST_FIELD_FL_VAR_REF; | ||
2350 | struct hist_field *ref_field; | ||
2351 | |||
2352 | ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); | ||
2353 | if (ref_field) { | ||
2354 | if (init_var_ref(ref_field, var_field, system, event_name)) { | ||
2355 | destroy_hist_field(ref_field, 0); | ||
2356 | return NULL; | ||
2357 | } | ||
2358 | } | ||
2359 | |||
2360 | return ref_field; | ||
2361 | } | ||
2362 | |||
2363 | static bool is_var_ref(char *var_name) | ||
2364 | { | ||
2365 | if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') | ||
2366 | return false; | ||
2367 | |||
2368 | return true; | ||
2369 | } | ||
2370 | |||
2371 | static char *field_name_from_var(struct hist_trigger_data *hist_data, | ||
2372 | char *var_name) | ||
2373 | { | ||
2374 | char *name, *field; | ||
2375 | unsigned int i; | ||
2376 | |||
2377 | for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { | ||
2378 | name = hist_data->attrs->var_defs.name[i]; | ||
2379 | |||
2380 | if (strcmp(var_name, name) == 0) { | ||
2381 | field = hist_data->attrs->var_defs.expr[i]; | ||
2382 | if (contains_operator(field) || is_var_ref(field)) | ||
2383 | continue; | ||
2384 | return field; | ||
2385 | } | ||
2386 | } | ||
2387 | |||
2388 | return NULL; | ||
2389 | } | ||
2390 | |||
2391 | static char *local_field_var_ref(struct hist_trigger_data *hist_data, | ||
2392 | char *system, char *event_name, | ||
2393 | char *var_name) | ||
2394 | { | ||
2395 | struct trace_event_call *call; | ||
2396 | |||
2397 | if (system && event_name) { | ||
2398 | call = hist_data->event_file->event_call; | ||
2399 | |||
2400 | if (strcmp(system, call->class->system) != 0) | ||
2401 | return NULL; | ||
2402 | |||
2403 | if (strcmp(event_name, trace_event_name(call)) != 0) | ||
2404 | return NULL; | ||
2405 | } | ||
2406 | |||
2407 | if (!!system != !!event_name) | ||
2408 | return NULL; | ||
2409 | |||
2410 | if (!is_var_ref(var_name)) | ||
2411 | return NULL; | ||
2412 | |||
2413 | var_name++; | ||
2414 | |||
2415 | return field_name_from_var(hist_data, var_name); | ||
2416 | } | ||
2417 | |||
2418 | static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, | ||
2419 | char *system, char *event_name, | ||
2420 | char *var_name) | ||
2421 | { | ||
2422 | struct hist_field *var_field = NULL, *ref_field = NULL; | ||
2423 | |||
2424 | if (!is_var_ref(var_name)) | ||
2425 | return NULL; | ||
2426 | |||
2427 | var_name++; | ||
2428 | |||
2429 | var_field = find_event_var(hist_data, system, event_name, var_name); | ||
2430 | if (var_field) | ||
2431 | ref_field = create_var_ref(var_field, system, event_name); | ||
2432 | |||
2433 | if (!ref_field) | ||
2434 | hist_err_event("Couldn't find variable: $", | ||
2435 | system, event_name, var_name); | ||
2436 | |||
2437 | return ref_field; | ||
2438 | } | ||
2439 | |||
2440 | static struct ftrace_event_field * | ||
2441 | parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, | ||
2442 | char *field_str, unsigned long *flags) | ||
2443 | { | ||
2444 | struct ftrace_event_field *field = NULL; | ||
2445 | char *field_name, *modifier, *str; | ||
2446 | |||
2447 | modifier = str = kstrdup(field_str, GFP_KERNEL); | ||
2448 | if (!modifier) | ||
2449 | return ERR_PTR(-ENOMEM); | ||
2450 | |||
2451 | field_name = strsep(&modifier, "."); | ||
2452 | if (modifier) { | ||
2453 | if (strcmp(modifier, "hex") == 0) | ||
2454 | *flags |= HIST_FIELD_FL_HEX; | ||
2455 | else if (strcmp(modifier, "sym") == 0) | ||
2456 | *flags |= HIST_FIELD_FL_SYM; | ||
2457 | else if (strcmp(modifier, "sym-offset") == 0) | ||
2458 | *flags |= HIST_FIELD_FL_SYM_OFFSET; | ||
2459 | else if ((strcmp(modifier, "execname") == 0) && | ||
2460 | (strcmp(field_name, "common_pid") == 0)) | ||
2461 | *flags |= HIST_FIELD_FL_EXECNAME; | ||
2462 | else if (strcmp(modifier, "syscall") == 0) | ||
2463 | *flags |= HIST_FIELD_FL_SYSCALL; | ||
2464 | else if (strcmp(modifier, "log2") == 0) | ||
2465 | *flags |= HIST_FIELD_FL_LOG2; | ||
2466 | else if (strcmp(modifier, "usecs") == 0) | ||
2467 | *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; | ||
2468 | else { | ||
2469 | field = ERR_PTR(-EINVAL); | ||
2470 | goto out; | ||
2471 | } | ||
2472 | } | ||
2473 | |||
2474 | if (strcmp(field_name, "common_timestamp") == 0) { | ||
2475 | *flags |= HIST_FIELD_FL_TIMESTAMP; | ||
2476 | hist_data->enable_timestamps = true; | ||
2477 | if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) | ||
2478 | hist_data->attrs->ts_in_usecs = true; | ||
2479 | } else if (strcmp(field_name, "cpu") == 0) | ||
2480 | *flags |= HIST_FIELD_FL_CPU; | ||
2481 | else { | ||
2482 | field = trace_find_event_field(file->event_call, field_name); | ||
2483 | if (!field || !field->size) { | ||
2484 | field = ERR_PTR(-EINVAL); | ||
2485 | goto out; | ||
2486 | } | ||
2487 | } | ||
2488 | out: | ||
2489 | kfree(str); | ||
2490 | |||
2491 | return field; | ||
2492 | } | ||
2493 | |||
2494 | static struct hist_field *create_alias(struct hist_trigger_data *hist_data, | ||
2495 | struct hist_field *var_ref, | ||
2496 | char *var_name) | ||
2497 | { | ||
2498 | struct hist_field *alias = NULL; | ||
2499 | unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR; | ||
2500 | |||
2501 | alias = create_hist_field(hist_data, NULL, flags, var_name); | ||
2502 | if (!alias) | ||
2503 | return NULL; | ||
2504 | |||
2505 | alias->fn = var_ref->fn; | ||
2506 | alias->operands[0] = var_ref; | ||
2507 | |||
2508 | if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) { | ||
2509 | destroy_hist_field(alias, 0); | ||
2510 | return NULL; | ||
2511 | } | ||
2512 | |||
2513 | return alias; | ||
2514 | } | ||
2515 | |||
2516 | static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, | ||
2517 | struct trace_event_file *file, char *str, | ||
2518 | unsigned long *flags, char *var_name) | ||
2519 | { | ||
2520 | char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str; | ||
2521 | struct ftrace_event_field *field = NULL; | ||
2522 | struct hist_field *hist_field = NULL; | ||
2523 | int ret = 0; | ||
2524 | |||
2525 | s = strchr(str, '.'); | ||
2526 | if (s) { | ||
2527 | s = strchr(++s, '.'); | ||
2528 | if (s) { | ||
2529 | ref_system = strsep(&str, "."); | ||
2530 | if (!str) { | ||
2531 | ret = -EINVAL; | ||
2532 | goto out; | ||
2533 | } | ||
2534 | ref_event = strsep(&str, "."); | ||
2535 | if (!str) { | ||
2536 | ret = -EINVAL; | ||
2537 | goto out; | ||
2538 | } | ||
2539 | ref_var = str; | ||
2540 | } | ||
2541 | } | ||
2542 | |||
2543 | s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); | ||
2544 | if (!s) { | ||
2545 | hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); | ||
2546 | if (hist_field) { | ||
2547 | hist_data->var_refs[hist_data->n_var_refs] = hist_field; | ||
2548 | hist_field->var_ref_idx = hist_data->n_var_refs++; | ||
2549 | if (var_name) { | ||
2550 | hist_field = create_alias(hist_data, hist_field, var_name); | ||
2551 | if (!hist_field) { | ||
2552 | ret = -ENOMEM; | ||
2553 | goto out; | ||
2554 | } | ||
2555 | } | ||
2556 | return hist_field; | ||
2557 | } | ||
2558 | } else | ||
2559 | str = s; | ||
2560 | |||
2561 | field = parse_field(hist_data, file, str, flags); | ||
2562 | if (IS_ERR(field)) { | ||
2563 | ret = PTR_ERR(field); | ||
2564 | goto out; | ||
2565 | } | ||
2566 | |||
2567 | hist_field = create_hist_field(hist_data, field, *flags, var_name); | ||
2568 | if (!hist_field) { | ||
2569 | ret = -ENOMEM; | ||
2570 | goto out; | ||
2571 | } | ||
2572 | |||
2573 | return hist_field; | ||
2574 | out: | ||
2575 | return ERR_PTR(ret); | ||
2576 | } | ||
2577 | |||
2578 | static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, | ||
2579 | struct trace_event_file *file, | ||
2580 | char *str, unsigned long flags, | ||
2581 | char *var_name, unsigned int level); | ||
2582 | |||
2583 | static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, | ||
2584 | struct trace_event_file *file, | ||
2585 | char *str, unsigned long flags, | ||
2586 | char *var_name, unsigned int level) | ||
2587 | { | ||
2588 | struct hist_field *operand1, *expr = NULL; | ||
2589 | unsigned long operand_flags; | ||
2590 | int ret = 0; | ||
2591 | char *s; | ||
2592 | |||
2593 | /* we support only -(xxx) i.e. explicit parens required */ | ||
2594 | |||
2595 | if (level > 3) { | ||
2596 | hist_err("Too many subexpressions (3 max): ", str); | ||
2597 | ret = -EINVAL; | ||
2598 | goto free; | ||
2599 | } | ||
2600 | |||
2601 | str++; /* skip leading '-' */ | ||
2602 | |||
2603 | s = strchr(str, '('); | ||
2604 | if (s) | ||
2605 | str++; | ||
2606 | else { | ||
2607 | ret = -EINVAL; | ||
2608 | goto free; | ||
2609 | } | ||
2610 | |||
2611 | s = strrchr(str, ')'); | ||
2612 | if (s) | ||
2613 | *s = '\0'; | ||
2614 | else { | ||
2615 | ret = -EINVAL; /* no closing ')' */ | ||
2616 | goto free; | ||
2617 | } | ||
2618 | |||
2619 | flags |= HIST_FIELD_FL_EXPR; | ||
2620 | expr = create_hist_field(hist_data, NULL, flags, var_name); | ||
2621 | if (!expr) { | ||
2622 | ret = -ENOMEM; | ||
2623 | goto free; | ||
2624 | } | ||
2625 | |||
2626 | operand_flags = 0; | ||
2627 | operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); | ||
2628 | if (IS_ERR(operand1)) { | ||
2629 | ret = PTR_ERR(operand1); | ||
2630 | goto free; | ||
2631 | } | ||
2632 | |||
2633 | expr->flags |= operand1->flags & | ||
2634 | (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | ||
2635 | expr->fn = hist_field_unary_minus; | ||
2636 | expr->operands[0] = operand1; | ||
2637 | expr->operator = FIELD_OP_UNARY_MINUS; | ||
2638 | expr->name = expr_str(expr, 0); | ||
2639 | expr->type = kstrdup(operand1->type, GFP_KERNEL); | ||
2640 | if (!expr->type) { | ||
2641 | ret = -ENOMEM; | ||
2642 | goto free; | ||
2643 | } | ||
2644 | |||
2645 | return expr; | ||
2646 | free: | ||
2647 | destroy_hist_field(expr, 0); | ||
2648 | return ERR_PTR(ret); | ||
2649 | } | ||
2650 | |||
2651 | static int check_expr_operands(struct hist_field *operand1, | ||
2652 | struct hist_field *operand2) | ||
2653 | { | ||
2654 | unsigned long operand1_flags = operand1->flags; | ||
2655 | unsigned long operand2_flags = operand2->flags; | ||
2656 | |||
2657 | if ((operand1_flags & HIST_FIELD_FL_VAR_REF) || | ||
2658 | (operand1_flags & HIST_FIELD_FL_ALIAS)) { | ||
2659 | struct hist_field *var; | ||
2660 | |||
2661 | var = find_var_field(operand1->var.hist_data, operand1->name); | ||
2662 | if (!var) | ||
2663 | return -EINVAL; | ||
2664 | operand1_flags = var->flags; | ||
2665 | } | ||
2666 | |||
2667 | if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || | ||
2668 | (operand2_flags & HIST_FIELD_FL_ALIAS)) { | ||
2669 | struct hist_field *var; | ||
2670 | |||
2671 | var = find_var_field(operand2->var.hist_data, operand2->name); | ||
2672 | if (!var) | ||
2673 | return -EINVAL; | ||
2674 | operand2_flags = var->flags; | ||
2675 | } | ||
2676 | |||
2677 | if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != | ||
2678 | (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { | ||
2679 | hist_err("Timestamp units in expression don't match", NULL); | ||
464 | return -EINVAL; | 2680 | return -EINVAL; |
2681 | } | ||
465 | 2682 | ||
466 | return 0; | 2683 | return 0; |
467 | } | 2684 | } |
468 | 2685 | ||
469 | static int create_val_field(struct hist_trigger_data *hist_data, | 2686 | static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, |
470 | unsigned int val_idx, | 2687 | struct trace_event_file *file, |
471 | struct trace_event_file *file, | 2688 | char *str, unsigned long flags, |
472 | char *field_str) | 2689 | char *var_name, unsigned int level) |
473 | { | 2690 | { |
474 | struct ftrace_event_field *field = NULL; | 2691 | struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; |
475 | unsigned long flags = 0; | 2692 | unsigned long operand_flags; |
476 | char *field_name; | 2693 | int field_op, ret = -EINVAL; |
2694 | char *sep, *operand1_str; | ||
2695 | |||
2696 | if (level > 3) { | ||
2697 | hist_err("Too many subexpressions (3 max): ", str); | ||
2698 | return ERR_PTR(-EINVAL); | ||
2699 | } | ||
2700 | |||
2701 | field_op = contains_operator(str); | ||
2702 | |||
2703 | if (field_op == FIELD_OP_NONE) | ||
2704 | return parse_atom(hist_data, file, str, &flags, var_name); | ||
2705 | |||
2706 | if (field_op == FIELD_OP_UNARY_MINUS) | ||
2707 | return parse_unary(hist_data, file, str, flags, var_name, ++level); | ||
2708 | |||
2709 | switch (field_op) { | ||
2710 | case FIELD_OP_MINUS: | ||
2711 | sep = "-"; | ||
2712 | break; | ||
2713 | case FIELD_OP_PLUS: | ||
2714 | sep = "+"; | ||
2715 | break; | ||
2716 | default: | ||
2717 | goto free; | ||
2718 | } | ||
2719 | |||
2720 | operand1_str = strsep(&str, sep); | ||
2721 | if (!operand1_str || !str) | ||
2722 | goto free; | ||
2723 | |||
2724 | operand_flags = 0; | ||
2725 | operand1 = parse_atom(hist_data, file, operand1_str, | ||
2726 | &operand_flags, NULL); | ||
2727 | if (IS_ERR(operand1)) { | ||
2728 | ret = PTR_ERR(operand1); | ||
2729 | operand1 = NULL; | ||
2730 | goto free; | ||
2731 | } | ||
2732 | |||
2733 | /* rest of string could be another expression e.g. b+c in a+b+c */ | ||
2734 | operand_flags = 0; | ||
2735 | operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); | ||
2736 | if (IS_ERR(operand2)) { | ||
2737 | ret = PTR_ERR(operand2); | ||
2738 | operand2 = NULL; | ||
2739 | goto free; | ||
2740 | } | ||
2741 | |||
2742 | ret = check_expr_operands(operand1, operand2); | ||
2743 | if (ret) | ||
2744 | goto free; | ||
2745 | |||
2746 | flags |= HIST_FIELD_FL_EXPR; | ||
2747 | |||
2748 | flags |= operand1->flags & | ||
2749 | (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | ||
2750 | |||
2751 | expr = create_hist_field(hist_data, NULL, flags, var_name); | ||
2752 | if (!expr) { | ||
2753 | ret = -ENOMEM; | ||
2754 | goto free; | ||
2755 | } | ||
2756 | |||
2757 | operand1->read_once = true; | ||
2758 | operand2->read_once = true; | ||
2759 | |||
2760 | expr->operands[0] = operand1; | ||
2761 | expr->operands[1] = operand2; | ||
2762 | expr->operator = field_op; | ||
2763 | expr->name = expr_str(expr, 0); | ||
2764 | expr->type = kstrdup(operand1->type, GFP_KERNEL); | ||
2765 | if (!expr->type) { | ||
2766 | ret = -ENOMEM; | ||
2767 | goto free; | ||
2768 | } | ||
2769 | |||
2770 | switch (field_op) { | ||
2771 | case FIELD_OP_MINUS: | ||
2772 | expr->fn = hist_field_minus; | ||
2773 | break; | ||
2774 | case FIELD_OP_PLUS: | ||
2775 | expr->fn = hist_field_plus; | ||
2776 | break; | ||
2777 | default: | ||
2778 | ret = -EINVAL; | ||
2779 | goto free; | ||
2780 | } | ||
2781 | |||
2782 | return expr; | ||
2783 | free: | ||
2784 | destroy_hist_field(operand1, 0); | ||
2785 | destroy_hist_field(operand2, 0); | ||
2786 | destroy_hist_field(expr, 0); | ||
2787 | |||
2788 | return ERR_PTR(ret); | ||
2789 | } | ||
2790 | |||
2791 | static char *find_trigger_filter(struct hist_trigger_data *hist_data, | ||
2792 | struct trace_event_file *file) | ||
2793 | { | ||
2794 | struct event_trigger_data *test; | ||
2795 | |||
2796 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
2797 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
2798 | if (test->private_data == hist_data) | ||
2799 | return test->filter_str; | ||
2800 | } | ||
2801 | } | ||
2802 | |||
2803 | return NULL; | ||
2804 | } | ||
2805 | |||
2806 | static struct event_command trigger_hist_cmd; | ||
2807 | static int event_hist_trigger_func(struct event_command *cmd_ops, | ||
2808 | struct trace_event_file *file, | ||
2809 | char *glob, char *cmd, char *param); | ||
2810 | |||
2811 | static bool compatible_keys(struct hist_trigger_data *target_hist_data, | ||
2812 | struct hist_trigger_data *hist_data, | ||
2813 | unsigned int n_keys) | ||
2814 | { | ||
2815 | struct hist_field *target_hist_field, *hist_field; | ||
2816 | unsigned int n, i, j; | ||
2817 | |||
2818 | if (hist_data->n_fields - hist_data->n_vals != n_keys) | ||
2819 | return false; | ||
2820 | |||
2821 | i = hist_data->n_vals; | ||
2822 | j = target_hist_data->n_vals; | ||
2823 | |||
2824 | for (n = 0; n < n_keys; n++) { | ||
2825 | hist_field = hist_data->fields[i + n]; | ||
2826 | target_hist_field = target_hist_data->fields[j + n]; | ||
2827 | |||
2828 | if (strcmp(hist_field->type, target_hist_field->type) != 0) | ||
2829 | return false; | ||
2830 | if (hist_field->size != target_hist_field->size) | ||
2831 | return false; | ||
2832 | if (hist_field->is_signed != target_hist_field->is_signed) | ||
2833 | return false; | ||
2834 | } | ||
2835 | |||
2836 | return true; | ||
2837 | } | ||
2838 | |||
2839 | static struct hist_trigger_data * | ||
2840 | find_compatible_hist(struct hist_trigger_data *target_hist_data, | ||
2841 | struct trace_event_file *file) | ||
2842 | { | ||
2843 | struct hist_trigger_data *hist_data; | ||
2844 | struct event_trigger_data *test; | ||
2845 | unsigned int n_keys; | ||
2846 | |||
2847 | n_keys = target_hist_data->n_fields - target_hist_data->n_vals; | ||
2848 | |||
2849 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
2850 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
2851 | hist_data = test->private_data; | ||
2852 | |||
2853 | if (compatible_keys(target_hist_data, hist_data, n_keys)) | ||
2854 | return hist_data; | ||
2855 | } | ||
2856 | } | ||
2857 | |||
2858 | return NULL; | ||
2859 | } | ||
2860 | |||
2861 | static struct trace_event_file *event_file(struct trace_array *tr, | ||
2862 | char *system, char *event_name) | ||
2863 | { | ||
2864 | struct trace_event_file *file; | ||
2865 | |||
2866 | file = find_event_file(tr, system, event_name); | ||
2867 | if (!file) | ||
2868 | return ERR_PTR(-EINVAL); | ||
2869 | |||
2870 | return file; | ||
2871 | } | ||
2872 | |||
2873 | static struct hist_field * | ||
2874 | find_synthetic_field_var(struct hist_trigger_data *target_hist_data, | ||
2875 | char *system, char *event_name, char *field_name) | ||
2876 | { | ||
2877 | struct hist_field *event_var; | ||
2878 | char *synthetic_name; | ||
2879 | |||
2880 | synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); | ||
2881 | if (!synthetic_name) | ||
2882 | return ERR_PTR(-ENOMEM); | ||
2883 | |||
2884 | strcpy(synthetic_name, "synthetic_"); | ||
2885 | strcat(synthetic_name, field_name); | ||
2886 | |||
2887 | event_var = find_event_var(target_hist_data, system, event_name, synthetic_name); | ||
2888 | |||
2889 | kfree(synthetic_name); | ||
2890 | |||
2891 | return event_var; | ||
2892 | } | ||
2893 | |||
2894 | /** | ||
2895 | * create_field_var_hist - Automatically create a histogram and var for a field | ||
2896 | * @target_hist_data: The target hist trigger | ||
2897 | * @subsys_name: Optional subsystem name | ||
2898 | * @event_name: Optional event name | ||
2899 | * @field_name: The name of the field (and the resulting variable) | ||
2900 | * | ||
2901 | * Hist trigger actions fetch data from variables, not directly from | ||
2902 | * events. However, for convenience, users are allowed to directly | ||
2903 | * specify an event field in an action, which will be automatically | ||
2904 | * converted into a variable on their behalf. | ||
2905 | |||
2906 | * If a user specifies a field on an event that isn't the event the | ||
2907 | * histogram currently being defined (the target event histogram), the | ||
2908 | * only way that can be accomplished is if a new hist trigger is | ||
2909 | * created and the field variable defined on that. | ||
2910 | * | ||
2911 | * This function creates a new histogram compatible with the target | ||
2912 | * event (meaning a histogram with the same key as the target | ||
2913 | * histogram), and creates a variable for the specified field, but | ||
2914 | * with 'synthetic_' prepended to the variable name in order to avoid | ||
2915 | * collision with normal field variables. | ||
2916 | * | ||
2917 | * Return: The variable created for the field. | ||
2918 | */ | ||
2919 | static struct hist_field * | ||
2920 | create_field_var_hist(struct hist_trigger_data *target_hist_data, | ||
2921 | char *subsys_name, char *event_name, char *field_name) | ||
2922 | { | ||
2923 | struct trace_array *tr = target_hist_data->event_file->tr; | ||
2924 | struct hist_field *event_var = ERR_PTR(-EINVAL); | ||
2925 | struct hist_trigger_data *hist_data; | ||
2926 | unsigned int i, n, first = true; | ||
2927 | struct field_var_hist *var_hist; | ||
2928 | struct trace_event_file *file; | ||
2929 | struct hist_field *key_field; | ||
2930 | char *saved_filter; | ||
2931 | char *cmd; | ||
2932 | int ret; | ||
2933 | |||
2934 | if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { | ||
2935 | hist_err_event("onmatch: Too many field variables defined: ", | ||
2936 | subsys_name, event_name, field_name); | ||
2937 | return ERR_PTR(-EINVAL); | ||
2938 | } | ||
2939 | |||
2940 | file = event_file(tr, subsys_name, event_name); | ||
2941 | |||
2942 | if (IS_ERR(file)) { | ||
2943 | hist_err_event("onmatch: Event file not found: ", | ||
2944 | subsys_name, event_name, field_name); | ||
2945 | ret = PTR_ERR(file); | ||
2946 | return ERR_PTR(ret); | ||
2947 | } | ||
2948 | |||
2949 | /* | ||
2950 | * Look for a histogram compatible with target. We'll use the | ||
2951 | * found histogram specification to create a new matching | ||
2952 | * histogram with our variable on it. target_hist_data is not | ||
2953 | * yet a registered histogram so we can't use that. | ||
2954 | */ | ||
2955 | hist_data = find_compatible_hist(target_hist_data, file); | ||
2956 | if (!hist_data) { | ||
2957 | hist_err_event("onmatch: Matching event histogram not found: ", | ||
2958 | subsys_name, event_name, field_name); | ||
2959 | return ERR_PTR(-EINVAL); | ||
2960 | } | ||
2961 | |||
2962 | /* See if a synthetic field variable has already been created */ | ||
2963 | event_var = find_synthetic_field_var(target_hist_data, subsys_name, | ||
2964 | event_name, field_name); | ||
2965 | if (!IS_ERR_OR_NULL(event_var)) | ||
2966 | return event_var; | ||
2967 | |||
2968 | var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); | ||
2969 | if (!var_hist) | ||
2970 | return ERR_PTR(-ENOMEM); | ||
2971 | |||
2972 | cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); | ||
2973 | if (!cmd) { | ||
2974 | kfree(var_hist); | ||
2975 | return ERR_PTR(-ENOMEM); | ||
2976 | } | ||
2977 | |||
2978 | /* Use the same keys as the compatible histogram */ | ||
2979 | strcat(cmd, "keys="); | ||
2980 | |||
2981 | for_each_hist_key_field(i, hist_data) { | ||
2982 | key_field = hist_data->fields[i]; | ||
2983 | if (!first) | ||
2984 | strcat(cmd, ","); | ||
2985 | strcat(cmd, key_field->field->name); | ||
2986 | first = false; | ||
2987 | } | ||
2988 | |||
2989 | /* Create the synthetic field variable specification */ | ||
2990 | strcat(cmd, ":synthetic_"); | ||
2991 | strcat(cmd, field_name); | ||
2992 | strcat(cmd, "="); | ||
2993 | strcat(cmd, field_name); | ||
2994 | |||
2995 | /* Use the same filter as the compatible histogram */ | ||
2996 | saved_filter = find_trigger_filter(hist_data, file); | ||
2997 | if (saved_filter) { | ||
2998 | strcat(cmd, " if "); | ||
2999 | strcat(cmd, saved_filter); | ||
3000 | } | ||
3001 | |||
3002 | var_hist->cmd = kstrdup(cmd, GFP_KERNEL); | ||
3003 | if (!var_hist->cmd) { | ||
3004 | kfree(cmd); | ||
3005 | kfree(var_hist); | ||
3006 | return ERR_PTR(-ENOMEM); | ||
3007 | } | ||
3008 | |||
3009 | /* Save the compatible histogram information */ | ||
3010 | var_hist->hist_data = hist_data; | ||
3011 | |||
3012 | /* Create the new histogram with our variable */ | ||
3013 | ret = event_hist_trigger_func(&trigger_hist_cmd, file, | ||
3014 | "", "hist", cmd); | ||
3015 | if (ret) { | ||
3016 | kfree(cmd); | ||
3017 | kfree(var_hist->cmd); | ||
3018 | kfree(var_hist); | ||
3019 | hist_err_event("onmatch: Couldn't create histogram for field: ", | ||
3020 | subsys_name, event_name, field_name); | ||
3021 | return ERR_PTR(ret); | ||
3022 | } | ||
3023 | |||
3024 | kfree(cmd); | ||
3025 | |||
3026 | /* If we can't find the variable, something went wrong */ | ||
3027 | event_var = find_synthetic_field_var(target_hist_data, subsys_name, | ||
3028 | event_name, field_name); | ||
3029 | if (IS_ERR_OR_NULL(event_var)) { | ||
3030 | kfree(var_hist->cmd); | ||
3031 | kfree(var_hist); | ||
3032 | hist_err_event("onmatch: Couldn't find synthetic variable: ", | ||
3033 | subsys_name, event_name, field_name); | ||
3034 | return ERR_PTR(-EINVAL); | ||
3035 | } | ||
3036 | |||
3037 | n = target_hist_data->n_field_var_hists; | ||
3038 | target_hist_data->field_var_hists[n] = var_hist; | ||
3039 | target_hist_data->n_field_var_hists++; | ||
3040 | |||
3041 | return event_var; | ||
3042 | } | ||
3043 | |||
3044 | static struct hist_field * | ||
3045 | find_target_event_var(struct hist_trigger_data *hist_data, | ||
3046 | char *subsys_name, char *event_name, char *var_name) | ||
3047 | { | ||
3048 | struct trace_event_file *file = hist_data->event_file; | ||
3049 | struct hist_field *hist_field = NULL; | ||
3050 | |||
3051 | if (subsys_name) { | ||
3052 | struct trace_event_call *call; | ||
3053 | |||
3054 | if (!event_name) | ||
3055 | return NULL; | ||
3056 | |||
3057 | call = file->event_call; | ||
3058 | |||
3059 | if (strcmp(subsys_name, call->class->system) != 0) | ||
3060 | return NULL; | ||
3061 | |||
3062 | if (strcmp(event_name, trace_event_name(call)) != 0) | ||
3063 | return NULL; | ||
3064 | } | ||
3065 | |||
3066 | hist_field = find_var_field(hist_data, var_name); | ||
3067 | |||
3068 | return hist_field; | ||
3069 | } | ||
3070 | |||
3071 | static inline void __update_field_vars(struct tracing_map_elt *elt, | ||
3072 | struct ring_buffer_event *rbe, | ||
3073 | void *rec, | ||
3074 | struct field_var **field_vars, | ||
3075 | unsigned int n_field_vars, | ||
3076 | unsigned int field_var_str_start) | ||
3077 | { | ||
3078 | struct hist_elt_data *elt_data = elt->private_data; | ||
3079 | unsigned int i, j, var_idx; | ||
3080 | u64 var_val; | ||
3081 | |||
3082 | for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { | ||
3083 | struct field_var *field_var = field_vars[i]; | ||
3084 | struct hist_field *var = field_var->var; | ||
3085 | struct hist_field *val = field_var->val; | ||
3086 | |||
3087 | var_val = val->fn(val, elt, rbe, rec); | ||
3088 | var_idx = var->var.idx; | ||
3089 | |||
3090 | if (val->flags & HIST_FIELD_FL_STRING) { | ||
3091 | char *str = elt_data->field_var_str[j++]; | ||
3092 | char *val_str = (char *)(uintptr_t)var_val; | ||
3093 | |||
3094 | strscpy(str, val_str, STR_VAR_LEN_MAX); | ||
3095 | var_val = (u64)(uintptr_t)str; | ||
3096 | } | ||
3097 | tracing_map_set_var(elt, var_idx, var_val); | ||
3098 | } | ||
3099 | } | ||
3100 | |||
3101 | static void update_field_vars(struct hist_trigger_data *hist_data, | ||
3102 | struct tracing_map_elt *elt, | ||
3103 | struct ring_buffer_event *rbe, | ||
3104 | void *rec) | ||
3105 | { | ||
3106 | __update_field_vars(elt, rbe, rec, hist_data->field_vars, | ||
3107 | hist_data->n_field_vars, 0); | ||
3108 | } | ||
3109 | |||
3110 | static void update_max_vars(struct hist_trigger_data *hist_data, | ||
3111 | struct tracing_map_elt *elt, | ||
3112 | struct ring_buffer_event *rbe, | ||
3113 | void *rec) | ||
3114 | { | ||
3115 | __update_field_vars(elt, rbe, rec, hist_data->max_vars, | ||
3116 | hist_data->n_max_vars, hist_data->n_field_var_str); | ||
3117 | } | ||
3118 | |||
3119 | static struct hist_field *create_var(struct hist_trigger_data *hist_data, | ||
3120 | struct trace_event_file *file, | ||
3121 | char *name, int size, const char *type) | ||
3122 | { | ||
3123 | struct hist_field *var; | ||
3124 | int idx; | ||
3125 | |||
3126 | if (find_var(hist_data, file, name) && !hist_data->remove) { | ||
3127 | var = ERR_PTR(-EINVAL); | ||
3128 | goto out; | ||
3129 | } | ||
3130 | |||
3131 | var = kzalloc(sizeof(struct hist_field), GFP_KERNEL); | ||
3132 | if (!var) { | ||
3133 | var = ERR_PTR(-ENOMEM); | ||
3134 | goto out; | ||
3135 | } | ||
3136 | |||
3137 | idx = tracing_map_add_var(hist_data->map); | ||
3138 | if (idx < 0) { | ||
3139 | kfree(var); | ||
3140 | var = ERR_PTR(-EINVAL); | ||
3141 | goto out; | ||
3142 | } | ||
3143 | |||
3144 | var->flags = HIST_FIELD_FL_VAR; | ||
3145 | var->var.idx = idx; | ||
3146 | var->var.hist_data = var->hist_data = hist_data; | ||
3147 | var->size = size; | ||
3148 | var->var.name = kstrdup(name, GFP_KERNEL); | ||
3149 | var->type = kstrdup(type, GFP_KERNEL); | ||
3150 | if (!var->var.name || !var->type) { | ||
3151 | kfree(var->var.name); | ||
3152 | kfree(var->type); | ||
3153 | kfree(var); | ||
3154 | var = ERR_PTR(-ENOMEM); | ||
3155 | } | ||
3156 | out: | ||
3157 | return var; | ||
3158 | } | ||
3159 | |||
3160 | static struct field_var *create_field_var(struct hist_trigger_data *hist_data, | ||
3161 | struct trace_event_file *file, | ||
3162 | char *field_name) | ||
3163 | { | ||
3164 | struct hist_field *val = NULL, *var = NULL; | ||
3165 | unsigned long flags = HIST_FIELD_FL_VAR; | ||
3166 | struct field_var *field_var; | ||
477 | int ret = 0; | 3167 | int ret = 0; |
478 | 3168 | ||
479 | if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) | 3169 | if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { |
3170 | hist_err("Too many field variables defined: ", field_name); | ||
3171 | ret = -EINVAL; | ||
3172 | goto err; | ||
3173 | } | ||
3174 | |||
3175 | val = parse_atom(hist_data, file, field_name, &flags, NULL); | ||
3176 | if (IS_ERR(val)) { | ||
3177 | hist_err("Couldn't parse field variable: ", field_name); | ||
3178 | ret = PTR_ERR(val); | ||
3179 | goto err; | ||
3180 | } | ||
3181 | |||
3182 | var = create_var(hist_data, file, field_name, val->size, val->type); | ||
3183 | if (IS_ERR(var)) { | ||
3184 | hist_err("Couldn't create or find variable: ", field_name); | ||
3185 | kfree(val); | ||
3186 | ret = PTR_ERR(var); | ||
3187 | goto err; | ||
3188 | } | ||
3189 | |||
3190 | field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); | ||
3191 | if (!field_var) { | ||
3192 | kfree(val); | ||
3193 | kfree(var); | ||
3194 | ret = -ENOMEM; | ||
3195 | goto err; | ||
3196 | } | ||
3197 | |||
3198 | field_var->var = var; | ||
3199 | field_var->val = val; | ||
3200 | out: | ||
3201 | return field_var; | ||
3202 | err: | ||
3203 | field_var = ERR_PTR(ret); | ||
3204 | goto out; | ||
3205 | } | ||
3206 | |||
3207 | /** | ||
3208 | * create_target_field_var - Automatically create a variable for a field | ||
3209 | * @target_hist_data: The target hist trigger | ||
3210 | * @subsys_name: Optional subsystem name | ||
3211 | * @event_name: Optional event name | ||
3212 | * @var_name: The name of the field (and the resulting variable) | ||
3213 | * | ||
3214 | * Hist trigger actions fetch data from variables, not directly from | ||
3215 | * events. However, for convenience, users are allowed to directly | ||
3216 | * specify an event field in an action, which will be automatically | ||
3217 | * converted into a variable on their behalf. | ||
3218 | |||
3219 | * This function creates a field variable with the name var_name on | ||
3220 | * the hist trigger currently being defined on the target event. If | ||
3221 | * subsys_name and event_name are specified, this function simply | ||
3222 | * verifies that they do in fact match the target event subsystem and | ||
3223 | * event name. | ||
3224 | * | ||
3225 | * Return: The variable created for the field. | ||
3226 | */ | ||
3227 | static struct field_var * | ||
3228 | create_target_field_var(struct hist_trigger_data *target_hist_data, | ||
3229 | char *subsys_name, char *event_name, char *var_name) | ||
3230 | { | ||
3231 | struct trace_event_file *file = target_hist_data->event_file; | ||
3232 | |||
3233 | if (subsys_name) { | ||
3234 | struct trace_event_call *call; | ||
3235 | |||
3236 | if (!event_name) | ||
3237 | return NULL; | ||
3238 | |||
3239 | call = file->event_call; | ||
3240 | |||
3241 | if (strcmp(subsys_name, call->class->system) != 0) | ||
3242 | return NULL; | ||
3243 | |||
3244 | if (strcmp(event_name, trace_event_name(call)) != 0) | ||
3245 | return NULL; | ||
3246 | } | ||
3247 | |||
3248 | return create_field_var(target_hist_data, file, var_name); | ||
3249 | } | ||
3250 | |||
3251 | static void onmax_print(struct seq_file *m, | ||
3252 | struct hist_trigger_data *hist_data, | ||
3253 | struct tracing_map_elt *elt, | ||
3254 | struct action_data *data) | ||
3255 | { | ||
3256 | unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx; | ||
3257 | |||
3258 | seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx)); | ||
3259 | |||
3260 | for (i = 0; i < hist_data->n_max_vars; i++) { | ||
3261 | struct hist_field *save_val = hist_data->max_vars[i]->val; | ||
3262 | struct hist_field *save_var = hist_data->max_vars[i]->var; | ||
3263 | u64 val; | ||
3264 | |||
3265 | save_var_idx = save_var->var.idx; | ||
3266 | |||
3267 | val = tracing_map_read_var(elt, save_var_idx); | ||
3268 | |||
3269 | if (save_val->flags & HIST_FIELD_FL_STRING) { | ||
3270 | seq_printf(m, " %s: %-32s", save_var->var.name, | ||
3271 | (char *)(uintptr_t)(val)); | ||
3272 | } else | ||
3273 | seq_printf(m, " %s: %10llu", save_var->var.name, val); | ||
3274 | } | ||
3275 | } | ||
3276 | |||
3277 | static void onmax_save(struct hist_trigger_data *hist_data, | ||
3278 | struct tracing_map_elt *elt, void *rec, | ||
3279 | struct ring_buffer_event *rbe, | ||
3280 | struct action_data *data, u64 *var_ref_vals) | ||
3281 | { | ||
3282 | unsigned int max_idx = data->onmax.max_var->var.idx; | ||
3283 | unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx; | ||
3284 | |||
3285 | u64 var_val, max_val; | ||
3286 | |||
3287 | var_val = var_ref_vals[max_var_ref_idx]; | ||
3288 | max_val = tracing_map_read_var(elt, max_idx); | ||
3289 | |||
3290 | if (var_val <= max_val) | ||
3291 | return; | ||
3292 | |||
3293 | tracing_map_set_var(elt, max_idx, var_val); | ||
3294 | |||
3295 | update_max_vars(hist_data, elt, rbe, rec); | ||
3296 | } | ||
3297 | |||
3298 | static void onmax_destroy(struct action_data *data) | ||
3299 | { | ||
3300 | unsigned int i; | ||
3301 | |||
3302 | destroy_hist_field(data->onmax.max_var, 0); | ||
3303 | destroy_hist_field(data->onmax.var, 0); | ||
3304 | |||
3305 | kfree(data->onmax.var_str); | ||
3306 | kfree(data->onmax.fn_name); | ||
3307 | |||
3308 | for (i = 0; i < data->n_params; i++) | ||
3309 | kfree(data->params[i]); | ||
3310 | |||
3311 | kfree(data); | ||
3312 | } | ||
3313 | |||
3314 | static int onmax_create(struct hist_trigger_data *hist_data, | ||
3315 | struct action_data *data) | ||
3316 | { | ||
3317 | struct trace_event_file *file = hist_data->event_file; | ||
3318 | struct hist_field *var_field, *ref_field, *max_var; | ||
3319 | unsigned int var_ref_idx = hist_data->n_var_refs; | ||
3320 | struct field_var *field_var; | ||
3321 | char *onmax_var_str, *param; | ||
3322 | unsigned long flags; | ||
3323 | unsigned int i; | ||
3324 | int ret = 0; | ||
3325 | |||
3326 | onmax_var_str = data->onmax.var_str; | ||
3327 | if (onmax_var_str[0] != '$') { | ||
3328 | hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str); | ||
480 | return -EINVAL; | 3329 | return -EINVAL; |
3330 | } | ||
3331 | onmax_var_str++; | ||
481 | 3332 | ||
482 | field_name = strsep(&field_str, "."); | 3333 | var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str); |
483 | if (field_str) { | 3334 | if (!var_field) { |
484 | if (strcmp(field_str, "hex") == 0) | 3335 | hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str); |
485 | flags |= HIST_FIELD_FL_HEX; | 3336 | return -EINVAL; |
486 | else { | 3337 | } |
3338 | |||
3339 | flags = HIST_FIELD_FL_VAR_REF; | ||
3340 | ref_field = create_hist_field(hist_data, NULL, flags, NULL); | ||
3341 | if (!ref_field) | ||
3342 | return -ENOMEM; | ||
3343 | |||
3344 | if (init_var_ref(ref_field, var_field, NULL, NULL)) { | ||
3345 | destroy_hist_field(ref_field, 0); | ||
3346 | ret = -ENOMEM; | ||
3347 | goto out; | ||
3348 | } | ||
3349 | hist_data->var_refs[hist_data->n_var_refs] = ref_field; | ||
3350 | ref_field->var_ref_idx = hist_data->n_var_refs++; | ||
3351 | data->onmax.var = ref_field; | ||
3352 | |||
3353 | data->fn = onmax_save; | ||
3354 | data->onmax.max_var_ref_idx = var_ref_idx; | ||
3355 | max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); | ||
3356 | if (IS_ERR(max_var)) { | ||
3357 | hist_err("onmax: Couldn't create onmax variable: ", "max"); | ||
3358 | ret = PTR_ERR(max_var); | ||
3359 | goto out; | ||
3360 | } | ||
3361 | data->onmax.max_var = max_var; | ||
3362 | |||
3363 | for (i = 0; i < data->n_params; i++) { | ||
3364 | param = kstrdup(data->params[i], GFP_KERNEL); | ||
3365 | if (!param) { | ||
3366 | ret = -ENOMEM; | ||
3367 | goto out; | ||
3368 | } | ||
3369 | |||
3370 | field_var = create_target_field_var(hist_data, NULL, NULL, param); | ||
3371 | if (IS_ERR(field_var)) { | ||
3372 | hist_err("onmax: Couldn't create field variable: ", param); | ||
3373 | ret = PTR_ERR(field_var); | ||
3374 | kfree(param); | ||
3375 | goto out; | ||
3376 | } | ||
3377 | |||
3378 | hist_data->max_vars[hist_data->n_max_vars++] = field_var; | ||
3379 | if (field_var->val->flags & HIST_FIELD_FL_STRING) | ||
3380 | hist_data->n_max_var_str++; | ||
3381 | |||
3382 | kfree(param); | ||
3383 | } | ||
3384 | out: | ||
3385 | return ret; | ||
3386 | } | ||
3387 | |||
3388 | static int parse_action_params(char *params, struct action_data *data) | ||
3389 | { | ||
3390 | char *param, *saved_param; | ||
3391 | int ret = 0; | ||
3392 | |||
3393 | while (params) { | ||
3394 | if (data->n_params >= SYNTH_FIELDS_MAX) | ||
3395 | goto out; | ||
3396 | |||
3397 | param = strsep(¶ms, ","); | ||
3398 | if (!param) { | ||
487 | ret = -EINVAL; | 3399 | ret = -EINVAL; |
488 | goto out; | 3400 | goto out; |
489 | } | 3401 | } |
3402 | |||
3403 | param = strstrip(param); | ||
3404 | if (strlen(param) < 2) { | ||
3405 | hist_err("Invalid action param: ", param); | ||
3406 | ret = -EINVAL; | ||
3407 | goto out; | ||
3408 | } | ||
3409 | |||
3410 | saved_param = kstrdup(param, GFP_KERNEL); | ||
3411 | if (!saved_param) { | ||
3412 | ret = -ENOMEM; | ||
3413 | goto out; | ||
3414 | } | ||
3415 | |||
3416 | data->params[data->n_params++] = saved_param; | ||
490 | } | 3417 | } |
3418 | out: | ||
3419 | return ret; | ||
3420 | } | ||
491 | 3421 | ||
492 | field = trace_find_event_field(file->event_call, field_name); | 3422 | static struct action_data *onmax_parse(char *str) |
493 | if (!field || !field->size) { | 3423 | { |
3424 | char *onmax_fn_name, *onmax_var_str; | ||
3425 | struct action_data *data; | ||
3426 | int ret = -EINVAL; | ||
3427 | |||
3428 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
3429 | if (!data) | ||
3430 | return ERR_PTR(-ENOMEM); | ||
3431 | |||
3432 | onmax_var_str = strsep(&str, ")"); | ||
3433 | if (!onmax_var_str || !str) { | ||
494 | ret = -EINVAL; | 3434 | ret = -EINVAL; |
495 | goto out; | 3435 | goto free; |
3436 | } | ||
3437 | |||
3438 | data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL); | ||
3439 | if (!data->onmax.var_str) { | ||
3440 | ret = -ENOMEM; | ||
3441 | goto free; | ||
3442 | } | ||
3443 | |||
3444 | strsep(&str, "."); | ||
3445 | if (!str) | ||
3446 | goto free; | ||
3447 | |||
3448 | onmax_fn_name = strsep(&str, "("); | ||
3449 | if (!onmax_fn_name || !str) | ||
3450 | goto free; | ||
3451 | |||
3452 | if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) { | ||
3453 | char *params = strsep(&str, ")"); | ||
3454 | |||
3455 | if (!params) { | ||
3456 | ret = -EINVAL; | ||
3457 | goto free; | ||
3458 | } | ||
3459 | |||
3460 | ret = parse_action_params(params, data); | ||
3461 | if (ret) | ||
3462 | goto free; | ||
3463 | } else | ||
3464 | goto free; | ||
3465 | |||
3466 | data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL); | ||
3467 | if (!data->onmax.fn_name) { | ||
3468 | ret = -ENOMEM; | ||
3469 | goto free; | ||
3470 | } | ||
3471 | out: | ||
3472 | return data; | ||
3473 | free: | ||
3474 | onmax_destroy(data); | ||
3475 | data = ERR_PTR(ret); | ||
3476 | goto out; | ||
3477 | } | ||
3478 | |||
3479 | static void onmatch_destroy(struct action_data *data) | ||
3480 | { | ||
3481 | unsigned int i; | ||
3482 | |||
3483 | mutex_lock(&synth_event_mutex); | ||
3484 | |||
3485 | kfree(data->onmatch.match_event); | ||
3486 | kfree(data->onmatch.match_event_system); | ||
3487 | kfree(data->onmatch.synth_event_name); | ||
3488 | |||
3489 | for (i = 0; i < data->n_params; i++) | ||
3490 | kfree(data->params[i]); | ||
3491 | |||
3492 | if (data->onmatch.synth_event) | ||
3493 | data->onmatch.synth_event->ref--; | ||
3494 | |||
3495 | kfree(data); | ||
3496 | |||
3497 | mutex_unlock(&synth_event_mutex); | ||
3498 | } | ||
3499 | |||
3500 | static void destroy_field_var(struct field_var *field_var) | ||
3501 | { | ||
3502 | if (!field_var) | ||
3503 | return; | ||
3504 | |||
3505 | destroy_hist_field(field_var->var, 0); | ||
3506 | destroy_hist_field(field_var->val, 0); | ||
3507 | |||
3508 | kfree(field_var); | ||
3509 | } | ||
3510 | |||
3511 | static void destroy_field_vars(struct hist_trigger_data *hist_data) | ||
3512 | { | ||
3513 | unsigned int i; | ||
3514 | |||
3515 | for (i = 0; i < hist_data->n_field_vars; i++) | ||
3516 | destroy_field_var(hist_data->field_vars[i]); | ||
3517 | } | ||
3518 | |||
3519 | static void save_field_var(struct hist_trigger_data *hist_data, | ||
3520 | struct field_var *field_var) | ||
3521 | { | ||
3522 | hist_data->field_vars[hist_data->n_field_vars++] = field_var; | ||
3523 | |||
3524 | if (field_var->val->flags & HIST_FIELD_FL_STRING) | ||
3525 | hist_data->n_field_var_str++; | ||
3526 | } | ||
3527 | |||
3528 | |||
3529 | static void destroy_synth_var_refs(struct hist_trigger_data *hist_data) | ||
3530 | { | ||
3531 | unsigned int i; | ||
3532 | |||
3533 | for (i = 0; i < hist_data->n_synth_var_refs; i++) | ||
3534 | destroy_hist_field(hist_data->synth_var_refs[i], 0); | ||
3535 | } | ||
3536 | |||
3537 | static void save_synth_var_ref(struct hist_trigger_data *hist_data, | ||
3538 | struct hist_field *var_ref) | ||
3539 | { | ||
3540 | hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; | ||
3541 | |||
3542 | hist_data->var_refs[hist_data->n_var_refs] = var_ref; | ||
3543 | var_ref->var_ref_idx = hist_data->n_var_refs++; | ||
3544 | } | ||
3545 | |||
3546 | static int check_synth_field(struct synth_event *event, | ||
3547 | struct hist_field *hist_field, | ||
3548 | unsigned int field_pos) | ||
3549 | { | ||
3550 | struct synth_field *field; | ||
3551 | |||
3552 | if (field_pos >= event->n_fields) | ||
3553 | return -EINVAL; | ||
3554 | |||
3555 | field = event->fields[field_pos]; | ||
3556 | |||
3557 | if (strcmp(field->type, hist_field->type) != 0) | ||
3558 | return -EINVAL; | ||
3559 | |||
3560 | return 0; | ||
3561 | } | ||
3562 | |||
3563 | static struct hist_field * | ||
3564 | onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, | ||
3565 | char *system, char *event, char *var) | ||
3566 | { | ||
3567 | struct hist_field *hist_field; | ||
3568 | |||
3569 | var++; /* skip '$' */ | ||
3570 | |||
3571 | hist_field = find_target_event_var(hist_data, system, event, var); | ||
3572 | if (!hist_field) { | ||
3573 | if (!system) { | ||
3574 | system = data->onmatch.match_event_system; | ||
3575 | event = data->onmatch.match_event; | ||
3576 | } | ||
3577 | |||
3578 | hist_field = find_event_var(hist_data, system, event, var); | ||
3579 | } | ||
3580 | |||
3581 | if (!hist_field) | ||
3582 | hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var); | ||
3583 | |||
3584 | return hist_field; | ||
3585 | } | ||
3586 | |||
3587 | static struct hist_field * | ||
3588 | onmatch_create_field_var(struct hist_trigger_data *hist_data, | ||
3589 | struct action_data *data, char *system, | ||
3590 | char *event, char *var) | ||
3591 | { | ||
3592 | struct hist_field *hist_field = NULL; | ||
3593 | struct field_var *field_var; | ||
3594 | |||
3595 | /* | ||
3596 | * First try to create a field var on the target event (the | ||
3597 | * currently being defined). This will create a variable for | ||
3598 | * unqualified fields on the target event, or if qualified, | ||
3599 | * target fields that have qualified names matching the target. | ||
3600 | */ | ||
3601 | field_var = create_target_field_var(hist_data, system, event, var); | ||
3602 | |||
3603 | if (field_var && !IS_ERR(field_var)) { | ||
3604 | save_field_var(hist_data, field_var); | ||
3605 | hist_field = field_var->var; | ||
3606 | } else { | ||
3607 | field_var = NULL; | ||
3608 | /* | ||
3609 | * If no explicit system.event is specfied, default to | ||
3610 | * looking for fields on the onmatch(system.event.xxx) | ||
3611 | * event. | ||
3612 | */ | ||
3613 | if (!system) { | ||
3614 | system = data->onmatch.match_event_system; | ||
3615 | event = data->onmatch.match_event; | ||
3616 | } | ||
3617 | |||
3618 | /* | ||
3619 | * At this point, we're looking at a field on another | ||
3620 | * event. Because we can't modify a hist trigger on | ||
3621 | * another event to add a variable for a field, we need | ||
3622 | * to create a new trigger on that event and create the | ||
3623 | * variable at the same time. | ||
3624 | */ | ||
3625 | hist_field = create_field_var_hist(hist_data, system, event, var); | ||
3626 | if (IS_ERR(hist_field)) | ||
3627 | goto free; | ||
3628 | } | ||
3629 | out: | ||
3630 | return hist_field; | ||
3631 | free: | ||
3632 | destroy_field_var(field_var); | ||
3633 | hist_field = NULL; | ||
3634 | goto out; | ||
3635 | } | ||
3636 | |||
3637 | static int onmatch_create(struct hist_trigger_data *hist_data, | ||
3638 | struct trace_event_file *file, | ||
3639 | struct action_data *data) | ||
3640 | { | ||
3641 | char *event_name, *param, *system = NULL; | ||
3642 | struct hist_field *hist_field, *var_ref; | ||
3643 | unsigned int i, var_ref_idx; | ||
3644 | unsigned int field_pos = 0; | ||
3645 | struct synth_event *event; | ||
3646 | int ret = 0; | ||
3647 | |||
3648 | mutex_lock(&synth_event_mutex); | ||
3649 | event = find_synth_event(data->onmatch.synth_event_name); | ||
3650 | if (!event) { | ||
3651 | hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name); | ||
3652 | mutex_unlock(&synth_event_mutex); | ||
3653 | return -EINVAL; | ||
3654 | } | ||
3655 | event->ref++; | ||
3656 | mutex_unlock(&synth_event_mutex); | ||
3657 | |||
3658 | var_ref_idx = hist_data->n_var_refs; | ||
3659 | |||
3660 | for (i = 0; i < data->n_params; i++) { | ||
3661 | char *p; | ||
3662 | |||
3663 | p = param = kstrdup(data->params[i], GFP_KERNEL); | ||
3664 | if (!param) { | ||
3665 | ret = -ENOMEM; | ||
3666 | goto err; | ||
3667 | } | ||
3668 | |||
3669 | system = strsep(¶m, "."); | ||
3670 | if (!param) { | ||
3671 | param = (char *)system; | ||
3672 | system = event_name = NULL; | ||
3673 | } else { | ||
3674 | event_name = strsep(¶m, "."); | ||
3675 | if (!param) { | ||
3676 | kfree(p); | ||
3677 | ret = -EINVAL; | ||
3678 | goto err; | ||
3679 | } | ||
3680 | } | ||
3681 | |||
3682 | if (param[0] == '$') | ||
3683 | hist_field = onmatch_find_var(hist_data, data, system, | ||
3684 | event_name, param); | ||
3685 | else | ||
3686 | hist_field = onmatch_create_field_var(hist_data, data, | ||
3687 | system, | ||
3688 | event_name, | ||
3689 | param); | ||
3690 | |||
3691 | if (!hist_field) { | ||
3692 | kfree(p); | ||
3693 | ret = -EINVAL; | ||
3694 | goto err; | ||
3695 | } | ||
3696 | |||
3697 | if (check_synth_field(event, hist_field, field_pos) == 0) { | ||
3698 | var_ref = create_var_ref(hist_field, system, event_name); | ||
3699 | if (!var_ref) { | ||
3700 | kfree(p); | ||
3701 | ret = -ENOMEM; | ||
3702 | goto err; | ||
3703 | } | ||
3704 | |||
3705 | save_synth_var_ref(hist_data, var_ref); | ||
3706 | field_pos++; | ||
3707 | kfree(p); | ||
3708 | continue; | ||
3709 | } | ||
3710 | |||
3711 | hist_err_event("onmatch: Param type doesn't match synthetic event field type: ", | ||
3712 | system, event_name, param); | ||
3713 | kfree(p); | ||
3714 | ret = -EINVAL; | ||
3715 | goto err; | ||
3716 | } | ||
3717 | |||
3718 | if (field_pos != event->n_fields) { | ||
3719 | hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name); | ||
3720 | ret = -EINVAL; | ||
3721 | goto err; | ||
3722 | } | ||
3723 | |||
3724 | data->fn = action_trace; | ||
3725 | data->onmatch.synth_event = event; | ||
3726 | data->onmatch.var_ref_idx = var_ref_idx; | ||
3727 | out: | ||
3728 | return ret; | ||
3729 | err: | ||
3730 | mutex_lock(&synth_event_mutex); | ||
3731 | event->ref--; | ||
3732 | mutex_unlock(&synth_event_mutex); | ||
3733 | |||
3734 | goto out; | ||
3735 | } | ||
3736 | |||
3737 | static struct action_data *onmatch_parse(struct trace_array *tr, char *str) | ||
3738 | { | ||
3739 | char *match_event, *match_event_system; | ||
3740 | char *synth_event_name, *params; | ||
3741 | struct action_data *data; | ||
3742 | int ret = -EINVAL; | ||
3743 | |||
3744 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
3745 | if (!data) | ||
3746 | return ERR_PTR(-ENOMEM); | ||
3747 | |||
3748 | match_event = strsep(&str, ")"); | ||
3749 | if (!match_event || !str) { | ||
3750 | hist_err("onmatch: Missing closing paren: ", match_event); | ||
3751 | goto free; | ||
3752 | } | ||
3753 | |||
3754 | match_event_system = strsep(&match_event, "."); | ||
3755 | if (!match_event) { | ||
3756 | hist_err("onmatch: Missing subsystem for match event: ", match_event_system); | ||
3757 | goto free; | ||
3758 | } | ||
3759 | |||
3760 | if (IS_ERR(event_file(tr, match_event_system, match_event))) { | ||
3761 | hist_err_event("onmatch: Invalid subsystem or event name: ", | ||
3762 | match_event_system, match_event, NULL); | ||
3763 | goto free; | ||
3764 | } | ||
3765 | |||
3766 | data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); | ||
3767 | if (!data->onmatch.match_event) { | ||
3768 | ret = -ENOMEM; | ||
3769 | goto free; | ||
3770 | } | ||
3771 | |||
3772 | data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL); | ||
3773 | if (!data->onmatch.match_event_system) { | ||
3774 | ret = -ENOMEM; | ||
3775 | goto free; | ||
3776 | } | ||
3777 | |||
3778 | strsep(&str, "."); | ||
3779 | if (!str) { | ||
3780 | hist_err("onmatch: Missing . after onmatch(): ", str); | ||
3781 | goto free; | ||
3782 | } | ||
3783 | |||
3784 | synth_event_name = strsep(&str, "("); | ||
3785 | if (!synth_event_name || !str) { | ||
3786 | hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name); | ||
3787 | goto free; | ||
496 | } | 3788 | } |
497 | 3789 | ||
498 | hist_data->fields[val_idx] = create_hist_field(field, flags); | 3790 | data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); |
499 | if (!hist_data->fields[val_idx]) { | 3791 | if (!data->onmatch.synth_event_name) { |
500 | ret = -ENOMEM; | 3792 | ret = -ENOMEM; |
3793 | goto free; | ||
3794 | } | ||
3795 | |||
3796 | params = strsep(&str, ")"); | ||
3797 | if (!params || !str || (str && strlen(str))) { | ||
3798 | hist_err("onmatch: Missing closing paramlist paren: ", params); | ||
3799 | goto free; | ||
3800 | } | ||
3801 | |||
3802 | ret = parse_action_params(params, data); | ||
3803 | if (ret) | ||
3804 | goto free; | ||
3805 | out: | ||
3806 | return data; | ||
3807 | free: | ||
3808 | onmatch_destroy(data); | ||
3809 | data = ERR_PTR(ret); | ||
3810 | goto out; | ||
3811 | } | ||
3812 | |||
3813 | static int create_hitcount_val(struct hist_trigger_data *hist_data) | ||
3814 | { | ||
3815 | hist_data->fields[HITCOUNT_IDX] = | ||
3816 | create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL); | ||
3817 | if (!hist_data->fields[HITCOUNT_IDX]) | ||
3818 | return -ENOMEM; | ||
3819 | |||
3820 | hist_data->n_vals++; | ||
3821 | hist_data->n_fields++; | ||
3822 | |||
3823 | if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) | ||
3824 | return -EINVAL; | ||
3825 | |||
3826 | return 0; | ||
3827 | } | ||
3828 | |||
3829 | static int __create_val_field(struct hist_trigger_data *hist_data, | ||
3830 | unsigned int val_idx, | ||
3831 | struct trace_event_file *file, | ||
3832 | char *var_name, char *field_str, | ||
3833 | unsigned long flags) | ||
3834 | { | ||
3835 | struct hist_field *hist_field; | ||
3836 | int ret = 0; | ||
3837 | |||
3838 | hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); | ||
3839 | if (IS_ERR(hist_field)) { | ||
3840 | ret = PTR_ERR(hist_field); | ||
501 | goto out; | 3841 | goto out; |
502 | } | 3842 | } |
503 | 3843 | ||
3844 | hist_data->fields[val_idx] = hist_field; | ||
3845 | |||
504 | ++hist_data->n_vals; | 3846 | ++hist_data->n_vals; |
3847 | ++hist_data->n_fields; | ||
505 | 3848 | ||
506 | if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) | 3849 | if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) |
507 | ret = -EINVAL; | 3850 | ret = -EINVAL; |
508 | out: | 3851 | out: |
509 | return ret; | 3852 | return ret; |
510 | } | 3853 | } |
511 | 3854 | ||
3855 | static int create_val_field(struct hist_trigger_data *hist_data, | ||
3856 | unsigned int val_idx, | ||
3857 | struct trace_event_file *file, | ||
3858 | char *field_str) | ||
3859 | { | ||
3860 | if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) | ||
3861 | return -EINVAL; | ||
3862 | |||
3863 | return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0); | ||
3864 | } | ||
3865 | |||
3866 | static int create_var_field(struct hist_trigger_data *hist_data, | ||
3867 | unsigned int val_idx, | ||
3868 | struct trace_event_file *file, | ||
3869 | char *var_name, char *expr_str) | ||
3870 | { | ||
3871 | unsigned long flags = 0; | ||
3872 | |||
3873 | if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) | ||
3874 | return -EINVAL; | ||
3875 | |||
3876 | if (find_var(hist_data, file, var_name) && !hist_data->remove) { | ||
3877 | hist_err("Variable already defined: ", var_name); | ||
3878 | return -EINVAL; | ||
3879 | } | ||
3880 | |||
3881 | flags |= HIST_FIELD_FL_VAR; | ||
3882 | hist_data->n_vars++; | ||
3883 | if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX)) | ||
3884 | return -EINVAL; | ||
3885 | |||
3886 | return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); | ||
3887 | } | ||
3888 | |||
512 | static int create_val_fields(struct hist_trigger_data *hist_data, | 3889 | static int create_val_fields(struct hist_trigger_data *hist_data, |
513 | struct trace_event_file *file) | 3890 | struct trace_event_file *file) |
514 | { | 3891 | { |
515 | char *fields_str, *field_str; | 3892 | char *fields_str, *field_str; |
516 | unsigned int i, j; | 3893 | unsigned int i, j = 1; |
517 | int ret; | 3894 | int ret; |
518 | 3895 | ||
519 | ret = create_hitcount_val(hist_data); | 3896 | ret = create_hitcount_val(hist_data); |
@@ -533,12 +3910,15 @@ static int create_val_fields(struct hist_trigger_data *hist_data, | |||
533 | field_str = strsep(&fields_str, ","); | 3910 | field_str = strsep(&fields_str, ","); |
534 | if (!field_str) | 3911 | if (!field_str) |
535 | break; | 3912 | break; |
3913 | |||
536 | if (strcmp(field_str, "hitcount") == 0) | 3914 | if (strcmp(field_str, "hitcount") == 0) |
537 | continue; | 3915 | continue; |
3916 | |||
538 | ret = create_val_field(hist_data, j++, file, field_str); | 3917 | ret = create_val_field(hist_data, j++, file, field_str); |
539 | if (ret) | 3918 | if (ret) |
540 | goto out; | 3919 | goto out; |
541 | } | 3920 | } |
3921 | |||
542 | if (fields_str && (strcmp(fields_str, "hitcount") != 0)) | 3922 | if (fields_str && (strcmp(fields_str, "hitcount") != 0)) |
543 | ret = -EINVAL; | 3923 | ret = -EINVAL; |
544 | out: | 3924 | out: |
@@ -551,12 +3931,13 @@ static int create_key_field(struct hist_trigger_data *hist_data, | |||
551 | struct trace_event_file *file, | 3931 | struct trace_event_file *file, |
552 | char *field_str) | 3932 | char *field_str) |
553 | { | 3933 | { |
554 | struct ftrace_event_field *field = NULL; | 3934 | struct hist_field *hist_field = NULL; |
3935 | |||
555 | unsigned long flags = 0; | 3936 | unsigned long flags = 0; |
556 | unsigned int key_size; | 3937 | unsigned int key_size; |
557 | int ret = 0; | 3938 | int ret = 0; |
558 | 3939 | ||
559 | if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX)) | 3940 | if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) |
560 | return -EINVAL; | 3941 | return -EINVAL; |
561 | 3942 | ||
562 | flags |= HIST_FIELD_FL_KEY; | 3943 | flags |= HIST_FIELD_FL_KEY; |
@@ -564,57 +3945,40 @@ static int create_key_field(struct hist_trigger_data *hist_data, | |||
564 | if (strcmp(field_str, "stacktrace") == 0) { | 3945 | if (strcmp(field_str, "stacktrace") == 0) { |
565 | flags |= HIST_FIELD_FL_STACKTRACE; | 3946 | flags |= HIST_FIELD_FL_STACKTRACE; |
566 | key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; | 3947 | key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; |
3948 | hist_field = create_hist_field(hist_data, NULL, flags, NULL); | ||
567 | } else { | 3949 | } else { |
568 | char *field_name = strsep(&field_str, "."); | 3950 | hist_field = parse_expr(hist_data, file, field_str, flags, |
569 | 3951 | NULL, 0); | |
570 | if (field_str) { | 3952 | if (IS_ERR(hist_field)) { |
571 | if (strcmp(field_str, "hex") == 0) | 3953 | ret = PTR_ERR(hist_field); |
572 | flags |= HIST_FIELD_FL_HEX; | 3954 | goto out; |
573 | else if (strcmp(field_str, "sym") == 0) | ||
574 | flags |= HIST_FIELD_FL_SYM; | ||
575 | else if (strcmp(field_str, "sym-offset") == 0) | ||
576 | flags |= HIST_FIELD_FL_SYM_OFFSET; | ||
577 | else if ((strcmp(field_str, "execname") == 0) && | ||
578 | (strcmp(field_name, "common_pid") == 0)) | ||
579 | flags |= HIST_FIELD_FL_EXECNAME; | ||
580 | else if (strcmp(field_str, "syscall") == 0) | ||
581 | flags |= HIST_FIELD_FL_SYSCALL; | ||
582 | else if (strcmp(field_str, "log2") == 0) | ||
583 | flags |= HIST_FIELD_FL_LOG2; | ||
584 | else { | ||
585 | ret = -EINVAL; | ||
586 | goto out; | ||
587 | } | ||
588 | } | 3955 | } |
589 | 3956 | ||
590 | field = trace_find_event_field(file->event_call, field_name); | 3957 | if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { |
591 | if (!field || !field->size) { | 3958 | hist_err("Using variable references as keys not supported: ", field_str); |
3959 | destroy_hist_field(hist_field, 0); | ||
592 | ret = -EINVAL; | 3960 | ret = -EINVAL; |
593 | goto out; | 3961 | goto out; |
594 | } | 3962 | } |
595 | 3963 | ||
596 | if (is_string_field(field)) | 3964 | key_size = hist_field->size; |
597 | key_size = MAX_FILTER_STR_VAL; | ||
598 | else | ||
599 | key_size = field->size; | ||
600 | } | 3965 | } |
601 | 3966 | ||
602 | hist_data->fields[key_idx] = create_hist_field(field, flags); | 3967 | hist_data->fields[key_idx] = hist_field; |
603 | if (!hist_data->fields[key_idx]) { | ||
604 | ret = -ENOMEM; | ||
605 | goto out; | ||
606 | } | ||
607 | 3968 | ||
608 | key_size = ALIGN(key_size, sizeof(u64)); | 3969 | key_size = ALIGN(key_size, sizeof(u64)); |
609 | hist_data->fields[key_idx]->size = key_size; | 3970 | hist_data->fields[key_idx]->size = key_size; |
610 | hist_data->fields[key_idx]->offset = key_offset; | 3971 | hist_data->fields[key_idx]->offset = key_offset; |
3972 | |||
611 | hist_data->key_size += key_size; | 3973 | hist_data->key_size += key_size; |
3974 | |||
612 | if (hist_data->key_size > HIST_KEY_SIZE_MAX) { | 3975 | if (hist_data->key_size > HIST_KEY_SIZE_MAX) { |
613 | ret = -EINVAL; | 3976 | ret = -EINVAL; |
614 | goto out; | 3977 | goto out; |
615 | } | 3978 | } |
616 | 3979 | ||
617 | hist_data->n_keys++; | 3980 | hist_data->n_keys++; |
3981 | hist_data->n_fields++; | ||
618 | 3982 | ||
619 | if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) | 3983 | if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) |
620 | return -EINVAL; | 3984 | return -EINVAL; |
@@ -658,21 +4022,113 @@ static int create_key_fields(struct hist_trigger_data *hist_data, | |||
658 | return ret; | 4022 | return ret; |
659 | } | 4023 | } |
660 | 4024 | ||
4025 | static int create_var_fields(struct hist_trigger_data *hist_data, | ||
4026 | struct trace_event_file *file) | ||
4027 | { | ||
4028 | unsigned int i, j = hist_data->n_vals; | ||
4029 | int ret = 0; | ||
4030 | |||
4031 | unsigned int n_vars = hist_data->attrs->var_defs.n_vars; | ||
4032 | |||
4033 | for (i = 0; i < n_vars; i++) { | ||
4034 | char *var_name = hist_data->attrs->var_defs.name[i]; | ||
4035 | char *expr = hist_data->attrs->var_defs.expr[i]; | ||
4036 | |||
4037 | ret = create_var_field(hist_data, j++, file, var_name, expr); | ||
4038 | if (ret) | ||
4039 | goto out; | ||
4040 | } | ||
4041 | out: | ||
4042 | return ret; | ||
4043 | } | ||
4044 | |||
4045 | static void free_var_defs(struct hist_trigger_data *hist_data) | ||
4046 | { | ||
4047 | unsigned int i; | ||
4048 | |||
4049 | for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { | ||
4050 | kfree(hist_data->attrs->var_defs.name[i]); | ||
4051 | kfree(hist_data->attrs->var_defs.expr[i]); | ||
4052 | } | ||
4053 | |||
4054 | hist_data->attrs->var_defs.n_vars = 0; | ||
4055 | } | ||
4056 | |||
4057 | static int parse_var_defs(struct hist_trigger_data *hist_data) | ||
4058 | { | ||
4059 | char *s, *str, *var_name, *field_str; | ||
4060 | unsigned int i, j, n_vars = 0; | ||
4061 | int ret = 0; | ||
4062 | |||
4063 | for (i = 0; i < hist_data->attrs->n_assignments; i++) { | ||
4064 | str = hist_data->attrs->assignment_str[i]; | ||
4065 | for (j = 0; j < TRACING_MAP_VARS_MAX; j++) { | ||
4066 | field_str = strsep(&str, ","); | ||
4067 | if (!field_str) | ||
4068 | break; | ||
4069 | |||
4070 | var_name = strsep(&field_str, "="); | ||
4071 | if (!var_name || !field_str) { | ||
4072 | hist_err("Malformed assignment: ", var_name); | ||
4073 | ret = -EINVAL; | ||
4074 | goto free; | ||
4075 | } | ||
4076 | |||
4077 | if (n_vars == TRACING_MAP_VARS_MAX) { | ||
4078 | hist_err("Too many variables defined: ", var_name); | ||
4079 | ret = -EINVAL; | ||
4080 | goto free; | ||
4081 | } | ||
4082 | |||
4083 | s = kstrdup(var_name, GFP_KERNEL); | ||
4084 | if (!s) { | ||
4085 | ret = -ENOMEM; | ||
4086 | goto free; | ||
4087 | } | ||
4088 | hist_data->attrs->var_defs.name[n_vars] = s; | ||
4089 | |||
4090 | s = kstrdup(field_str, GFP_KERNEL); | ||
4091 | if (!s) { | ||
4092 | kfree(hist_data->attrs->var_defs.name[n_vars]); | ||
4093 | ret = -ENOMEM; | ||
4094 | goto free; | ||
4095 | } | ||
4096 | hist_data->attrs->var_defs.expr[n_vars++] = s; | ||
4097 | |||
4098 | hist_data->attrs->var_defs.n_vars = n_vars; | ||
4099 | } | ||
4100 | } | ||
4101 | |||
4102 | return ret; | ||
4103 | free: | ||
4104 | free_var_defs(hist_data); | ||
4105 | |||
4106 | return ret; | ||
4107 | } | ||
4108 | |||
661 | static int create_hist_fields(struct hist_trigger_data *hist_data, | 4109 | static int create_hist_fields(struct hist_trigger_data *hist_data, |
662 | struct trace_event_file *file) | 4110 | struct trace_event_file *file) |
663 | { | 4111 | { |
664 | int ret; | 4112 | int ret; |
665 | 4113 | ||
4114 | ret = parse_var_defs(hist_data); | ||
4115 | if (ret) | ||
4116 | goto out; | ||
4117 | |||
666 | ret = create_val_fields(hist_data, file); | 4118 | ret = create_val_fields(hist_data, file); |
667 | if (ret) | 4119 | if (ret) |
668 | goto out; | 4120 | goto out; |
669 | 4121 | ||
670 | ret = create_key_fields(hist_data, file); | 4122 | ret = create_var_fields(hist_data, file); |
671 | if (ret) | 4123 | if (ret) |
672 | goto out; | 4124 | goto out; |
673 | 4125 | ||
674 | hist_data->n_fields = hist_data->n_vals + hist_data->n_keys; | 4126 | ret = create_key_fields(hist_data, file); |
4127 | if (ret) | ||
4128 | goto out; | ||
675 | out: | 4129 | out: |
4130 | free_var_defs(hist_data); | ||
4131 | |||
676 | return ret; | 4132 | return ret; |
677 | } | 4133 | } |
678 | 4134 | ||
@@ -695,7 +4151,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) | |||
695 | char *fields_str = hist_data->attrs->sort_key_str; | 4151 | char *fields_str = hist_data->attrs->sort_key_str; |
696 | struct tracing_map_sort_key *sort_key; | 4152 | struct tracing_map_sort_key *sort_key; |
697 | int descending, ret = 0; | 4153 | int descending, ret = 0; |
698 | unsigned int i, j; | 4154 | unsigned int i, j, k; |
699 | 4155 | ||
700 | hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */ | 4156 | hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */ |
701 | 4157 | ||
@@ -743,12 +4199,19 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) | |||
743 | continue; | 4199 | continue; |
744 | } | 4200 | } |
745 | 4201 | ||
746 | for (j = 1; j < hist_data->n_fields; j++) { | 4202 | for (j = 1, k = 1; j < hist_data->n_fields; j++) { |
4203 | unsigned int idx; | ||
4204 | |||
747 | hist_field = hist_data->fields[j]; | 4205 | hist_field = hist_data->fields[j]; |
4206 | if (hist_field->flags & HIST_FIELD_FL_VAR) | ||
4207 | continue; | ||
4208 | |||
4209 | idx = k++; | ||
4210 | |||
748 | test_name = hist_field_name(hist_field, 0); | 4211 | test_name = hist_field_name(hist_field, 0); |
749 | 4212 | ||
750 | if (strcmp(field_name, test_name) == 0) { | 4213 | if (strcmp(field_name, test_name) == 0) { |
751 | sort_key->field_idx = j; | 4214 | sort_key->field_idx = idx; |
752 | descending = is_descending(field_str); | 4215 | descending = is_descending(field_str); |
753 | if (descending < 0) { | 4216 | if (descending < 0) { |
754 | ret = descending; | 4217 | ret = descending; |
@@ -763,16 +4226,230 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) | |||
763 | break; | 4226 | break; |
764 | } | 4227 | } |
765 | } | 4228 | } |
4229 | |||
766 | hist_data->n_sort_keys = i; | 4230 | hist_data->n_sort_keys = i; |
767 | out: | 4231 | out: |
768 | return ret; | 4232 | return ret; |
769 | } | 4233 | } |
770 | 4234 | ||
4235 | static void destroy_actions(struct hist_trigger_data *hist_data) | ||
4236 | { | ||
4237 | unsigned int i; | ||
4238 | |||
4239 | for (i = 0; i < hist_data->n_actions; i++) { | ||
4240 | struct action_data *data = hist_data->actions[i]; | ||
4241 | |||
4242 | if (data->fn == action_trace) | ||
4243 | onmatch_destroy(data); | ||
4244 | else if (data->fn == onmax_save) | ||
4245 | onmax_destroy(data); | ||
4246 | else | ||
4247 | kfree(data); | ||
4248 | } | ||
4249 | } | ||
4250 | |||
4251 | static int parse_actions(struct hist_trigger_data *hist_data) | ||
4252 | { | ||
4253 | struct trace_array *tr = hist_data->event_file->tr; | ||
4254 | struct action_data *data; | ||
4255 | unsigned int i; | ||
4256 | int ret = 0; | ||
4257 | char *str; | ||
4258 | |||
4259 | for (i = 0; i < hist_data->attrs->n_actions; i++) { | ||
4260 | str = hist_data->attrs->action_str[i]; | ||
4261 | |||
4262 | if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) { | ||
4263 | char *action_str = str + strlen("onmatch("); | ||
4264 | |||
4265 | data = onmatch_parse(tr, action_str); | ||
4266 | if (IS_ERR(data)) { | ||
4267 | ret = PTR_ERR(data); | ||
4268 | break; | ||
4269 | } | ||
4270 | data->fn = action_trace; | ||
4271 | } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) { | ||
4272 | char *action_str = str + strlen("onmax("); | ||
4273 | |||
4274 | data = onmax_parse(action_str); | ||
4275 | if (IS_ERR(data)) { | ||
4276 | ret = PTR_ERR(data); | ||
4277 | break; | ||
4278 | } | ||
4279 | data->fn = onmax_save; | ||
4280 | } else { | ||
4281 | ret = -EINVAL; | ||
4282 | break; | ||
4283 | } | ||
4284 | |||
4285 | hist_data->actions[hist_data->n_actions++] = data; | ||
4286 | } | ||
4287 | |||
4288 | return ret; | ||
4289 | } | ||
4290 | |||
4291 | static int create_actions(struct hist_trigger_data *hist_data, | ||
4292 | struct trace_event_file *file) | ||
4293 | { | ||
4294 | struct action_data *data; | ||
4295 | unsigned int i; | ||
4296 | int ret = 0; | ||
4297 | |||
4298 | for (i = 0; i < hist_data->attrs->n_actions; i++) { | ||
4299 | data = hist_data->actions[i]; | ||
4300 | |||
4301 | if (data->fn == action_trace) { | ||
4302 | ret = onmatch_create(hist_data, file, data); | ||
4303 | if (ret) | ||
4304 | return ret; | ||
4305 | } else if (data->fn == onmax_save) { | ||
4306 | ret = onmax_create(hist_data, data); | ||
4307 | if (ret) | ||
4308 | return ret; | ||
4309 | } | ||
4310 | } | ||
4311 | |||
4312 | return ret; | ||
4313 | } | ||
4314 | |||
4315 | static void print_actions(struct seq_file *m, | ||
4316 | struct hist_trigger_data *hist_data, | ||
4317 | struct tracing_map_elt *elt) | ||
4318 | { | ||
4319 | unsigned int i; | ||
4320 | |||
4321 | for (i = 0; i < hist_data->n_actions; i++) { | ||
4322 | struct action_data *data = hist_data->actions[i]; | ||
4323 | |||
4324 | if (data->fn == onmax_save) | ||
4325 | onmax_print(m, hist_data, elt, data); | ||
4326 | } | ||
4327 | } | ||
4328 | |||
4329 | static void print_onmax_spec(struct seq_file *m, | ||
4330 | struct hist_trigger_data *hist_data, | ||
4331 | struct action_data *data) | ||
4332 | { | ||
4333 | unsigned int i; | ||
4334 | |||
4335 | seq_puts(m, ":onmax("); | ||
4336 | seq_printf(m, "%s", data->onmax.var_str); | ||
4337 | seq_printf(m, ").%s(", data->onmax.fn_name); | ||
4338 | |||
4339 | for (i = 0; i < hist_data->n_max_vars; i++) { | ||
4340 | seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name); | ||
4341 | if (i < hist_data->n_max_vars - 1) | ||
4342 | seq_puts(m, ","); | ||
4343 | } | ||
4344 | seq_puts(m, ")"); | ||
4345 | } | ||
4346 | |||
4347 | static void print_onmatch_spec(struct seq_file *m, | ||
4348 | struct hist_trigger_data *hist_data, | ||
4349 | struct action_data *data) | ||
4350 | { | ||
4351 | unsigned int i; | ||
4352 | |||
4353 | seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system, | ||
4354 | data->onmatch.match_event); | ||
4355 | |||
4356 | seq_printf(m, "%s(", data->onmatch.synth_event->name); | ||
4357 | |||
4358 | for (i = 0; i < data->n_params; i++) { | ||
4359 | if (i) | ||
4360 | seq_puts(m, ","); | ||
4361 | seq_printf(m, "%s", data->params[i]); | ||
4362 | } | ||
4363 | |||
4364 | seq_puts(m, ")"); | ||
4365 | } | ||
4366 | |||
4367 | static bool actions_match(struct hist_trigger_data *hist_data, | ||
4368 | struct hist_trigger_data *hist_data_test) | ||
4369 | { | ||
4370 | unsigned int i, j; | ||
4371 | |||
4372 | if (hist_data->n_actions != hist_data_test->n_actions) | ||
4373 | return false; | ||
4374 | |||
4375 | for (i = 0; i < hist_data->n_actions; i++) { | ||
4376 | struct action_data *data = hist_data->actions[i]; | ||
4377 | struct action_data *data_test = hist_data_test->actions[i]; | ||
4378 | |||
4379 | if (data->fn != data_test->fn) | ||
4380 | return false; | ||
4381 | |||
4382 | if (data->n_params != data_test->n_params) | ||
4383 | return false; | ||
4384 | |||
4385 | for (j = 0; j < data->n_params; j++) { | ||
4386 | if (strcmp(data->params[j], data_test->params[j]) != 0) | ||
4387 | return false; | ||
4388 | } | ||
4389 | |||
4390 | if (data->fn == action_trace) { | ||
4391 | if (strcmp(data->onmatch.synth_event_name, | ||
4392 | data_test->onmatch.synth_event_name) != 0) | ||
4393 | return false; | ||
4394 | if (strcmp(data->onmatch.match_event_system, | ||
4395 | data_test->onmatch.match_event_system) != 0) | ||
4396 | return false; | ||
4397 | if (strcmp(data->onmatch.match_event, | ||
4398 | data_test->onmatch.match_event) != 0) | ||
4399 | return false; | ||
4400 | } else if (data->fn == onmax_save) { | ||
4401 | if (strcmp(data->onmax.var_str, | ||
4402 | data_test->onmax.var_str) != 0) | ||
4403 | return false; | ||
4404 | if (strcmp(data->onmax.fn_name, | ||
4405 | data_test->onmax.fn_name) != 0) | ||
4406 | return false; | ||
4407 | } | ||
4408 | } | ||
4409 | |||
4410 | return true; | ||
4411 | } | ||
4412 | |||
4413 | |||
4414 | static void print_actions_spec(struct seq_file *m, | ||
4415 | struct hist_trigger_data *hist_data) | ||
4416 | { | ||
4417 | unsigned int i; | ||
4418 | |||
4419 | for (i = 0; i < hist_data->n_actions; i++) { | ||
4420 | struct action_data *data = hist_data->actions[i]; | ||
4421 | |||
4422 | if (data->fn == action_trace) | ||
4423 | print_onmatch_spec(m, hist_data, data); | ||
4424 | else if (data->fn == onmax_save) | ||
4425 | print_onmax_spec(m, hist_data, data); | ||
4426 | } | ||
4427 | } | ||
4428 | |||
4429 | static void destroy_field_var_hists(struct hist_trigger_data *hist_data) | ||
4430 | { | ||
4431 | unsigned int i; | ||
4432 | |||
4433 | for (i = 0; i < hist_data->n_field_var_hists; i++) { | ||
4434 | kfree(hist_data->field_var_hists[i]->cmd); | ||
4435 | kfree(hist_data->field_var_hists[i]); | ||
4436 | } | ||
4437 | } | ||
4438 | |||
771 | static void destroy_hist_data(struct hist_trigger_data *hist_data) | 4439 | static void destroy_hist_data(struct hist_trigger_data *hist_data) |
772 | { | 4440 | { |
4441 | if (!hist_data) | ||
4442 | return; | ||
4443 | |||
773 | destroy_hist_trigger_attrs(hist_data->attrs); | 4444 | destroy_hist_trigger_attrs(hist_data->attrs); |
774 | destroy_hist_fields(hist_data); | 4445 | destroy_hist_fields(hist_data); |
775 | tracing_map_destroy(hist_data->map); | 4446 | tracing_map_destroy(hist_data->map); |
4447 | |||
4448 | destroy_actions(hist_data); | ||
4449 | destroy_field_vars(hist_data); | ||
4450 | destroy_field_var_hists(hist_data); | ||
4451 | destroy_synth_var_refs(hist_data); | ||
4452 | |||
776 | kfree(hist_data); | 4453 | kfree(hist_data); |
777 | } | 4454 | } |
778 | 4455 | ||
@@ -781,7 +4458,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) | |||
781 | struct tracing_map *map = hist_data->map; | 4458 | struct tracing_map *map = hist_data->map; |
782 | struct ftrace_event_field *field; | 4459 | struct ftrace_event_field *field; |
783 | struct hist_field *hist_field; | 4460 | struct hist_field *hist_field; |
784 | int i, idx; | 4461 | int i, idx = 0; |
785 | 4462 | ||
786 | for_each_hist_field(i, hist_data) { | 4463 | for_each_hist_field(i, hist_data) { |
787 | hist_field = hist_data->fields[i]; | 4464 | hist_field = hist_data->fields[i]; |
@@ -792,6 +4469,9 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) | |||
792 | 4469 | ||
793 | if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) | 4470 | if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) |
794 | cmp_fn = tracing_map_cmp_none; | 4471 | cmp_fn = tracing_map_cmp_none; |
4472 | else if (!field) | ||
4473 | cmp_fn = tracing_map_cmp_num(hist_field->size, | ||
4474 | hist_field->is_signed); | ||
795 | else if (is_string_field(field)) | 4475 | else if (is_string_field(field)) |
796 | cmp_fn = tracing_map_cmp_string; | 4476 | cmp_fn = tracing_map_cmp_string; |
797 | else | 4477 | else |
@@ -800,36 +4480,29 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) | |||
800 | idx = tracing_map_add_key_field(map, | 4480 | idx = tracing_map_add_key_field(map, |
801 | hist_field->offset, | 4481 | hist_field->offset, |
802 | cmp_fn); | 4482 | cmp_fn); |
803 | 4483 | } else if (!(hist_field->flags & HIST_FIELD_FL_VAR)) | |
804 | } else | ||
805 | idx = tracing_map_add_sum_field(map); | 4484 | idx = tracing_map_add_sum_field(map); |
806 | 4485 | ||
807 | if (idx < 0) | 4486 | if (idx < 0) |
808 | return idx; | 4487 | return idx; |
809 | } | ||
810 | |||
811 | return 0; | ||
812 | } | ||
813 | |||
814 | static bool need_tracing_map_ops(struct hist_trigger_data *hist_data) | ||
815 | { | ||
816 | struct hist_field *key_field; | ||
817 | unsigned int i; | ||
818 | |||
819 | for_each_hist_key_field(i, hist_data) { | ||
820 | key_field = hist_data->fields[i]; | ||
821 | 4488 | ||
822 | if (key_field->flags & HIST_FIELD_FL_EXECNAME) | 4489 | if (hist_field->flags & HIST_FIELD_FL_VAR) { |
823 | return true; | 4490 | idx = tracing_map_add_var(map); |
4491 | if (idx < 0) | ||
4492 | return idx; | ||
4493 | hist_field->var.idx = idx; | ||
4494 | hist_field->var.hist_data = hist_data; | ||
4495 | } | ||
824 | } | 4496 | } |
825 | 4497 | ||
826 | return false; | 4498 | return 0; |
827 | } | 4499 | } |
828 | 4500 | ||
829 | static struct hist_trigger_data * | 4501 | static struct hist_trigger_data * |
830 | create_hist_data(unsigned int map_bits, | 4502 | create_hist_data(unsigned int map_bits, |
831 | struct hist_trigger_attrs *attrs, | 4503 | struct hist_trigger_attrs *attrs, |
832 | struct trace_event_file *file) | 4504 | struct trace_event_file *file, |
4505 | bool remove) | ||
833 | { | 4506 | { |
834 | const struct tracing_map_ops *map_ops = NULL; | 4507 | const struct tracing_map_ops *map_ops = NULL; |
835 | struct hist_trigger_data *hist_data; | 4508 | struct hist_trigger_data *hist_data; |
@@ -840,6 +4513,12 @@ create_hist_data(unsigned int map_bits, | |||
840 | return ERR_PTR(-ENOMEM); | 4513 | return ERR_PTR(-ENOMEM); |
841 | 4514 | ||
842 | hist_data->attrs = attrs; | 4515 | hist_data->attrs = attrs; |
4516 | hist_data->remove = remove; | ||
4517 | hist_data->event_file = file; | ||
4518 | |||
4519 | ret = parse_actions(hist_data); | ||
4520 | if (ret) | ||
4521 | goto free; | ||
843 | 4522 | ||
844 | ret = create_hist_fields(hist_data, file); | 4523 | ret = create_hist_fields(hist_data, file); |
845 | if (ret) | 4524 | if (ret) |
@@ -849,8 +4528,7 @@ create_hist_data(unsigned int map_bits, | |||
849 | if (ret) | 4528 | if (ret) |
850 | goto free; | 4529 | goto free; |
851 | 4530 | ||
852 | if (need_tracing_map_ops(hist_data)) | 4531 | map_ops = &hist_trigger_elt_data_ops; |
853 | map_ops = &hist_trigger_elt_comm_ops; | ||
854 | 4532 | ||
855 | hist_data->map = tracing_map_create(map_bits, hist_data->key_size, | 4533 | hist_data->map = tracing_map_create(map_bits, hist_data->key_size, |
856 | map_ops, hist_data); | 4534 | map_ops, hist_data); |
@@ -863,12 +4541,6 @@ create_hist_data(unsigned int map_bits, | |||
863 | ret = create_tracing_map_fields(hist_data); | 4541 | ret = create_tracing_map_fields(hist_data); |
864 | if (ret) | 4542 | if (ret) |
865 | goto free; | 4543 | goto free; |
866 | |||
867 | ret = tracing_map_init(hist_data->map); | ||
868 | if (ret) | ||
869 | goto free; | ||
870 | |||
871 | hist_data->event_file = file; | ||
872 | out: | 4544 | out: |
873 | return hist_data; | 4545 | return hist_data; |
874 | free: | 4546 | free: |
@@ -882,18 +4554,39 @@ create_hist_data(unsigned int map_bits, | |||
882 | } | 4554 | } |
883 | 4555 | ||
884 | static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, | 4556 | static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, |
885 | struct tracing_map_elt *elt, | 4557 | struct tracing_map_elt *elt, void *rec, |
886 | void *rec) | 4558 | struct ring_buffer_event *rbe, |
4559 | u64 *var_ref_vals) | ||
887 | { | 4560 | { |
4561 | struct hist_elt_data *elt_data; | ||
888 | struct hist_field *hist_field; | 4562 | struct hist_field *hist_field; |
889 | unsigned int i; | 4563 | unsigned int i, var_idx; |
890 | u64 hist_val; | 4564 | u64 hist_val; |
891 | 4565 | ||
4566 | elt_data = elt->private_data; | ||
4567 | elt_data->var_ref_vals = var_ref_vals; | ||
4568 | |||
892 | for_each_hist_val_field(i, hist_data) { | 4569 | for_each_hist_val_field(i, hist_data) { |
893 | hist_field = hist_data->fields[i]; | 4570 | hist_field = hist_data->fields[i]; |
894 | hist_val = hist_field->fn(hist_field, rec); | 4571 | hist_val = hist_field->fn(hist_field, elt, rbe, rec); |
4572 | if (hist_field->flags & HIST_FIELD_FL_VAR) { | ||
4573 | var_idx = hist_field->var.idx; | ||
4574 | tracing_map_set_var(elt, var_idx, hist_val); | ||
4575 | continue; | ||
4576 | } | ||
895 | tracing_map_update_sum(elt, i, hist_val); | 4577 | tracing_map_update_sum(elt, i, hist_val); |
896 | } | 4578 | } |
4579 | |||
4580 | for_each_hist_key_field(i, hist_data) { | ||
4581 | hist_field = hist_data->fields[i]; | ||
4582 | if (hist_field->flags & HIST_FIELD_FL_VAR) { | ||
4583 | hist_val = hist_field->fn(hist_field, elt, rbe, rec); | ||
4584 | var_idx = hist_field->var.idx; | ||
4585 | tracing_map_set_var(elt, var_idx, hist_val); | ||
4586 | } | ||
4587 | } | ||
4588 | |||
4589 | update_field_vars(hist_data, elt, rbe, rec); | ||
897 | } | 4590 | } |
898 | 4591 | ||
899 | static inline void add_to_key(char *compound_key, void *key, | 4592 | static inline void add_to_key(char *compound_key, void *key, |
@@ -920,15 +4613,31 @@ static inline void add_to_key(char *compound_key, void *key, | |||
920 | memcpy(compound_key + key_field->offset, key, size); | 4613 | memcpy(compound_key + key_field->offset, key, size); |
921 | } | 4614 | } |
922 | 4615 | ||
923 | static void event_hist_trigger(struct event_trigger_data *data, void *rec) | 4616 | static void |
4617 | hist_trigger_actions(struct hist_trigger_data *hist_data, | ||
4618 | struct tracing_map_elt *elt, void *rec, | ||
4619 | struct ring_buffer_event *rbe, u64 *var_ref_vals) | ||
4620 | { | ||
4621 | struct action_data *data; | ||
4622 | unsigned int i; | ||
4623 | |||
4624 | for (i = 0; i < hist_data->n_actions; i++) { | ||
4625 | data = hist_data->actions[i]; | ||
4626 | data->fn(hist_data, elt, rec, rbe, data, var_ref_vals); | ||
4627 | } | ||
4628 | } | ||
4629 | |||
4630 | static void event_hist_trigger(struct event_trigger_data *data, void *rec, | ||
4631 | struct ring_buffer_event *rbe) | ||
924 | { | 4632 | { |
925 | struct hist_trigger_data *hist_data = data->private_data; | 4633 | struct hist_trigger_data *hist_data = data->private_data; |
926 | bool use_compound_key = (hist_data->n_keys > 1); | 4634 | bool use_compound_key = (hist_data->n_keys > 1); |
927 | unsigned long entries[HIST_STACKTRACE_DEPTH]; | 4635 | unsigned long entries[HIST_STACKTRACE_DEPTH]; |
4636 | u64 var_ref_vals[TRACING_MAP_VARS_MAX]; | ||
928 | char compound_key[HIST_KEY_SIZE_MAX]; | 4637 | char compound_key[HIST_KEY_SIZE_MAX]; |
4638 | struct tracing_map_elt *elt = NULL; | ||
929 | struct stack_trace stacktrace; | 4639 | struct stack_trace stacktrace; |
930 | struct hist_field *key_field; | 4640 | struct hist_field *key_field; |
931 | struct tracing_map_elt *elt; | ||
932 | u64 field_contents; | 4641 | u64 field_contents; |
933 | void *key = NULL; | 4642 | void *key = NULL; |
934 | unsigned int i; | 4643 | unsigned int i; |
@@ -949,7 +4658,7 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec) | |||
949 | 4658 | ||
950 | key = entries; | 4659 | key = entries; |
951 | } else { | 4660 | } else { |
952 | field_contents = key_field->fn(key_field, rec); | 4661 | field_contents = key_field->fn(key_field, elt, rbe, rec); |
953 | if (key_field->flags & HIST_FIELD_FL_STRING) { | 4662 | if (key_field->flags & HIST_FIELD_FL_STRING) { |
954 | key = (void *)(unsigned long)field_contents; | 4663 | key = (void *)(unsigned long)field_contents; |
955 | use_compound_key = true; | 4664 | use_compound_key = true; |
@@ -964,9 +4673,18 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec) | |||
964 | if (use_compound_key) | 4673 | if (use_compound_key) |
965 | key = compound_key; | 4674 | key = compound_key; |
966 | 4675 | ||
4676 | if (hist_data->n_var_refs && | ||
4677 | !resolve_var_refs(hist_data, key, var_ref_vals, false)) | ||
4678 | return; | ||
4679 | |||
967 | elt = tracing_map_insert(hist_data->map, key); | 4680 | elt = tracing_map_insert(hist_data->map, key); |
968 | if (elt) | 4681 | if (!elt) |
969 | hist_trigger_elt_update(hist_data, elt, rec); | 4682 | return; |
4683 | |||
4684 | hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); | ||
4685 | |||
4686 | if (resolve_var_refs(hist_data, key, var_ref_vals, true)) | ||
4687 | hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals); | ||
970 | } | 4688 | } |
971 | 4689 | ||
972 | static void hist_trigger_stacktrace_print(struct seq_file *m, | 4690 | static void hist_trigger_stacktrace_print(struct seq_file *m, |
@@ -1023,7 +4741,13 @@ hist_trigger_entry_print(struct seq_file *m, | |||
1023 | seq_printf(m, "%s: [%llx] %-55s", field_name, | 4741 | seq_printf(m, "%s: [%llx] %-55s", field_name, |
1024 | uval, str); | 4742 | uval, str); |
1025 | } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { | 4743 | } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { |
1026 | char *comm = elt->private_data; | 4744 | struct hist_elt_data *elt_data = elt->private_data; |
4745 | char *comm; | ||
4746 | |||
4747 | if (WARN_ON_ONCE(!elt_data)) | ||
4748 | return; | ||
4749 | |||
4750 | comm = elt_data->comm; | ||
1027 | 4751 | ||
1028 | uval = *(u64 *)(key + key_field->offset); | 4752 | uval = *(u64 *)(key + key_field->offset); |
1029 | seq_printf(m, "%s: %-16s[%10llu]", field_name, | 4753 | seq_printf(m, "%s: %-16s[%10llu]", field_name, |
@@ -1067,6 +4791,10 @@ hist_trigger_entry_print(struct seq_file *m, | |||
1067 | for (i = 1; i < hist_data->n_vals; i++) { | 4791 | for (i = 1; i < hist_data->n_vals; i++) { |
1068 | field_name = hist_field_name(hist_data->fields[i], 0); | 4792 | field_name = hist_field_name(hist_data->fields[i], 0); |
1069 | 4793 | ||
4794 | if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || | ||
4795 | hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR) | ||
4796 | continue; | ||
4797 | |||
1070 | if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { | 4798 | if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { |
1071 | seq_printf(m, " %s: %10llx", field_name, | 4799 | seq_printf(m, " %s: %10llx", field_name, |
1072 | tracing_map_read_sum(elt, i)); | 4800 | tracing_map_read_sum(elt, i)); |
@@ -1076,6 +4804,8 @@ hist_trigger_entry_print(struct seq_file *m, | |||
1076 | } | 4804 | } |
1077 | } | 4805 | } |
1078 | 4806 | ||
4807 | print_actions(m, hist_data, elt); | ||
4808 | |||
1079 | seq_puts(m, "\n"); | 4809 | seq_puts(m, "\n"); |
1080 | } | 4810 | } |
1081 | 4811 | ||
@@ -1144,6 +4874,11 @@ static int hist_show(struct seq_file *m, void *v) | |||
1144 | hist_trigger_show(m, data, n++); | 4874 | hist_trigger_show(m, data, n++); |
1145 | } | 4875 | } |
1146 | 4876 | ||
4877 | if (have_hist_err()) { | ||
4878 | seq_printf(m, "\nERROR: %s\n", hist_err_str); | ||
4879 | seq_printf(m, " Last command: %s\n", last_hist_cmd); | ||
4880 | } | ||
4881 | |||
1147 | out_unlock: | 4882 | out_unlock: |
1148 | mutex_unlock(&event_mutex); | 4883 | mutex_unlock(&event_mutex); |
1149 | 4884 | ||
@@ -1162,37 +4897,22 @@ const struct file_operations event_hist_fops = { | |||
1162 | .release = single_release, | 4897 | .release = single_release, |
1163 | }; | 4898 | }; |
1164 | 4899 | ||
1165 | static const char *get_hist_field_flags(struct hist_field *hist_field) | ||
1166 | { | ||
1167 | const char *flags_str = NULL; | ||
1168 | |||
1169 | if (hist_field->flags & HIST_FIELD_FL_HEX) | ||
1170 | flags_str = "hex"; | ||
1171 | else if (hist_field->flags & HIST_FIELD_FL_SYM) | ||
1172 | flags_str = "sym"; | ||
1173 | else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) | ||
1174 | flags_str = "sym-offset"; | ||
1175 | else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) | ||
1176 | flags_str = "execname"; | ||
1177 | else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) | ||
1178 | flags_str = "syscall"; | ||
1179 | else if (hist_field->flags & HIST_FIELD_FL_LOG2) | ||
1180 | flags_str = "log2"; | ||
1181 | |||
1182 | return flags_str; | ||
1183 | } | ||
1184 | |||
1185 | static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) | 4900 | static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) |
1186 | { | 4901 | { |
1187 | const char *field_name = hist_field_name(hist_field, 0); | 4902 | const char *field_name = hist_field_name(hist_field, 0); |
1188 | 4903 | ||
1189 | seq_printf(m, "%s", field_name); | 4904 | if (hist_field->var.name) |
1190 | if (hist_field->flags) { | 4905 | seq_printf(m, "%s=", hist_field->var.name); |
1191 | const char *flags_str = get_hist_field_flags(hist_field); | 4906 | |
1192 | 4907 | if (hist_field->flags & HIST_FIELD_FL_CPU) | |
1193 | if (flags_str) | 4908 | seq_puts(m, "cpu"); |
1194 | seq_printf(m, ".%s", flags_str); | 4909 | else if (field_name) { |
1195 | } | 4910 | if (hist_field->flags & HIST_FIELD_FL_VAR_REF || |
4911 | hist_field->flags & HIST_FIELD_FL_ALIAS) | ||
4912 | seq_putc(m, '$'); | ||
4913 | seq_printf(m, "%s", field_name); | ||
4914 | } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) | ||
4915 | seq_puts(m, "common_timestamp"); | ||
1196 | } | 4916 | } |
1197 | 4917 | ||
1198 | static int event_hist_trigger_print(struct seq_file *m, | 4918 | static int event_hist_trigger_print(struct seq_file *m, |
@@ -1200,7 +4920,8 @@ static int event_hist_trigger_print(struct seq_file *m, | |||
1200 | struct event_trigger_data *data) | 4920 | struct event_trigger_data *data) |
1201 | { | 4921 | { |
1202 | struct hist_trigger_data *hist_data = data->private_data; | 4922 | struct hist_trigger_data *hist_data = data->private_data; |
1203 | struct hist_field *key_field; | 4923 | struct hist_field *field; |
4924 | bool have_var = false; | ||
1204 | unsigned int i; | 4925 | unsigned int i; |
1205 | 4926 | ||
1206 | seq_puts(m, "hist:"); | 4927 | seq_puts(m, "hist:"); |
@@ -1211,25 +4932,47 @@ static int event_hist_trigger_print(struct seq_file *m, | |||
1211 | seq_puts(m, "keys="); | 4932 | seq_puts(m, "keys="); |
1212 | 4933 | ||
1213 | for_each_hist_key_field(i, hist_data) { | 4934 | for_each_hist_key_field(i, hist_data) { |
1214 | key_field = hist_data->fields[i]; | 4935 | field = hist_data->fields[i]; |
1215 | 4936 | ||
1216 | if (i > hist_data->n_vals) | 4937 | if (i > hist_data->n_vals) |
1217 | seq_puts(m, ","); | 4938 | seq_puts(m, ","); |
1218 | 4939 | ||
1219 | if (key_field->flags & HIST_FIELD_FL_STACKTRACE) | 4940 | if (field->flags & HIST_FIELD_FL_STACKTRACE) |
1220 | seq_puts(m, "stacktrace"); | 4941 | seq_puts(m, "stacktrace"); |
1221 | else | 4942 | else |
1222 | hist_field_print(m, key_field); | 4943 | hist_field_print(m, field); |
1223 | } | 4944 | } |
1224 | 4945 | ||
1225 | seq_puts(m, ":vals="); | 4946 | seq_puts(m, ":vals="); |
1226 | 4947 | ||
1227 | for_each_hist_val_field(i, hist_data) { | 4948 | for_each_hist_val_field(i, hist_data) { |
4949 | field = hist_data->fields[i]; | ||
4950 | if (field->flags & HIST_FIELD_FL_VAR) { | ||
4951 | have_var = true; | ||
4952 | continue; | ||
4953 | } | ||
4954 | |||
1228 | if (i == HITCOUNT_IDX) | 4955 | if (i == HITCOUNT_IDX) |
1229 | seq_puts(m, "hitcount"); | 4956 | seq_puts(m, "hitcount"); |
1230 | else { | 4957 | else { |
1231 | seq_puts(m, ","); | 4958 | seq_puts(m, ","); |
1232 | hist_field_print(m, hist_data->fields[i]); | 4959 | hist_field_print(m, field); |
4960 | } | ||
4961 | } | ||
4962 | |||
4963 | if (have_var) { | ||
4964 | unsigned int n = 0; | ||
4965 | |||
4966 | seq_puts(m, ":"); | ||
4967 | |||
4968 | for_each_hist_val_field(i, hist_data) { | ||
4969 | field = hist_data->fields[i]; | ||
4970 | |||
4971 | if (field->flags & HIST_FIELD_FL_VAR) { | ||
4972 | if (n++) | ||
4973 | seq_puts(m, ","); | ||
4974 | hist_field_print(m, field); | ||
4975 | } | ||
1233 | } | 4976 | } |
1234 | } | 4977 | } |
1235 | 4978 | ||
@@ -1237,28 +4980,36 @@ static int event_hist_trigger_print(struct seq_file *m, | |||
1237 | 4980 | ||
1238 | for (i = 0; i < hist_data->n_sort_keys; i++) { | 4981 | for (i = 0; i < hist_data->n_sort_keys; i++) { |
1239 | struct tracing_map_sort_key *sort_key; | 4982 | struct tracing_map_sort_key *sort_key; |
4983 | unsigned int idx, first_key_idx; | ||
4984 | |||
4985 | /* skip VAR vals */ | ||
4986 | first_key_idx = hist_data->n_vals - hist_data->n_vars; | ||
1240 | 4987 | ||
1241 | sort_key = &hist_data->sort_keys[i]; | 4988 | sort_key = &hist_data->sort_keys[i]; |
4989 | idx = sort_key->field_idx; | ||
4990 | |||
4991 | if (WARN_ON(idx >= HIST_FIELDS_MAX)) | ||
4992 | return -EINVAL; | ||
1242 | 4993 | ||
1243 | if (i > 0) | 4994 | if (i > 0) |
1244 | seq_puts(m, ","); | 4995 | seq_puts(m, ","); |
1245 | 4996 | ||
1246 | if (sort_key->field_idx == HITCOUNT_IDX) | 4997 | if (idx == HITCOUNT_IDX) |
1247 | seq_puts(m, "hitcount"); | 4998 | seq_puts(m, "hitcount"); |
1248 | else { | 4999 | else { |
1249 | unsigned int idx = sort_key->field_idx; | 5000 | if (idx >= first_key_idx) |
1250 | 5001 | idx += hist_data->n_vars; | |
1251 | if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX)) | ||
1252 | return -EINVAL; | ||
1253 | |||
1254 | hist_field_print(m, hist_data->fields[idx]); | 5002 | hist_field_print(m, hist_data->fields[idx]); |
1255 | } | 5003 | } |
1256 | 5004 | ||
1257 | if (sort_key->descending) | 5005 | if (sort_key->descending) |
1258 | seq_puts(m, ".descending"); | 5006 | seq_puts(m, ".descending"); |
1259 | } | 5007 | } |
1260 | |||
1261 | seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); | 5008 | seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); |
5009 | if (hist_data->enable_timestamps) | ||
5010 | seq_printf(m, ":clock=%s", hist_data->attrs->clock); | ||
5011 | |||
5012 | print_actions_spec(m, hist_data); | ||
1262 | 5013 | ||
1263 | if (data->filter_str) | 5014 | if (data->filter_str) |
1264 | seq_printf(m, " if %s", data->filter_str); | 5015 | seq_printf(m, " if %s", data->filter_str); |
@@ -1286,6 +5037,21 @@ static int event_hist_trigger_init(struct event_trigger_ops *ops, | |||
1286 | return 0; | 5037 | return 0; |
1287 | } | 5038 | } |
1288 | 5039 | ||
5040 | static void unregister_field_var_hists(struct hist_trigger_data *hist_data) | ||
5041 | { | ||
5042 | struct trace_event_file *file; | ||
5043 | unsigned int i; | ||
5044 | char *cmd; | ||
5045 | int ret; | ||
5046 | |||
5047 | for (i = 0; i < hist_data->n_field_var_hists; i++) { | ||
5048 | file = hist_data->field_var_hists[i]->hist_data->event_file; | ||
5049 | cmd = hist_data->field_var_hists[i]->cmd; | ||
5050 | ret = event_hist_trigger_func(&trigger_hist_cmd, file, | ||
5051 | "!hist", "hist", cmd); | ||
5052 | } | ||
5053 | } | ||
5054 | |||
1289 | static void event_hist_trigger_free(struct event_trigger_ops *ops, | 5055 | static void event_hist_trigger_free(struct event_trigger_ops *ops, |
1290 | struct event_trigger_data *data) | 5056 | struct event_trigger_data *data) |
1291 | { | 5057 | { |
@@ -1298,7 +5064,13 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops, | |||
1298 | if (!data->ref) { | 5064 | if (!data->ref) { |
1299 | if (data->name) | 5065 | if (data->name) |
1300 | del_named_trigger(data); | 5066 | del_named_trigger(data); |
5067 | |||
1301 | trigger_data_free(data); | 5068 | trigger_data_free(data); |
5069 | |||
5070 | remove_hist_vars(hist_data); | ||
5071 | |||
5072 | unregister_field_var_hists(hist_data); | ||
5073 | |||
1302 | destroy_hist_data(hist_data); | 5074 | destroy_hist_data(hist_data); |
1303 | } | 5075 | } |
1304 | } | 5076 | } |
@@ -1425,6 +5197,15 @@ static bool hist_trigger_match(struct event_trigger_data *data, | |||
1425 | return false; | 5197 | return false; |
1426 | if (key_field->offset != key_field_test->offset) | 5198 | if (key_field->offset != key_field_test->offset) |
1427 | return false; | 5199 | return false; |
5200 | if (key_field->size != key_field_test->size) | ||
5201 | return false; | ||
5202 | if (key_field->is_signed != key_field_test->is_signed) | ||
5203 | return false; | ||
5204 | if (!!key_field->var.name != !!key_field_test->var.name) | ||
5205 | return false; | ||
5206 | if (key_field->var.name && | ||
5207 | strcmp(key_field->var.name, key_field_test->var.name) != 0) | ||
5208 | return false; | ||
1428 | } | 5209 | } |
1429 | 5210 | ||
1430 | for (i = 0; i < hist_data->n_sort_keys; i++) { | 5211 | for (i = 0; i < hist_data->n_sort_keys; i++) { |
@@ -1440,6 +5221,9 @@ static bool hist_trigger_match(struct event_trigger_data *data, | |||
1440 | (strcmp(data->filter_str, data_test->filter_str) != 0)) | 5221 | (strcmp(data->filter_str, data_test->filter_str) != 0)) |
1441 | return false; | 5222 | return false; |
1442 | 5223 | ||
5224 | if (!actions_match(hist_data, hist_data_test)) | ||
5225 | return false; | ||
5226 | |||
1443 | return true; | 5227 | return true; |
1444 | } | 5228 | } |
1445 | 5229 | ||
@@ -1456,6 +5240,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, | |||
1456 | if (named_data) { | 5240 | if (named_data) { |
1457 | if (!hist_trigger_match(data, named_data, named_data, | 5241 | if (!hist_trigger_match(data, named_data, named_data, |
1458 | true)) { | 5242 | true)) { |
5243 | hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); | ||
1459 | ret = -EINVAL; | 5244 | ret = -EINVAL; |
1460 | goto out; | 5245 | goto out; |
1461 | } | 5246 | } |
@@ -1475,13 +5260,16 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, | |||
1475 | test->paused = false; | 5260 | test->paused = false; |
1476 | else if (hist_data->attrs->clear) | 5261 | else if (hist_data->attrs->clear) |
1477 | hist_clear(test); | 5262 | hist_clear(test); |
1478 | else | 5263 | else { |
5264 | hist_err("Hist trigger already exists", NULL); | ||
1479 | ret = -EEXIST; | 5265 | ret = -EEXIST; |
5266 | } | ||
1480 | goto out; | 5267 | goto out; |
1481 | } | 5268 | } |
1482 | } | 5269 | } |
1483 | new: | 5270 | new: |
1484 | if (hist_data->attrs->cont || hist_data->attrs->clear) { | 5271 | if (hist_data->attrs->cont || hist_data->attrs->clear) { |
5272 | hist_err("Can't clear or continue a nonexistent hist trigger", NULL); | ||
1485 | ret = -ENOENT; | 5273 | ret = -ENOENT; |
1486 | goto out; | 5274 | goto out; |
1487 | } | 5275 | } |
@@ -1490,7 +5278,6 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, | |||
1490 | data->paused = true; | 5278 | data->paused = true; |
1491 | 5279 | ||
1492 | if (named_data) { | 5280 | if (named_data) { |
1493 | destroy_hist_data(data->private_data); | ||
1494 | data->private_data = named_data->private_data; | 5281 | data->private_data = named_data->private_data; |
1495 | set_named_trigger_data(data, named_data); | 5282 | set_named_trigger_data(data, named_data); |
1496 | data->ops = &event_hist_trigger_named_ops; | 5283 | data->ops = &event_hist_trigger_named_ops; |
@@ -1502,8 +5289,32 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, | |||
1502 | goto out; | 5289 | goto out; |
1503 | } | 5290 | } |
1504 | 5291 | ||
1505 | list_add_rcu(&data->list, &file->triggers); | 5292 | if (hist_data->enable_timestamps) { |
5293 | char *clock = hist_data->attrs->clock; | ||
5294 | |||
5295 | ret = tracing_set_clock(file->tr, hist_data->attrs->clock); | ||
5296 | if (ret) { | ||
5297 | hist_err("Couldn't set trace_clock: ", clock); | ||
5298 | goto out; | ||
5299 | } | ||
5300 | |||
5301 | tracing_set_time_stamp_abs(file->tr, true); | ||
5302 | } | ||
5303 | |||
5304 | if (named_data) | ||
5305 | destroy_hist_data(hist_data); | ||
5306 | |||
1506 | ret++; | 5307 | ret++; |
5308 | out: | ||
5309 | return ret; | ||
5310 | } | ||
5311 | |||
5312 | static int hist_trigger_enable(struct event_trigger_data *data, | ||
5313 | struct trace_event_file *file) | ||
5314 | { | ||
5315 | int ret = 0; | ||
5316 | |||
5317 | list_add_tail_rcu(&data->list, &file->triggers); | ||
1507 | 5318 | ||
1508 | update_cond_flag(file); | 5319 | update_cond_flag(file); |
1509 | 5320 | ||
@@ -1512,10 +5323,55 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, | |||
1512 | update_cond_flag(file); | 5323 | update_cond_flag(file); |
1513 | ret--; | 5324 | ret--; |
1514 | } | 5325 | } |
1515 | out: | 5326 | |
1516 | return ret; | 5327 | return ret; |
1517 | } | 5328 | } |
1518 | 5329 | ||
5330 | static bool have_hist_trigger_match(struct event_trigger_data *data, | ||
5331 | struct trace_event_file *file) | ||
5332 | { | ||
5333 | struct hist_trigger_data *hist_data = data->private_data; | ||
5334 | struct event_trigger_data *test, *named_data = NULL; | ||
5335 | bool match = false; | ||
5336 | |||
5337 | if (hist_data->attrs->name) | ||
5338 | named_data = find_named_trigger(hist_data->attrs->name); | ||
5339 | |||
5340 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
5341 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
5342 | if (hist_trigger_match(data, test, named_data, false)) { | ||
5343 | match = true; | ||
5344 | break; | ||
5345 | } | ||
5346 | } | ||
5347 | } | ||
5348 | |||
5349 | return match; | ||
5350 | } | ||
5351 | |||
5352 | static bool hist_trigger_check_refs(struct event_trigger_data *data, | ||
5353 | struct trace_event_file *file) | ||
5354 | { | ||
5355 | struct hist_trigger_data *hist_data = data->private_data; | ||
5356 | struct event_trigger_data *test, *named_data = NULL; | ||
5357 | |||
5358 | if (hist_data->attrs->name) | ||
5359 | named_data = find_named_trigger(hist_data->attrs->name); | ||
5360 | |||
5361 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
5362 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
5363 | if (!hist_trigger_match(data, test, named_data, false)) | ||
5364 | continue; | ||
5365 | hist_data = test->private_data; | ||
5366 | if (check_var_refs(hist_data)) | ||
5367 | return true; | ||
5368 | break; | ||
5369 | } | ||
5370 | } | ||
5371 | |||
5372 | return false; | ||
5373 | } | ||
5374 | |||
1519 | static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, | 5375 | static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, |
1520 | struct event_trigger_data *data, | 5376 | struct event_trigger_data *data, |
1521 | struct trace_event_file *file) | 5377 | struct trace_event_file *file) |
@@ -1541,17 +5397,55 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, | |||
1541 | 5397 | ||
1542 | if (unregistered && test->ops->free) | 5398 | if (unregistered && test->ops->free) |
1543 | test->ops->free(test->ops, test); | 5399 | test->ops->free(test->ops, test); |
5400 | |||
5401 | if (hist_data->enable_timestamps) { | ||
5402 | if (!hist_data->remove || unregistered) | ||
5403 | tracing_set_time_stamp_abs(file->tr, false); | ||
5404 | } | ||
5405 | } | ||
5406 | |||
5407 | static bool hist_file_check_refs(struct trace_event_file *file) | ||
5408 | { | ||
5409 | struct hist_trigger_data *hist_data; | ||
5410 | struct event_trigger_data *test; | ||
5411 | |||
5412 | list_for_each_entry_rcu(test, &file->triggers, list) { | ||
5413 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | ||
5414 | hist_data = test->private_data; | ||
5415 | if (check_var_refs(hist_data)) | ||
5416 | return true; | ||
5417 | } | ||
5418 | } | ||
5419 | |||
5420 | return false; | ||
1544 | } | 5421 | } |
1545 | 5422 | ||
1546 | static void hist_unreg_all(struct trace_event_file *file) | 5423 | static void hist_unreg_all(struct trace_event_file *file) |
1547 | { | 5424 | { |
1548 | struct event_trigger_data *test, *n; | 5425 | struct event_trigger_data *test, *n; |
5426 | struct hist_trigger_data *hist_data; | ||
5427 | struct synth_event *se; | ||
5428 | const char *se_name; | ||
5429 | |||
5430 | if (hist_file_check_refs(file)) | ||
5431 | return; | ||
1549 | 5432 | ||
1550 | list_for_each_entry_safe(test, n, &file->triggers, list) { | 5433 | list_for_each_entry_safe(test, n, &file->triggers, list) { |
1551 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | 5434 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { |
5435 | hist_data = test->private_data; | ||
1552 | list_del_rcu(&test->list); | 5436 | list_del_rcu(&test->list); |
1553 | trace_event_trigger_enable_disable(file, 0); | 5437 | trace_event_trigger_enable_disable(file, 0); |
5438 | |||
5439 | mutex_lock(&synth_event_mutex); | ||
5440 | se_name = trace_event_name(file->event_call); | ||
5441 | se = find_synth_event(se_name); | ||
5442 | if (se) | ||
5443 | se->ref--; | ||
5444 | mutex_unlock(&synth_event_mutex); | ||
5445 | |||
1554 | update_cond_flag(file); | 5446 | update_cond_flag(file); |
5447 | if (hist_data->enable_timestamps) | ||
5448 | tracing_set_time_stamp_abs(file->tr, false); | ||
1555 | if (test->ops->free) | 5449 | if (test->ops->free) |
1556 | test->ops->free(test->ops, test); | 5450 | test->ops->free(test->ops, test); |
1557 | } | 5451 | } |
@@ -1567,16 +5461,54 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, | |||
1567 | struct hist_trigger_attrs *attrs; | 5461 | struct hist_trigger_attrs *attrs; |
1568 | struct event_trigger_ops *trigger_ops; | 5462 | struct event_trigger_ops *trigger_ops; |
1569 | struct hist_trigger_data *hist_data; | 5463 | struct hist_trigger_data *hist_data; |
1570 | char *trigger; | 5464 | struct synth_event *se; |
5465 | const char *se_name; | ||
5466 | bool remove = false; | ||
5467 | char *trigger, *p; | ||
1571 | int ret = 0; | 5468 | int ret = 0; |
1572 | 5469 | ||
5470 | if (glob && strlen(glob)) { | ||
5471 | last_cmd_set(param); | ||
5472 | hist_err_clear(); | ||
5473 | } | ||
5474 | |||
1573 | if (!param) | 5475 | if (!param) |
1574 | return -EINVAL; | 5476 | return -EINVAL; |
1575 | 5477 | ||
1576 | /* separate the trigger from the filter (k:v [if filter]) */ | 5478 | if (glob[0] == '!') |
1577 | trigger = strsep(¶m, " \t"); | 5479 | remove = true; |
1578 | if (!trigger) | 5480 | |
1579 | return -EINVAL; | 5481 | /* |
5482 | * separate the trigger from the filter (k:v [if filter]) | ||
5483 | * allowing for whitespace in the trigger | ||
5484 | */ | ||
5485 | p = trigger = param; | ||
5486 | do { | ||
5487 | p = strstr(p, "if"); | ||
5488 | if (!p) | ||
5489 | break; | ||
5490 | if (p == param) | ||
5491 | return -EINVAL; | ||
5492 | if (*(p - 1) != ' ' && *(p - 1) != '\t') { | ||
5493 | p++; | ||
5494 | continue; | ||
5495 | } | ||
5496 | if (p >= param + strlen(param) - strlen("if") - 1) | ||
5497 | return -EINVAL; | ||
5498 | if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') { | ||
5499 | p++; | ||
5500 | continue; | ||
5501 | } | ||
5502 | break; | ||
5503 | } while (p); | ||
5504 | |||
5505 | if (!p) | ||
5506 | param = NULL; | ||
5507 | else { | ||
5508 | *(p - 1) = '\0'; | ||
5509 | param = strstrip(p); | ||
5510 | trigger = strstrip(trigger); | ||
5511 | } | ||
1580 | 5512 | ||
1581 | attrs = parse_hist_trigger_attrs(trigger); | 5513 | attrs = parse_hist_trigger_attrs(trigger); |
1582 | if (IS_ERR(attrs)) | 5514 | if (IS_ERR(attrs)) |
@@ -1585,7 +5517,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, | |||
1585 | if (attrs->map_bits) | 5517 | if (attrs->map_bits) |
1586 | hist_trigger_bits = attrs->map_bits; | 5518 | hist_trigger_bits = attrs->map_bits; |
1587 | 5519 | ||
1588 | hist_data = create_hist_data(hist_trigger_bits, attrs, file); | 5520 | hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove); |
1589 | if (IS_ERR(hist_data)) { | 5521 | if (IS_ERR(hist_data)) { |
1590 | destroy_hist_trigger_attrs(attrs); | 5522 | destroy_hist_trigger_attrs(attrs); |
1591 | return PTR_ERR(hist_data); | 5523 | return PTR_ERR(hist_data); |
@@ -1593,10 +5525,11 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, | |||
1593 | 5525 | ||
1594 | trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); | 5526 | trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); |
1595 | 5527 | ||
1596 | ret = -ENOMEM; | ||
1597 | trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); | 5528 | trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); |
1598 | if (!trigger_data) | 5529 | if (!trigger_data) { |
5530 | ret = -ENOMEM; | ||
1599 | goto out_free; | 5531 | goto out_free; |
5532 | } | ||
1600 | 5533 | ||
1601 | trigger_data->count = -1; | 5534 | trigger_data->count = -1; |
1602 | trigger_data->ops = trigger_ops; | 5535 | trigger_data->ops = trigger_ops; |
@@ -1614,8 +5547,24 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, | |||
1614 | goto out_free; | 5547 | goto out_free; |
1615 | } | 5548 | } |
1616 | 5549 | ||
1617 | if (glob[0] == '!') { | 5550 | if (remove) { |
5551 | if (!have_hist_trigger_match(trigger_data, file)) | ||
5552 | goto out_free; | ||
5553 | |||
5554 | if (hist_trigger_check_refs(trigger_data, file)) { | ||
5555 | ret = -EBUSY; | ||
5556 | goto out_free; | ||
5557 | } | ||
5558 | |||
1618 | cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); | 5559 | cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); |
5560 | |||
5561 | mutex_lock(&synth_event_mutex); | ||
5562 | se_name = trace_event_name(file->event_call); | ||
5563 | se = find_synth_event(se_name); | ||
5564 | if (se) | ||
5565 | se->ref--; | ||
5566 | mutex_unlock(&synth_event_mutex); | ||
5567 | |||
1619 | ret = 0; | 5568 | ret = 0; |
1620 | goto out_free; | 5569 | goto out_free; |
1621 | } | 5570 | } |
@@ -1632,14 +5581,47 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, | |||
1632 | goto out_free; | 5581 | goto out_free; |
1633 | } else if (ret < 0) | 5582 | } else if (ret < 0) |
1634 | goto out_free; | 5583 | goto out_free; |
5584 | |||
5585 | if (get_named_trigger_data(trigger_data)) | ||
5586 | goto enable; | ||
5587 | |||
5588 | if (has_hist_vars(hist_data)) | ||
5589 | save_hist_vars(hist_data); | ||
5590 | |||
5591 | ret = create_actions(hist_data, file); | ||
5592 | if (ret) | ||
5593 | goto out_unreg; | ||
5594 | |||
5595 | ret = tracing_map_init(hist_data->map); | ||
5596 | if (ret) | ||
5597 | goto out_unreg; | ||
5598 | enable: | ||
5599 | ret = hist_trigger_enable(trigger_data, file); | ||
5600 | if (ret) | ||
5601 | goto out_unreg; | ||
5602 | |||
5603 | mutex_lock(&synth_event_mutex); | ||
5604 | se_name = trace_event_name(file->event_call); | ||
5605 | se = find_synth_event(se_name); | ||
5606 | if (se) | ||
5607 | se->ref++; | ||
5608 | mutex_unlock(&synth_event_mutex); | ||
5609 | |||
1635 | /* Just return zero, not the number of registered triggers */ | 5610 | /* Just return zero, not the number of registered triggers */ |
1636 | ret = 0; | 5611 | ret = 0; |
1637 | out: | 5612 | out: |
5613 | if (ret == 0) | ||
5614 | hist_err_clear(); | ||
5615 | |||
1638 | return ret; | 5616 | return ret; |
5617 | out_unreg: | ||
5618 | cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); | ||
1639 | out_free: | 5619 | out_free: |
1640 | if (cmd_ops->set_filter) | 5620 | if (cmd_ops->set_filter) |
1641 | cmd_ops->set_filter(NULL, trigger_data, NULL); | 5621 | cmd_ops->set_filter(NULL, trigger_data, NULL); |
1642 | 5622 | ||
5623 | remove_hist_vars(hist_data); | ||
5624 | |||
1643 | kfree(trigger_data); | 5625 | kfree(trigger_data); |
1644 | 5626 | ||
1645 | destroy_hist_data(hist_data); | 5627 | destroy_hist_data(hist_data); |
@@ -1669,7 +5651,8 @@ __init int register_trigger_hist_cmd(void) | |||
1669 | } | 5651 | } |
1670 | 5652 | ||
1671 | static void | 5653 | static void |
1672 | hist_enable_trigger(struct event_trigger_data *data, void *rec) | 5654 | hist_enable_trigger(struct event_trigger_data *data, void *rec, |
5655 | struct ring_buffer_event *event) | ||
1673 | { | 5656 | { |
1674 | struct enable_trigger_data *enable_data = data->private_data; | 5657 | struct enable_trigger_data *enable_data = data->private_data; |
1675 | struct event_trigger_data *test; | 5658 | struct event_trigger_data *test; |
@@ -1685,7 +5668,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec) | |||
1685 | } | 5668 | } |
1686 | 5669 | ||
1687 | static void | 5670 | static void |
1688 | hist_enable_count_trigger(struct event_trigger_data *data, void *rec) | 5671 | hist_enable_count_trigger(struct event_trigger_data *data, void *rec, |
5672 | struct ring_buffer_event *event) | ||
1689 | { | 5673 | { |
1690 | if (!data->count) | 5674 | if (!data->count) |
1691 | return; | 5675 | return; |
@@ -1693,7 +5677,7 @@ hist_enable_count_trigger(struct event_trigger_data *data, void *rec) | |||
1693 | if (data->count != -1) | 5677 | if (data->count != -1) |
1694 | (data->count)--; | 5678 | (data->count)--; |
1695 | 5679 | ||
1696 | hist_enable_trigger(data, rec); | 5680 | hist_enable_trigger(data, rec, event); |
1697 | } | 5681 | } |
1698 | 5682 | ||
1699 | static struct event_trigger_ops hist_enable_trigger_ops = { | 5683 | static struct event_trigger_ops hist_enable_trigger_ops = { |
@@ -1798,3 +5782,31 @@ __init int register_trigger_hist_enable_disable_cmds(void) | |||
1798 | 5782 | ||
1799 | return ret; | 5783 | return ret; |
1800 | } | 5784 | } |
5785 | |||
5786 | static __init int trace_events_hist_init(void) | ||
5787 | { | ||
5788 | struct dentry *entry = NULL; | ||
5789 | struct dentry *d_tracer; | ||
5790 | int err = 0; | ||
5791 | |||
5792 | d_tracer = tracing_init_dentry(); | ||
5793 | if (IS_ERR(d_tracer)) { | ||
5794 | err = PTR_ERR(d_tracer); | ||
5795 | goto err; | ||
5796 | } | ||
5797 | |||
5798 | entry = tracefs_create_file("synthetic_events", 0644, d_tracer, | ||
5799 | NULL, &synth_events_fops); | ||
5800 | if (!entry) { | ||
5801 | err = -ENODEV; | ||
5802 | goto err; | ||
5803 | } | ||
5804 | |||
5805 | return err; | ||
5806 | err: | ||
5807 | pr_warn("Could not create tracefs 'synthetic_events' entry\n"); | ||
5808 | |||
5809 | return err; | ||
5810 | } | ||
5811 | |||
5812 | fs_initcall(trace_events_hist_init); | ||
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 87411482a46f..d251cabcf69a 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c | |||
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trigger_data *data) | |||
63 | * any trigger that should be deferred, ETT_NONE if nothing to defer. | 63 | * any trigger that should be deferred, ETT_NONE if nothing to defer. |
64 | */ | 64 | */ |
65 | enum event_trigger_type | 65 | enum event_trigger_type |
66 | event_triggers_call(struct trace_event_file *file, void *rec) | 66 | event_triggers_call(struct trace_event_file *file, void *rec, |
67 | struct ring_buffer_event *event) | ||
67 | { | 68 | { |
68 | struct event_trigger_data *data; | 69 | struct event_trigger_data *data; |
69 | enum event_trigger_type tt = ETT_NONE; | 70 | enum event_trigger_type tt = ETT_NONE; |
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) | |||
76 | if (data->paused) | 77 | if (data->paused) |
77 | continue; | 78 | continue; |
78 | if (!rec) { | 79 | if (!rec) { |
79 | data->ops->func(data, rec); | 80 | data->ops->func(data, rec, event); |
80 | continue; | 81 | continue; |
81 | } | 82 | } |
82 | filter = rcu_dereference_sched(data->filter); | 83 | filter = rcu_dereference_sched(data->filter); |
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) | |||
86 | tt |= data->cmd_ops->trigger_type; | 87 | tt |= data->cmd_ops->trigger_type; |
87 | continue; | 88 | continue; |
88 | } | 89 | } |
89 | data->ops->func(data, rec); | 90 | data->ops->func(data, rec, event); |
90 | } | 91 | } |
91 | return tt; | 92 | return tt; |
92 | } | 93 | } |
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); | |||
108 | void | 109 | void |
109 | event_triggers_post_call(struct trace_event_file *file, | 110 | event_triggers_post_call(struct trace_event_file *file, |
110 | enum event_trigger_type tt, | 111 | enum event_trigger_type tt, |
111 | void *rec) | 112 | void *rec, struct ring_buffer_event *event) |
112 | { | 113 | { |
113 | struct event_trigger_data *data; | 114 | struct event_trigger_data *data; |
114 | 115 | ||
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_event_file *file, | |||
116 | if (data->paused) | 117 | if (data->paused) |
117 | continue; | 118 | continue; |
118 | if (data->cmd_ops->trigger_type & tt) | 119 | if (data->cmd_ops->trigger_type & tt) |
119 | data->ops->func(data, rec); | 120 | data->ops->func(data, rec, event); |
120 | } | 121 | } |
121 | } | 122 | } |
122 | EXPORT_SYMBOL_GPL(event_triggers_post_call); | 123 | EXPORT_SYMBOL_GPL(event_triggers_post_call); |
@@ -908,8 +909,15 @@ void set_named_trigger_data(struct event_trigger_data *data, | |||
908 | data->named_data = named_data; | 909 | data->named_data = named_data; |
909 | } | 910 | } |
910 | 911 | ||
912 | struct event_trigger_data * | ||
913 | get_named_trigger_data(struct event_trigger_data *data) | ||
914 | { | ||
915 | return data->named_data; | ||
916 | } | ||
917 | |||
911 | static void | 918 | static void |
912 | traceon_trigger(struct event_trigger_data *data, void *rec) | 919 | traceon_trigger(struct event_trigger_data *data, void *rec, |
920 | struct ring_buffer_event *event) | ||
913 | { | 921 | { |
914 | if (tracing_is_on()) | 922 | if (tracing_is_on()) |
915 | return; | 923 | return; |
@@ -918,7 +926,8 @@ traceon_trigger(struct event_trigger_data *data, void *rec) | |||
918 | } | 926 | } |
919 | 927 | ||
920 | static void | 928 | static void |
921 | traceon_count_trigger(struct event_trigger_data *data, void *rec) | 929 | traceon_count_trigger(struct event_trigger_data *data, void *rec, |
930 | struct ring_buffer_event *event) | ||
922 | { | 931 | { |
923 | if (tracing_is_on()) | 932 | if (tracing_is_on()) |
924 | return; | 933 | return; |
@@ -933,7 +942,8 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec) | |||
933 | } | 942 | } |
934 | 943 | ||
935 | static void | 944 | static void |
936 | traceoff_trigger(struct event_trigger_data *data, void *rec) | 945 | traceoff_trigger(struct event_trigger_data *data, void *rec, |
946 | struct ring_buffer_event *event) | ||
937 | { | 947 | { |
938 | if (!tracing_is_on()) | 948 | if (!tracing_is_on()) |
939 | return; | 949 | return; |
@@ -942,7 +952,8 @@ traceoff_trigger(struct event_trigger_data *data, void *rec) | |||
942 | } | 952 | } |
943 | 953 | ||
944 | static void | 954 | static void |
945 | traceoff_count_trigger(struct event_trigger_data *data, void *rec) | 955 | traceoff_count_trigger(struct event_trigger_data *data, void *rec, |
956 | struct ring_buffer_event *event) | ||
946 | { | 957 | { |
947 | if (!tracing_is_on()) | 958 | if (!tracing_is_on()) |
948 | return; | 959 | return; |
@@ -1039,13 +1050,15 @@ static struct event_command trigger_traceoff_cmd = { | |||
1039 | 1050 | ||
1040 | #ifdef CONFIG_TRACER_SNAPSHOT | 1051 | #ifdef CONFIG_TRACER_SNAPSHOT |
1041 | static void | 1052 | static void |
1042 | snapshot_trigger(struct event_trigger_data *data, void *rec) | 1053 | snapshot_trigger(struct event_trigger_data *data, void *rec, |
1054 | struct ring_buffer_event *event) | ||
1043 | { | 1055 | { |
1044 | tracing_snapshot(); | 1056 | tracing_snapshot(); |
1045 | } | 1057 | } |
1046 | 1058 | ||
1047 | static void | 1059 | static void |
1048 | snapshot_count_trigger(struct event_trigger_data *data, void *rec) | 1060 | snapshot_count_trigger(struct event_trigger_data *data, void *rec, |
1061 | struct ring_buffer_event *event) | ||
1049 | { | 1062 | { |
1050 | if (!data->count) | 1063 | if (!data->count) |
1051 | return; | 1064 | return; |
@@ -1053,7 +1066,7 @@ snapshot_count_trigger(struct event_trigger_data *data, void *rec) | |||
1053 | if (data->count != -1) | 1066 | if (data->count != -1) |
1054 | (data->count)--; | 1067 | (data->count)--; |
1055 | 1068 | ||
1056 | snapshot_trigger(data, rec); | 1069 | snapshot_trigger(data, rec, event); |
1057 | } | 1070 | } |
1058 | 1071 | ||
1059 | static int | 1072 | static int |
@@ -1141,13 +1154,15 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } | |||
1141 | #endif | 1154 | #endif |
1142 | 1155 | ||
1143 | static void | 1156 | static void |
1144 | stacktrace_trigger(struct event_trigger_data *data, void *rec) | 1157 | stacktrace_trigger(struct event_trigger_data *data, void *rec, |
1158 | struct ring_buffer_event *event) | ||
1145 | { | 1159 | { |
1146 | trace_dump_stack(STACK_SKIP); | 1160 | trace_dump_stack(STACK_SKIP); |
1147 | } | 1161 | } |
1148 | 1162 | ||
1149 | static void | 1163 | static void |
1150 | stacktrace_count_trigger(struct event_trigger_data *data, void *rec) | 1164 | stacktrace_count_trigger(struct event_trigger_data *data, void *rec, |
1165 | struct ring_buffer_event *event) | ||
1151 | { | 1166 | { |
1152 | if (!data->count) | 1167 | if (!data->count) |
1153 | return; | 1168 | return; |
@@ -1155,7 +1170,7 @@ stacktrace_count_trigger(struct event_trigger_data *data, void *rec) | |||
1155 | if (data->count != -1) | 1170 | if (data->count != -1) |
1156 | (data->count)--; | 1171 | (data->count)--; |
1157 | 1172 | ||
1158 | stacktrace_trigger(data, rec); | 1173 | stacktrace_trigger(data, rec, event); |
1159 | } | 1174 | } |
1160 | 1175 | ||
1161 | static int | 1176 | static int |
@@ -1217,7 +1232,8 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void) | |||
1217 | } | 1232 | } |
1218 | 1233 | ||
1219 | static void | 1234 | static void |
1220 | event_enable_trigger(struct event_trigger_data *data, void *rec) | 1235 | event_enable_trigger(struct event_trigger_data *data, void *rec, |
1236 | struct ring_buffer_event *event) | ||
1221 | { | 1237 | { |
1222 | struct enable_trigger_data *enable_data = data->private_data; | 1238 | struct enable_trigger_data *enable_data = data->private_data; |
1223 | 1239 | ||
@@ -1228,7 +1244,8 @@ event_enable_trigger(struct event_trigger_data *data, void *rec) | |||
1228 | } | 1244 | } |
1229 | 1245 | ||
1230 | static void | 1246 | static void |
1231 | event_enable_count_trigger(struct event_trigger_data *data, void *rec) | 1247 | event_enable_count_trigger(struct event_trigger_data *data, void *rec, |
1248 | struct ring_buffer_event *event) | ||
1232 | { | 1249 | { |
1233 | struct enable_trigger_data *enable_data = data->private_data; | 1250 | struct enable_trigger_data *enable_data = data->private_data; |
1234 | 1251 | ||
@@ -1242,7 +1259,7 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec) | |||
1242 | if (data->count != -1) | 1259 | if (data->count != -1) |
1243 | (data->count)--; | 1260 | (data->count)--; |
1244 | 1261 | ||
1245 | event_enable_trigger(data, rec); | 1262 | event_enable_trigger(data, rec, event); |
1246 | } | 1263 | } |
1247 | 1264 | ||
1248 | int event_enable_trigger_print(struct seq_file *m, | 1265 | int event_enable_trigger_print(struct seq_file *m, |
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 07e75344725b..5cadb1b8b5fe 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c | |||
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) | |||
66 | return (u64)atomic64_read(&elt->fields[i].sum); | 66 | return (u64)atomic64_read(&elt->fields[i].sum); |
67 | } | 67 | } |
68 | 68 | ||
69 | /** | ||
70 | * tracing_map_set_var - Assign a tracing_map_elt's variable field | ||
71 | * @elt: The tracing_map_elt | ||
72 | * @i: The index of the given variable associated with the tracing_map_elt | ||
73 | * @n: The value to assign | ||
74 | * | ||
75 | * Assign n to variable i associated with the specified tracing_map_elt | ||
76 | * instance. The index i is the index returned by the call to | ||
77 | * tracing_map_add_var() when the tracing map was set up. | ||
78 | */ | ||
79 | void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) | ||
80 | { | ||
81 | atomic64_set(&elt->vars[i], n); | ||
82 | elt->var_set[i] = true; | ||
83 | } | ||
84 | |||
85 | /** | ||
86 | * tracing_map_var_set - Return whether or not a variable has been set | ||
87 | * @elt: The tracing_map_elt | ||
88 | * @i: The index of the given variable associated with the tracing_map_elt | ||
89 | * | ||
90 | * Return true if the variable has been set, false otherwise. The | ||
91 | * index i is the index returned by the call to tracing_map_add_var() | ||
92 | * when the tracing map was set up. | ||
93 | */ | ||
94 | bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) | ||
95 | { | ||
96 | return elt->var_set[i]; | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * tracing_map_read_var - Return the value of a tracing_map_elt's variable field | ||
101 | * @elt: The tracing_map_elt | ||
102 | * @i: The index of the given variable associated with the tracing_map_elt | ||
103 | * | ||
104 | * Retrieve the value of the variable i associated with the specified | ||
105 | * tracing_map_elt instance. The index i is the index returned by the | ||
106 | * call to tracing_map_add_var() when the tracing map was set | ||
107 | * up. | ||
108 | * | ||
109 | * Return: The variable value associated with field i for elt. | ||
110 | */ | ||
111 | u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) | ||
112 | { | ||
113 | return (u64)atomic64_read(&elt->vars[i]); | ||
114 | } | ||
115 | |||
116 | /** | ||
117 | * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field | ||
118 | * @elt: The tracing_map_elt | ||
119 | * @i: The index of the given variable associated with the tracing_map_elt | ||
120 | * | ||
121 | * Retrieve the value of the variable i associated with the specified | ||
122 | * tracing_map_elt instance, and reset the variable to the 'not set' | ||
123 | * state. The index i is the index returned by the call to | ||
124 | * tracing_map_add_var() when the tracing map was set up. The reset | ||
125 | * essentially makes the variable a read-once variable if it's only | ||
126 | * accessed using this function. | ||
127 | * | ||
128 | * Return: The variable value associated with field i for elt. | ||
129 | */ | ||
130 | u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) | ||
131 | { | ||
132 | elt->var_set[i] = false; | ||
133 | return (u64)atomic64_read(&elt->vars[i]); | ||
134 | } | ||
135 | |||
69 | int tracing_map_cmp_string(void *val_a, void *val_b) | 136 | int tracing_map_cmp_string(void *val_a, void *val_b) |
70 | { | 137 | { |
71 | char *a = val_a; | 138 | char *a = val_a; |
@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tracing_map *map) | |||
171 | } | 238 | } |
172 | 239 | ||
173 | /** | 240 | /** |
241 | * tracing_map_add_var - Add a field describing a tracing_map var | ||
242 | * @map: The tracing_map | ||
243 | * | ||
244 | * Add a var to the map and return the index identifying it in the map | ||
245 | * and associated tracing_map_elts. This is the index used for | ||
246 | * instance to update a var for a particular tracing_map_elt using | ||
247 | * tracing_map_update_var() or reading it via tracing_map_read_var(). | ||
248 | * | ||
249 | * Return: The index identifying the var in the map and associated | ||
250 | * tracing_map_elts, or -EINVAL on error. | ||
251 | */ | ||
252 | int tracing_map_add_var(struct tracing_map *map) | ||
253 | { | ||
254 | int ret = -EINVAL; | ||
255 | |||
256 | if (map->n_vars < TRACING_MAP_VARS_MAX) | ||
257 | ret = map->n_vars++; | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | /** | ||
174 | * tracing_map_add_key_field - Add a field describing a tracing_map key | 263 | * tracing_map_add_key_field - Add a field describing a tracing_map key |
175 | * @map: The tracing_map | 264 | * @map: The tracing_map |
176 | * @offset: The offset within the key | 265 | * @offset: The offset within the key |
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct tracing_map_elt *elt) | |||
280 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | 369 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) |
281 | atomic64_set(&elt->fields[i].sum, 0); | 370 | atomic64_set(&elt->fields[i].sum, 0); |
282 | 371 | ||
372 | for (i = 0; i < elt->map->n_vars; i++) { | ||
373 | atomic64_set(&elt->vars[i], 0); | ||
374 | elt->var_set[i] = false; | ||
375 | } | ||
376 | |||
283 | if (elt->map->ops && elt->map->ops->elt_clear) | 377 | if (elt->map->ops && elt->map->ops->elt_clear) |
284 | elt->map->ops->elt_clear(elt); | 378 | elt->map->ops->elt_clear(elt); |
285 | } | 379 | } |
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt) | |||
306 | if (elt->map->ops && elt->map->ops->elt_free) | 400 | if (elt->map->ops && elt->map->ops->elt_free) |
307 | elt->map->ops->elt_free(elt); | 401 | elt->map->ops->elt_free(elt); |
308 | kfree(elt->fields); | 402 | kfree(elt->fields); |
403 | kfree(elt->vars); | ||
404 | kfree(elt->var_set); | ||
309 | kfree(elt->key); | 405 | kfree(elt->key); |
310 | kfree(elt); | 406 | kfree(elt); |
311 | } | 407 | } |
@@ -333,6 +429,18 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) | |||
333 | goto free; | 429 | goto free; |
334 | } | 430 | } |
335 | 431 | ||
432 | elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); | ||
433 | if (!elt->vars) { | ||
434 | err = -ENOMEM; | ||
435 | goto free; | ||
436 | } | ||
437 | |||
438 | elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); | ||
439 | if (!elt->var_set) { | ||
440 | err = -ENOMEM; | ||
441 | goto free; | ||
442 | } | ||
443 | |||
336 | tracing_map_elt_init_fields(elt); | 444 | tracing_map_elt_init_fields(elt); |
337 | 445 | ||
338 | if (map->ops && map->ops->elt_alloc) { | 446 | if (map->ops && map->ops->elt_alloc) { |
@@ -414,7 +522,9 @@ static inline struct tracing_map_elt * | |||
414 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | 522 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) |
415 | { | 523 | { |
416 | u32 idx, key_hash, test_key; | 524 | u32 idx, key_hash, test_key; |
525 | int dup_try = 0; | ||
417 | struct tracing_map_entry *entry; | 526 | struct tracing_map_entry *entry; |
527 | struct tracing_map_elt *val; | ||
418 | 528 | ||
419 | key_hash = jhash(key, map->key_size, 0); | 529 | key_hash = jhash(key, map->key_size, 0); |
420 | if (key_hash == 0) | 530 | if (key_hash == 0) |
@@ -426,11 +536,33 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |||
426 | entry = TRACING_MAP_ENTRY(map->map, idx); | 536 | entry = TRACING_MAP_ENTRY(map->map, idx); |
427 | test_key = entry->key; | 537 | test_key = entry->key; |
428 | 538 | ||
429 | if (test_key && test_key == key_hash && entry->val && | 539 | if (test_key && test_key == key_hash) { |
430 | keys_match(key, entry->val->key, map->key_size)) { | 540 | val = READ_ONCE(entry->val); |
431 | if (!lookup_only) | 541 | if (val && |
432 | atomic64_inc(&map->hits); | 542 | keys_match(key, val->key, map->key_size)) { |
433 | return entry->val; | 543 | if (!lookup_only) |
544 | atomic64_inc(&map->hits); | ||
545 | return val; | ||
546 | } else if (unlikely(!val)) { | ||
547 | /* | ||
548 | * The key is present. But, val (pointer to elt | ||
549 | * struct) is still NULL. which means some other | ||
550 | * thread is in the process of inserting an | ||
551 | * element. | ||
552 | * | ||
553 | * On top of that, it's key_hash is same as the | ||
554 | * one being inserted right now. So, it's | ||
555 | * possible that the element has the same | ||
556 | * key as well. | ||
557 | */ | ||
558 | |||
559 | dup_try++; | ||
560 | if (dup_try > map->map_size) { | ||
561 | atomic64_inc(&map->drops); | ||
562 | break; | ||
563 | } | ||
564 | continue; | ||
565 | } | ||
434 | } | 566 | } |
435 | 567 | ||
436 | if (!test_key) { | 568 | if (!test_key) { |
@@ -452,6 +584,13 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |||
452 | atomic64_inc(&map->hits); | 584 | atomic64_inc(&map->hits); |
453 | 585 | ||
454 | return entry->val; | 586 | return entry->val; |
587 | } else { | ||
588 | /* | ||
589 | * cmpxchg() failed. Loop around once | ||
590 | * more to check what key was inserted. | ||
591 | */ | ||
592 | dup_try++; | ||
593 | continue; | ||
455 | } | 594 | } |
456 | } | 595 | } |
457 | 596 | ||
@@ -816,67 +955,15 @@ create_sort_entry(void *key, struct tracing_map_elt *elt) | |||
816 | return sort_entry; | 955 | return sort_entry; |
817 | } | 956 | } |
818 | 957 | ||
819 | static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt) | 958 | static void detect_dups(struct tracing_map_sort_entry **sort_entries, |
820 | { | ||
821 | struct tracing_map_elt *dup_elt; | ||
822 | unsigned int i; | ||
823 | |||
824 | dup_elt = tracing_map_elt_alloc(elt->map); | ||
825 | if (IS_ERR(dup_elt)) | ||
826 | return NULL; | ||
827 | |||
828 | if (elt->map->ops && elt->map->ops->elt_copy) | ||
829 | elt->map->ops->elt_copy(dup_elt, elt); | ||
830 | |||
831 | dup_elt->private_data = elt->private_data; | ||
832 | memcpy(dup_elt->key, elt->key, elt->map->key_size); | ||
833 | |||
834 | for (i = 0; i < elt->map->n_fields; i++) { | ||
835 | atomic64_set(&dup_elt->fields[i].sum, | ||
836 | atomic64_read(&elt->fields[i].sum)); | ||
837 | dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; | ||
838 | } | ||
839 | |||
840 | return dup_elt; | ||
841 | } | ||
842 | |||
843 | static int merge_dup(struct tracing_map_sort_entry **sort_entries, | ||
844 | unsigned int target, unsigned int dup) | ||
845 | { | ||
846 | struct tracing_map_elt *target_elt, *elt; | ||
847 | bool first_dup = (target - dup) == 1; | ||
848 | int i; | ||
849 | |||
850 | if (first_dup) { | ||
851 | elt = sort_entries[target]->elt; | ||
852 | target_elt = copy_elt(elt); | ||
853 | if (!target_elt) | ||
854 | return -ENOMEM; | ||
855 | sort_entries[target]->elt = target_elt; | ||
856 | sort_entries[target]->elt_copied = true; | ||
857 | } else | ||
858 | target_elt = sort_entries[target]->elt; | ||
859 | |||
860 | elt = sort_entries[dup]->elt; | ||
861 | |||
862 | for (i = 0; i < elt->map->n_fields; i++) | ||
863 | atomic64_add(atomic64_read(&elt->fields[i].sum), | ||
864 | &target_elt->fields[i].sum); | ||
865 | |||
866 | sort_entries[dup]->dup = true; | ||
867 | |||
868 | return 0; | ||
869 | } | ||
870 | |||
871 | static int merge_dups(struct tracing_map_sort_entry **sort_entries, | ||
872 | int n_entries, unsigned int key_size) | 959 | int n_entries, unsigned int key_size) |
873 | { | 960 | { |
874 | unsigned int dups = 0, total_dups = 0; | 961 | unsigned int dups = 0, total_dups = 0; |
875 | int err, i, j; | 962 | int i; |
876 | void *key; | 963 | void *key; |
877 | 964 | ||
878 | if (n_entries < 2) | 965 | if (n_entries < 2) |
879 | return total_dups; | 966 | return; |
880 | 967 | ||
881 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), | 968 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), |
882 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | 969 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); |
@@ -885,30 +972,14 @@ static int merge_dups(struct tracing_map_sort_entry **sort_entries, | |||
885 | for (i = 1; i < n_entries; i++) { | 972 | for (i = 1; i < n_entries; i++) { |
886 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | 973 | if (!memcmp(sort_entries[i]->key, key, key_size)) { |
887 | dups++; total_dups++; | 974 | dups++; total_dups++; |
888 | err = merge_dup(sort_entries, i - dups, i); | ||
889 | if (err) | ||
890 | return err; | ||
891 | continue; | 975 | continue; |
892 | } | 976 | } |
893 | key = sort_entries[i]->key; | 977 | key = sort_entries[i]->key; |
894 | dups = 0; | 978 | dups = 0; |
895 | } | 979 | } |
896 | 980 | ||
897 | if (!total_dups) | 981 | WARN_ONCE(total_dups > 0, |
898 | return total_dups; | 982 | "Duplicates detected: %d\n", total_dups); |
899 | |||
900 | for (i = 0, j = 0; i < n_entries; i++) { | ||
901 | if (!sort_entries[i]->dup) { | ||
902 | sort_entries[j] = sort_entries[i]; | ||
903 | if (j++ != i) | ||
904 | sort_entries[i] = NULL; | ||
905 | } else { | ||
906 | destroy_sort_entry(sort_entries[i]); | ||
907 | sort_entries[i] = NULL; | ||
908 | } | ||
909 | } | ||
910 | |||
911 | return total_dups; | ||
912 | } | 983 | } |
913 | 984 | ||
914 | static bool is_key(struct tracing_map *map, unsigned int field_idx) | 985 | static bool is_key(struct tracing_map *map, unsigned int field_idx) |
@@ -1034,10 +1105,7 @@ int tracing_map_sort_entries(struct tracing_map *map, | |||
1034 | return 1; | 1105 | return 1; |
1035 | } | 1106 | } |
1036 | 1107 | ||
1037 | ret = merge_dups(entries, n_entries, map->key_size); | 1108 | detect_dups(entries, n_entries, map->key_size); |
1038 | if (ret < 0) | ||
1039 | goto free; | ||
1040 | n_entries -= ret; | ||
1041 | 1109 | ||
1042 | if (is_key(map, sort_keys[0].field_idx)) | 1110 | if (is_key(map, sort_keys[0].field_idx)) |
1043 | cmp_entries_fn = cmp_entries_key; | 1111 | cmp_entries_fn = cmp_entries_key; |
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h index 5b5bbf8ae550..053eb92b2d31 100644 --- a/kernel/trace/tracing_map.h +++ b/kernel/trace/tracing_map.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #define TRACING_MAP_VALS_MAX 3 | 10 | #define TRACING_MAP_VALS_MAX 3 |
11 | #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ | 11 | #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ |
12 | TRACING_MAP_VALS_MAX) | 12 | TRACING_MAP_VALS_MAX) |
13 | #define TRACING_MAP_VARS_MAX 16 | ||
13 | #define TRACING_MAP_SORT_KEYS_MAX 2 | 14 | #define TRACING_MAP_SORT_KEYS_MAX 2 |
14 | 15 | ||
15 | typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); | 16 | typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); |
@@ -137,6 +138,8 @@ struct tracing_map_field { | |||
137 | struct tracing_map_elt { | 138 | struct tracing_map_elt { |
138 | struct tracing_map *map; | 139 | struct tracing_map *map; |
139 | struct tracing_map_field *fields; | 140 | struct tracing_map_field *fields; |
141 | atomic64_t *vars; | ||
142 | bool *var_set; | ||
140 | void *key; | 143 | void *key; |
141 | void *private_data; | 144 | void *private_data; |
142 | }; | 145 | }; |
@@ -192,6 +195,7 @@ struct tracing_map { | |||
192 | int key_idx[TRACING_MAP_KEYS_MAX]; | 195 | int key_idx[TRACING_MAP_KEYS_MAX]; |
193 | unsigned int n_keys; | 196 | unsigned int n_keys; |
194 | struct tracing_map_sort_key sort_key; | 197 | struct tracing_map_sort_key sort_key; |
198 | unsigned int n_vars; | ||
195 | atomic64_t hits; | 199 | atomic64_t hits; |
196 | atomic64_t drops; | 200 | atomic64_t drops; |
197 | }; | 201 | }; |
@@ -215,11 +219,6 @@ struct tracing_map { | |||
215 | * Element allocation occurs before tracing begins, when the | 219 | * Element allocation occurs before tracing begins, when the |
216 | * tracing_map_init() call is made by client code. | 220 | * tracing_map_init() call is made by client code. |
217 | * | 221 | * |
218 | * @elt_copy: At certain points in the lifetime of an element, it may | ||
219 | * need to be copied. The copy should include a copy of the | ||
220 | * client-allocated data, which can be copied into the 'to' | ||
221 | * element from the 'from' element. | ||
222 | * | ||
223 | * @elt_free: When a tracing_map_elt is freed, this function is called | 222 | * @elt_free: When a tracing_map_elt is freed, this function is called |
224 | * and allows client-allocated per-element data to be freed. | 223 | * and allows client-allocated per-element data to be freed. |
225 | * | 224 | * |
@@ -233,8 +232,6 @@ struct tracing_map { | |||
233 | */ | 232 | */ |
234 | struct tracing_map_ops { | 233 | struct tracing_map_ops { |
235 | int (*elt_alloc)(struct tracing_map_elt *elt); | 234 | int (*elt_alloc)(struct tracing_map_elt *elt); |
236 | void (*elt_copy)(struct tracing_map_elt *to, | ||
237 | struct tracing_map_elt *from); | ||
238 | void (*elt_free)(struct tracing_map_elt *elt); | 235 | void (*elt_free)(struct tracing_map_elt *elt); |
239 | void (*elt_clear)(struct tracing_map_elt *elt); | 236 | void (*elt_clear)(struct tracing_map_elt *elt); |
240 | void (*elt_init)(struct tracing_map_elt *elt); | 237 | void (*elt_init)(struct tracing_map_elt *elt); |
@@ -248,6 +245,7 @@ tracing_map_create(unsigned int map_bits, | |||
248 | extern int tracing_map_init(struct tracing_map *map); | 245 | extern int tracing_map_init(struct tracing_map *map); |
249 | 246 | ||
250 | extern int tracing_map_add_sum_field(struct tracing_map *map); | 247 | extern int tracing_map_add_sum_field(struct tracing_map *map); |
248 | extern int tracing_map_add_var(struct tracing_map *map); | ||
251 | extern int tracing_map_add_key_field(struct tracing_map *map, | 249 | extern int tracing_map_add_key_field(struct tracing_map *map, |
252 | unsigned int offset, | 250 | unsigned int offset, |
253 | tracing_map_cmp_fn_t cmp_fn); | 251 | tracing_map_cmp_fn_t cmp_fn); |
@@ -267,7 +265,13 @@ extern int tracing_map_cmp_none(void *val_a, void *val_b); | |||
267 | 265 | ||
268 | extern void tracing_map_update_sum(struct tracing_map_elt *elt, | 266 | extern void tracing_map_update_sum(struct tracing_map_elt *elt, |
269 | unsigned int i, u64 n); | 267 | unsigned int i, u64 n); |
268 | extern void tracing_map_set_var(struct tracing_map_elt *elt, | ||
269 | unsigned int i, u64 n); | ||
270 | extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i); | ||
270 | extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i); | 271 | extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i); |
272 | extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i); | ||
273 | extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i); | ||
274 | |||
271 | extern void tracing_map_set_field_descr(struct tracing_map *map, | 275 | extern void tracing_map_set_field_descr(struct tracing_map *map, |
272 | unsigned int i, | 276 | unsigned int i, |
273 | unsigned int key_offset, | 277 | unsigned int key_offset, |
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d7a708f82559..89f8a4a4b770 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
@@ -2591,6 +2591,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) | |||
2591 | case 's': | 2591 | case 's': |
2592 | case 'F': | 2592 | case 'F': |
2593 | case 'f': | 2593 | case 'f': |
2594 | case 'x': | ||
2595 | case 'K': | ||
2594 | save_arg(void *); | 2596 | save_arg(void *); |
2595 | break; | 2597 | break; |
2596 | default: | 2598 | default: |
@@ -2765,6 +2767,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) | |||
2765 | case 's': | 2767 | case 's': |
2766 | case 'F': | 2768 | case 'F': |
2767 | case 'f': | 2769 | case 'f': |
2770 | case 'x': | ||
2771 | case 'K': | ||
2768 | process = true; | 2772 | process = true; |
2769 | break; | 2773 | break; |
2770 | default: | 2774 | default: |
diff --git a/security/security.c b/security/security.c index d2a84cda7e8d..7bc2fde023a7 100644 --- a/security/security.c +++ b/security/security.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <linux/string.h> | 30 | #include <linux/string.h> |
31 | #include <net/flow.h> | 31 | #include <net/flow.h> |
32 | 32 | ||
33 | #include <trace/events/initcall.h> | ||
34 | |||
33 | #define MAX_LSM_EVM_XATTR 2 | 35 | #define MAX_LSM_EVM_XATTR 2 |
34 | 36 | ||
35 | /* Maximum number of letters for an LSM name string */ | 37 | /* Maximum number of letters for an LSM name string */ |
@@ -45,10 +47,14 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = | |||
45 | 47 | ||
46 | static void __init do_security_initcalls(void) | 48 | static void __init do_security_initcalls(void) |
47 | { | 49 | { |
50 | int ret; | ||
48 | initcall_t *call; | 51 | initcall_t *call; |
49 | call = __security_initcall_start; | 52 | call = __security_initcall_start; |
53 | trace_initcall_level("security"); | ||
50 | while (call < __security_initcall_end) { | 54 | while (call < __security_initcall_end) { |
51 | (*call) (); | 55 | trace_initcall_start((*call)); |
56 | ret = (*call) (); | ||
57 | trace_initcall_finish((*call), ret); | ||
52 | call++; | 58 | call++; |
53 | } | 59 | } |
54 | } | 60 | } |
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index df3dd7fe5f9b..2a4f16fc9819 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions | |||
@@ -59,6 +59,13 @@ disable_events() { | |||
59 | echo 0 > events/enable | 59 | echo 0 > events/enable |
60 | } | 60 | } |
61 | 61 | ||
62 | clear_synthetic_events() { # reset all current synthetic events | ||
63 | grep -v ^# synthetic_events | | ||
64 | while read line; do | ||
65 | echo "!$line" >> synthetic_events | ||
66 | done | ||
67 | } | ||
68 | |||
62 | initialize_ftrace() { # Reset ftrace to initial-state | 69 | initialize_ftrace() { # Reset ftrace to initial-state |
63 | # As the initial state, ftrace will be set to nop tracer, | 70 | # As the initial state, ftrace will be set to nop tracer, |
64 | # no events, no triggers, no filters, no function filters, | 71 | # no events, no triggers, no filters, no function filters, |
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc new file mode 100644 index 000000000000..786dce7e48be --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc | |||
@@ -0,0 +1,39 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test extended error support | ||
3 | |||
4 | |||
5 | do_reset() { | ||
6 | reset_trigger | ||
7 | echo > set_event | ||
8 | clear_trace | ||
9 | } | ||
10 | |||
11 | fail() { #msg | ||
12 | do_reset | ||
13 | echo $1 | ||
14 | exit_fail | ||
15 | } | ||
16 | |||
17 | if [ ! -f set_event ]; then | ||
18 | echo "event tracing is not supported" | ||
19 | exit_unsupported | ||
20 | fi | ||
21 | |||
22 | if [ ! -f synthetic_events ]; then | ||
23 | echo "synthetic event is not supported" | ||
24 | exit_unsupported | ||
25 | fi | ||
26 | |||
27 | reset_tracer | ||
28 | do_reset | ||
29 | |||
30 | echo "Test extended error support" | ||
31 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | ||
32 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null | ||
33 | if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then | ||
34 | fail "Failed to generate extended error in histogram" | ||
35 | fi | ||
36 | |||
37 | do_reset | ||
38 | |||
39 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc new file mode 100644 index 000000000000..7fd5b4a8f060 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc | |||
@@ -0,0 +1,54 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test field variable support | ||
3 | |||
4 | do_reset() { | ||
5 | reset_trigger | ||
6 | echo > set_event | ||
7 | clear_trace | ||
8 | } | ||
9 | |||
10 | fail() { #msg | ||
11 | do_reset | ||
12 | echo $1 | ||
13 | exit_fail | ||
14 | } | ||
15 | |||
16 | if [ ! -f set_event ]; then | ||
17 | echo "event tracing is not supported" | ||
18 | exit_unsupported | ||
19 | fi | ||
20 | |||
21 | if [ ! -f synthetic_events ]; then | ||
22 | echo "synthetic event is not supported" | ||
23 | exit_unsupported | ||
24 | fi | ||
25 | |||
26 | clear_synthetic_events | ||
27 | reset_tracer | ||
28 | do_reset | ||
29 | |||
30 | echo "Test field variable support" | ||
31 | |||
32 | echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events | ||
33 | echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger | ||
34 | echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger | ||
35 | echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger | ||
36 | |||
37 | ping localhost -c 3 | ||
38 | if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then | ||
39 | fail "Failed to create inter-event histogram" | ||
40 | fi | ||
41 | |||
42 | if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then | ||
43 | fail "Failed to create histogram with field variable" | ||
44 | fi | ||
45 | |||
46 | echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | ||
47 | |||
48 | if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then | ||
49 | fail "Failed to remove histogram with field variable" | ||
50 | fi | ||
51 | |||
52 | do_reset | ||
53 | |||
54 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc new file mode 100644 index 000000000000..c93dbe38b5df --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | |||
@@ -0,0 +1,58 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test inter-event combined histogram trigger | ||
3 | |||
4 | do_reset() { | ||
5 | reset_trigger | ||
6 | echo > set_event | ||
7 | clear_trace | ||
8 | } | ||
9 | |||
10 | fail() { #msg | ||
11 | do_reset | ||
12 | echo $1 | ||
13 | exit_fail | ||
14 | } | ||
15 | |||
16 | if [ ! -f set_event ]; then | ||
17 | echo "event tracing is not supported" | ||
18 | exit_unsupported | ||
19 | fi | ||
20 | |||
21 | if [ ! -f synthetic_events ]; then | ||
22 | echo "synthetic event is not supported" | ||
23 | exit_unsupported | ||
24 | fi | ||
25 | |||
26 | reset_tracer | ||
27 | do_reset | ||
28 | clear_synthetic_events | ||
29 | |||
30 | echo "Test create synthetic event" | ||
31 | |||
32 | echo 'waking_latency u64 lat pid_t pid' > synthetic_events | ||
33 | if [ ! -d events/synthetic/waking_latency ]; then | ||
34 | fail "Failed to create waking_latency synthetic event" | ||
35 | fi | ||
36 | |||
37 | echo "Test combined histogram" | ||
38 | |||
39 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger | ||
40 | echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger | ||
41 | echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger | ||
42 | |||
43 | echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events | ||
44 | echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger | ||
45 | echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger | ||
46 | |||
47 | echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events | ||
48 | echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger | ||
49 | echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger | ||
50 | |||
51 | ping localhost -c 3 | ||
52 | if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then | ||
53 | fail "Failed to create combined histogram" | ||
54 | fi | ||
55 | |||
56 | do_reset | ||
57 | |||
58 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc new file mode 100644 index 000000000000..e84e7d048566 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc | |||
@@ -0,0 +1,50 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test inter-event histogram trigger onmatch action | ||
3 | |||
4 | do_reset() { | ||
5 | reset_trigger | ||
6 | echo > set_event | ||
7 | clear_trace | ||
8 | } | ||
9 | |||
10 | fail() { #msg | ||
11 | do_reset | ||
12 | echo $1 | ||
13 | exit_fail | ||
14 | } | ||
15 | |||
16 | if [ ! -f set_event ]; then | ||
17 | echo "event tracing is not supported" | ||
18 | exit_unsupported | ||
19 | fi | ||
20 | |||
21 | if [ ! -f synthetic_events ]; then | ||
22 | echo "synthetic event is not supported" | ||
23 | exit_unsupported | ||
24 | fi | ||
25 | |||
26 | clear_synthetic_events | ||
27 | reset_tracer | ||
28 | do_reset | ||
29 | |||
30 | echo "Test create synthetic event" | ||
31 | |||
32 | echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | ||
33 | if [ ! -d events/synthetic/wakeup_latency ]; then | ||
34 | fail "Failed to create wakeup_latency synthetic event" | ||
35 | fi | ||
36 | |||
37 | echo "Test create histogram for synthetic event" | ||
38 | echo "Test histogram variables,simple expression support and onmatch action" | ||
39 | |||
40 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | ||
41 | echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger | ||
42 | echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger | ||
43 | ping localhost -c 5 | ||
44 | if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then | ||
45 | fail "Failed to create onmatch action inter-event histogram" | ||
46 | fi | ||
47 | |||
48 | do_reset | ||
49 | |||
50 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc new file mode 100644 index 000000000000..7907d8aacde3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc | |||
@@ -0,0 +1,50 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test inter-event histogram trigger onmatch-onmax action | ||
3 | |||
4 | do_reset() { | ||
5 | reset_trigger | ||
6 | echo > set_event | ||
7 | clear_trace | ||
8 | } | ||
9 | |||
10 | fail() { #msg | ||
11 | do_reset | ||
12 | echo $1 | ||
13 | exit_fail | ||
14 | } | ||
15 | |||
16 | if [ ! -f set_event ]; then | ||
17 | echo "event tracing is not supported" | ||
18 | exit_unsupported | ||
19 | fi | ||
20 | |||
21 | if [ ! -f synthetic_events ]; then | ||
22 | echo "synthetic event is not supported" | ||
23 | exit_unsupported | ||
24 | fi | ||
25 | |||
26 | clear_synthetic_events | ||
27 | reset_tracer | ||
28 | do_reset | ||
29 | |||
30 | echo "Test create synthetic event" | ||
31 | |||
32 | echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | ||
33 | if [ ! -d events/synthetic/wakeup_latency ]; then | ||
34 | fail "Failed to create wakeup_latency synthetic event" | ||
35 | fi | ||
36 | |||
37 | echo "Test create histogram for synthetic event" | ||
38 | echo "Test histogram variables,simple expression support and onmatch-onmax action" | ||
39 | |||
40 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | ||
41 | echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | ||
42 | echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger | ||
43 | ping localhost -c 5 | ||
44 | if ! grep -q "ping" events/synthetic/wakeup_latency/hist || ! grep -q "max:" events/sched/sched_switch/hist; then | ||
45 | fail "Failed to create onmatch-onmax action inter-event histogram" | ||
46 | fi | ||
47 | |||
48 | do_reset | ||
49 | |||
50 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc new file mode 100644 index 000000000000..38b7ed6242b2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc | |||
@@ -0,0 +1,48 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test inter-event histogram trigger onmax action | ||
3 | |||
4 | do_reset() { | ||
5 | reset_trigger | ||
6 | echo > set_event | ||
7 | clear_trace | ||
8 | } | ||
9 | |||
10 | fail() { #msg | ||
11 | do_reset | ||
12 | echo $1 | ||
13 | exit_fail | ||
14 | } | ||
15 | |||
16 | if [ ! -f set_event ]; then | ||
17 | echo "event tracing is not supported" | ||
18 | exit_unsupported | ||
19 | fi | ||
20 | |||
21 | if [ ! -f synthetic_events ]; then | ||
22 | echo "synthetic event is not supported" | ||
23 | exit_unsupported | ||
24 | fi | ||
25 | |||
26 | clear_synthetic_events | ||
27 | reset_tracer | ||
28 | do_reset | ||
29 | |||
30 | echo "Test create synthetic event" | ||
31 | |||
32 | echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | ||
33 | if [ ! -d events/synthetic/wakeup_latency ]; then | ||
34 | fail "Failed to create wakeup_latency synthetic event" | ||
35 | fi | ||
36 | |||
37 | echo "Test onmax action" | ||
38 | |||
39 | echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger | ||
40 | echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | ||
41 | ping localhost -c 3 | ||
42 | if ! grep -q "max:" events/sched/sched_switch/hist; then | ||
43 | fail "Failed to create onmax action inter-event histogram" | ||
44 | fi | ||
45 | |||
46 | do_reset | ||
47 | |||
48 | exit 0 | ||
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc new file mode 100644 index 000000000000..cef11377dcbd --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc | |||
@@ -0,0 +1,54 @@ | |||
1 | #!/bin/sh | ||
2 | # description: event trigger - test synthetic event create remove | ||
3 | do_reset() { | ||
4 | reset_trigger | ||
5 | echo > set_event | ||
6 | clear_trace | ||
7 | } | ||
8 | |||
9 | fail() { #msg | ||
10 | do_reset | ||
11 | echo $1 | ||
12 | exit_fail | ||
13 | } | ||
14 | |||
15 | if [ ! -f set_event ]; then | ||
16 | echo "event tracing is not supported" | ||
17 | exit_unsupported | ||
18 | fi | ||
19 | |||
20 | if [ ! -f synthetic_events ]; then | ||
21 | echo "synthetic event is not supported" | ||
22 | exit_unsupported | ||
23 | fi | ||
24 | |||
25 | clear_synthetic_events | ||
26 | reset_tracer | ||
27 | do_reset | ||
28 | |||
29 | echo "Test create synthetic event" | ||
30 | |||
31 | echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | ||
32 | if [ ! -d events/synthetic/wakeup_latency ]; then | ||
33 | fail "Failed to create wakeup_latency synthetic event" | ||
34 | fi | ||
35 | |||
36 | reset_trigger | ||
37 | |||
38 | echo "Test create synthetic event with an error" | ||
39 | echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events 2> /dev/null | ||
40 | if [ -d events/synthetic/wakeup_latency ]; then | ||
41 | fail "Created wakeup_latency synthetic event with an invalid format" | ||
42 | fi | ||
43 | |||
44 | reset_trigger | ||
45 | |||
46 | echo "Test remove synthetic event" | ||
47 | echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | ||
48 | if [ -d events/synthetic/wakeup_latency ]; then | ||
49 | fail "Failed to delete wakeup_latency synthetic event" | ||
50 | fi | ||
51 | |||
52 | do_reset | ||
53 | |||
54 | exit 0 | ||