aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Steven Rostedt (Red Hat) <rostedt@goodmis.org> 2016-01-07 15:40:01 -0500
committer Steven Rostedt <rostedt@goodmis.org> 2016-01-07 15:40:01 -0500
commit b7ffffbb46f205e7727a18bcc7a46c3c2b534f7c (patch)
tree 56f532feb0be642c9e2aa24250430c8c960eed78
parent c5d641f92c9633f568740332989c067a0ba7d4dc (diff)
ftrace: Add infrastructure for delayed enabling of module functions
Qiu Peiyang pointed out that there's a race when enabling function tracing and loading a module. In order to make the modifications of converting nops in the prologue of functions into callbacks, the text needs to be converted from read-only to read-write. When enabling function tracing, the text permission is updated, the functions are modified, and then the permissions are put back.

When loading a module, the updates to convert function calls to mcount are done before the module text is set to read-only. But after they are done, the module text is visible to the function tracer. Thus we have the following race:

    CPU 0                               CPU 1
    -----                               -----
    start function tracing
    set text to read-write
                                        load_module
                                        add functions to ftrace
                                        set module text read-only

    update all functions to callbacks
    modify module functions too
    < Can't, it's read-only >

When this happens, ftrace detects the issue and disables itself till the next reboot.

To fix this, a new DISABLED flag is added for ftrace records, which all module functions get when they are added. Then later, after the module code is all set, the records will have the DISABLED flag cleared, and they will be enabled if any callback wants all functions to be traced.

Note, this doesn't yet move the enabling to a later point. It simply changes ftrace_module_init() to do both the setting of DISABLED records and then immediately call the enable code. This helps with testing this new code as it has the same behavior as previously. Another change will come after this to have ftrace_module_enable() called after the text is set to read-only.

Cc: Qiu Peiyang <peiyangx.qiu@intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r-- include/linux/ftrace.h 6
-rw-r--r-- kernel/trace/ftrace.c 161
2 files changed, 110 insertions, 57 deletions
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4736a826baf5..660e7c698f3b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -357,6 +357,7 @@ bool is_ftrace_trampoline(unsigned long addr);
357 * REGS - the record wants the function to save regs 357 * REGS - the record wants the function to save regs
358 * REGS_EN - the function is set up to save regs. 358 * REGS_EN - the function is set up to save regs.
359 * IPMODIFY - the record allows for the IP address to be changed. 359 * IPMODIFY - the record allows for the IP address to be changed.
360 * DISABLED - the record is not ready to be touched yet
360 * 361 *
361 * When a new ftrace_ops is registered and wants a function to save 362 * When a new ftrace_ops is registered and wants a function to save
362 * pt_regs, the rec->flag REGS is set. When the function has been 363 * pt_regs, the rec->flag REGS is set. When the function has been
@@ -371,10 +372,11 @@ enum {
371 FTRACE_FL_TRAMP = (1UL << 28), 372 FTRACE_FL_TRAMP = (1UL << 28),
372 FTRACE_FL_TRAMP_EN = (1UL << 27), 373 FTRACE_FL_TRAMP_EN = (1UL << 27),
373 FTRACE_FL_IPMODIFY = (1UL << 26), 374 FTRACE_FL_IPMODIFY = (1UL << 26),
375 FTRACE_FL_DISABLED = (1UL << 25),
374}; 376};
375 377
376#define FTRACE_REF_MAX_SHIFT 26 378#define FTRACE_REF_MAX_SHIFT 25
377#define FTRACE_FL_BITS 6 379#define FTRACE_FL_BITS 7
378#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) 380#define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1)
379#define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) 381#define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT)
380#define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) 382#define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0f7ee341f89f..23683b06b18c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1658,6 +1658,9 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
1658 int in_hash = 0; 1658 int in_hash = 0;
1659 int match = 0; 1659 int match = 0;
1660 1660
1661 if (rec->flags & FTRACE_FL_DISABLED)
1662 continue;
1663
1661 if (all) { 1664 if (all) {
1662 /* 1665 /*
1663 * Only the filter_hash affects all records. 1666 * Only the filter_hash affects all records.
@@ -2023,6 +2026,9 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
2023 2026
2024 ftrace_bug_type = FTRACE_BUG_UNKNOWN; 2027 ftrace_bug_type = FTRACE_BUG_UNKNOWN;
2025 2028
2029 if (rec->flags & FTRACE_FL_DISABLED)
2030 return FTRACE_UPDATE_IGNORE;
2031
2026 /* 2032 /*
2027 * If we are updating calls: 2033 * If we are updating calls:
2028 * 2034 *
@@ -2833,9 +2839,9 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
2833 if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) 2839 if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
2834 return 0; 2840 return 0;
2835 2841
2836 /* If ops traces all mods, we already accounted for it */ 2842 /* If ops traces all then it includes this function */
2837 if (ops_traces_mod(ops)) 2843 if (ops_traces_mod(ops))
2838 return 0; 2844 return 1;
2839 2845
2840 /* The function must be in the filter */ 2846 /* The function must be in the filter */
2841 if (!ftrace_hash_empty(ops->func_hash->filter_hash) && 2847 if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
@@ -2849,64 +2855,41 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
2849 return 1; 2855 return 1;
2850} 2856}
2851 2857
2852static int referenced_filters(struct dyn_ftrace *rec)
2853{
2854 struct ftrace_ops *ops;
2855 int cnt = 0;
2856
2857 for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
2858 if (ops_references_rec(ops, rec))
2859 cnt++;
2860 }
2861
2862 return cnt;
2863}
2864
2865static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) 2858static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
2866{ 2859{
2867 struct ftrace_page *pg; 2860 struct ftrace_page *pg;
2868 struct dyn_ftrace *p; 2861 struct dyn_ftrace *p;
2869 cycle_t start, stop; 2862 cycle_t start, stop;
2870 unsigned long update_cnt = 0; 2863 unsigned long update_cnt = 0;
2871 unsigned long ref = 0; 2864 unsigned long rec_flags = 0;
2872 bool test = false;
2873 int i; 2865 int i;
2874 2866
2867 start = ftrace_now(raw_smp_processor_id());
2868
2875 /* 2869 /*
2876 * When adding a module, we need to check if tracers are 2870 * When a module is loaded, this function is called to convert
2877 * currently enabled and if they are set to trace all functions. 2871 * the calls to mcount in its text to nops, and also to create
2878 * If they are, we need to enable the module functions as well 2872 * an entry in the ftrace data. Now, if ftrace is activated
2879 * as update the reference counts for those function records. 2873 * after this call, but before the module sets its text to
2874 * read-only, the modification of enabling ftrace can fail if
2875 * the read-only is done while ftrace is converting the calls.
2876 * To prevent this, the module's records are set as disabled
2877 * and will be enabled after the call to set the module's text
2878 * to read-only.
2880 */ 2879 */
2881 if (mod) { 2880 if (mod)
2882 struct ftrace_ops *ops; 2881 rec_flags |= FTRACE_FL_DISABLED;
2883
2884 for (ops = ftrace_ops_list;
2885 ops != &ftrace_list_end; ops = ops->next) {
2886 if (ops->flags & FTRACE_OPS_FL_ENABLED) {
2887 if (ops_traces_mod(ops))
2888 ref++;
2889 else
2890 test = true;
2891 }
2892 }
2893 }
2894
2895 start = ftrace_now(raw_smp_processor_id());
2896 2882
2897 for (pg = new_pgs; pg; pg = pg->next) { 2883 for (pg = new_pgs; pg; pg = pg->next) {
2898 2884
2899 for (i = 0; i < pg->index; i++) { 2885 for (i = 0; i < pg->index; i++) {
2900 int cnt = ref;
2901 2886
2902 /* If something went wrong, bail without enabling anything */ 2887 /* If something went wrong, bail without enabling anything */
2903 if (unlikely(ftrace_disabled)) 2888 if (unlikely(ftrace_disabled))
2904 return -1; 2889 return -1;
2905 2890
2906 p = &pg->records[i]; 2891 p = &pg->records[i];
2907 if (test) 2892 p->flags = rec_flags;
2908 cnt += referenced_filters(p);
2909 p->flags = cnt;
2910 2893
2911 /* 2894 /*
2912 * Do the initial record conversion from mcount jump 2895 * Do the initial record conversion from mcount jump
@@ -2916,21 +2899,6 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
2916 break; 2899 break;
2917 2900
2918 update_cnt++; 2901 update_cnt++;
2919
2920 /*
2921 * If the tracing is enabled, go ahead and enable the record.
2922 *
2923 * The reason not to enable the record immediatelly is the
2924 * inherent check of ftrace_make_nop/ftrace_make_call for
2925 * correct previous instructions. Making first the NOP
2926 * conversion puts the module to the correct state, thus
2927 * passing the ftrace_make_call check.
2928 */
2929 if (ftrace_start_up && cnt) {
2930 int failed = __ftrace_replace_code(p, 1);
2931 if (failed)
2932 ftrace_bug(failed, p);
2933 }
2934 } 2902 }
2935 } 2903 }
2936 2904
@@ -4938,6 +4906,19 @@ static int ftrace_process_locs(struct module *mod,
4938 4906
4939#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) 4907#define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next)
4940 4908
4909static int referenced_filters(struct dyn_ftrace *rec)
4910{
4911 struct ftrace_ops *ops;
4912 int cnt = 0;
4913
4914 for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
4915 if (ops_references_rec(ops, rec))
4916 cnt++;
4917 }
4918
4919 return cnt;
4920}
4921
4941void ftrace_release_mod(struct module *mod) 4922void ftrace_release_mod(struct module *mod)
4942{ 4923{
4943 struct dyn_ftrace *rec; 4924 struct dyn_ftrace *rec;
@@ -4980,6 +4961,75 @@ void ftrace_release_mod(struct module *mod)
4980 mutex_unlock(&ftrace_lock); 4961 mutex_unlock(&ftrace_lock);
4981} 4962}
4982 4963
4964static void ftrace_module_enable(struct module *mod)
4965{
4966 struct dyn_ftrace *rec;
4967 struct ftrace_page *pg;
4968
4969 mutex_lock(&ftrace_lock);
4970
4971 if (ftrace_disabled)
4972 goto out_unlock;
4973
4974 /*
4975 * If the tracing is enabled, go ahead and enable the record.
4976 *
4977 * The reason not to enable the record immediatelly is the
4978 * inherent check of ftrace_make_nop/ftrace_make_call for
4979 * correct previous instructions. Making first the NOP
4980 * conversion puts the module to the correct state, thus
4981 * passing the ftrace_make_call check.
4982 *
4983 * We also delay this to after the module code already set the
4984 * text to read-only, as we now need to set it back to read-write
4985 * so that we can modify the text.
4986 */
4987 if (ftrace_start_up)
4988 ftrace_arch_code_modify_prepare();
4989
4990 do_for_each_ftrace_rec(pg, rec) {
4991 int cnt;
4992 /*
4993 * do_for_each_ftrace_rec() is a double loop.
4994 * module text shares the pg. If a record is
4995 * not part of this module, then skip this pg,
4996 * which the "break" will do.
4997 */
4998 if (!within_module_core(rec->ip, mod))
4999 break;
5000
5001 cnt = 0;
5002
5003 /*
5004 * When adding a module, we need to check if tracers are
5005 * currently enabled and if they are, and can trace this record,
5006 * we need to enable the module functions as well as update the
5007 * reference counts for those function records.
5008 */
5009 if (ftrace_start_up)
5010 cnt += referenced_filters(rec);
5011
5012 /* This clears FTRACE_FL_DISABLED */
5013 rec->flags = cnt;
5014
5015 if (ftrace_start_up && cnt) {
5016 int failed = __ftrace_replace_code(rec, 1);
5017 if (failed) {
5018 ftrace_bug(failed, rec);
5019 goto out_loop;
5020 }
5021 }
5022
5023 } while_for_each_ftrace_rec();
5024
5025 out_loop:
5026 if (ftrace_start_up)
5027 ftrace_arch_code_modify_post_process();
5028
5029 out_unlock:
5030 mutex_unlock(&ftrace_lock);
5031}
5032
4983void ftrace_module_init(struct module *mod) 5033void ftrace_module_init(struct module *mod)
4984{ 5034{
4985 if (ftrace_disabled || !mod->num_ftrace_callsites) 5035 if (ftrace_disabled || !mod->num_ftrace_callsites)
@@ -4987,6 +5037,7 @@ void ftrace_module_init(struct module *mod)
4987 5037
4988 ftrace_process_locs(mod, mod->ftrace_callsites, 5038 ftrace_process_locs(mod, mod->ftrace_callsites,
4989 mod->ftrace_callsites + mod->num_ftrace_callsites); 5039 mod->ftrace_callsites + mod->num_ftrace_callsites);
5040 ftrace_module_enable(mod);
4990} 5041}
4991 5042
4992static int ftrace_module_notify_exit(struct notifier_block *self, 5043static int ftrace_module_notify_exit(struct notifier_block *self,