diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2012-01-21 16:34:05 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2012-01-24 19:40:27 -0500 |
commit | 7c60c48f58a78195acc1f71c9a9d01958c02ab89 (patch) | |
tree | 7d1a66abc2510aa474105a747fdcd08b033f2b36 | |
parent | f728019bb72e655680c02ad1829323054a8e875f (diff) |
sysctl: Improve the sysctl sanity checks
- Stop validating subdirectories now that we only register leaf tables
- Cleanup and improve the duplicate filename check.
* Run the duplicate filename check under the sysctl_lock to guarantee
we never add duplicate names.
* Reduce the duplicate filename check to nearly O(M*N) where M is the
number of entries in tthe table we are registering and N is the
number of entries in the directory before we got there.
- Move the duplicate filename check into it's own function and call
it directtly from __register_sysctl_table
- Kill the config option as the sanity checks are now cheap enough
the config option is unnecessary. The original reason for the config
option was because we had a huge table used to verify the proc filename
to binary sysctl mapping. That table has now evolved into the binary_sysctl
translation layer and is no longer part of the sysctl_check code.
- Tighten up the permission checks. Guarnateeing that files only have read
or write permissions.
- Removed redudant check for parents having a procname as now everything has
a procname.
- Generalize the backtrace logic so that we print a backtrace from
any failure of __register_sysctl_table that was not caused by
a memmory allocation failure. The backtrace allows us to track
down who erroneously registered a sysctl table.
Bechmark before (CONFIG_SYSCTL_CHECK=y):
make-dummies 0 999 -> 12s
rmmod dummy -> 0.08s
Bechmark before (CONFIG_SYSCTL_CHECK=n):
make-dummies 0 999 -> 0.7s
rmmod dummy -> 0.06s
make-dummies 0 99999 -> 1m13s
rmmod dummy -> 0.38s
Benchmark after:
make-dummies 0 999 -> 0.65s
rmmod dummy -> 0.055s
make-dummies 0 9999 -> 1m10s
rmmod dummy -> 0.39s
The sysctl sanity checks now impose no measurable cost.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
-rw-r--r-- | fs/proc/proc_sysctl.c | 222 | ||||
-rw-r--r-- | lib/Kconfig.debug | 8 |
2 files changed, 86 insertions, 144 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 6bab2ae9e395..a492ff60e071 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -726,160 +726,106 @@ static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q) | |||
726 | } | 726 | } |
727 | } | 727 | } |
728 | 728 | ||
729 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | 729 | static int sysctl_check_table_dups(const char *path, struct ctl_table *old, |
730 | static int sysctl_depth(struct ctl_table *table) | 730 | struct ctl_table *table) |
731 | { | 731 | { |
732 | struct ctl_table *tmp; | 732 | struct ctl_table *entry, *test; |
733 | int depth; | 733 | int error = 0; |
734 | |||
735 | depth = 0; | ||
736 | for (tmp = table; tmp->parent; tmp = tmp->parent) | ||
737 | depth++; | ||
738 | 734 | ||
739 | return depth; | 735 | for (entry = old; entry->procname; entry++) { |
736 | for (test = table; test->procname; test++) { | ||
737 | if (strcmp(entry->procname, test->procname) == 0) { | ||
738 | printk(KERN_ERR "sysctl duplicate entry: %s/%s\n", | ||
739 | path, test->procname); | ||
740 | error = -EEXIST; | ||
741 | } | ||
742 | } | ||
743 | } | ||
744 | return error; | ||
740 | } | 745 | } |
741 | 746 | ||
742 | static struct ctl_table *sysctl_parent(struct ctl_table *table, int n) | 747 | static int sysctl_check_dups(struct nsproxy *namespaces, |
748 | struct ctl_table_header *header, | ||
749 | const char *path, struct ctl_table *table) | ||
743 | { | 750 | { |
744 | int i; | 751 | struct ctl_table_root *root; |
752 | struct ctl_table_set *set; | ||
753 | struct ctl_table_header *dir_head, *head; | ||
754 | struct ctl_table *dir_table; | ||
755 | int error = 0; | ||
745 | 756 | ||
746 | for (i = 0; table && i < n; i++) | 757 | /* No dups if we are the only member of our directory */ |
747 | table = table->parent; | 758 | if (header->attached_by != table) |
759 | return 0; | ||
748 | 760 | ||
749 | return table; | 761 | dir_head = header->parent; |
750 | } | 762 | dir_table = header->attached_to; |
751 | 763 | ||
764 | error = sysctl_check_table_dups(path, dir_table, table); | ||
752 | 765 | ||
753 | static void sysctl_print_path(struct ctl_table *table) | 766 | root = &sysctl_table_root; |
754 | { | 767 | do { |
755 | struct ctl_table *tmp; | 768 | set = lookup_header_set(root, namespaces); |
756 | int depth, i; | ||
757 | depth = sysctl_depth(table); | ||
758 | if (table->procname) { | ||
759 | for (i = depth; i >= 0; i--) { | ||
760 | tmp = sysctl_parent(table, i); | ||
761 | printk("/%s", tmp->procname?tmp->procname:""); | ||
762 | } | ||
763 | } | ||
764 | printk(" "); | ||
765 | } | ||
766 | 769 | ||
767 | static struct ctl_table *sysctl_check_lookup(struct nsproxy *namespaces, | 770 | list_for_each_entry(head, &set->list, ctl_entry) { |
768 | struct ctl_table *table) | 771 | if (head->unregistering) |
769 | { | ||
770 | struct ctl_table_header *head; | ||
771 | struct ctl_table *ref, *test; | ||
772 | int depth, cur_depth; | ||
773 | |||
774 | depth = sysctl_depth(table); | ||
775 | |||
776 | for (head = __sysctl_head_next(namespaces, NULL); head; | ||
777 | head = __sysctl_head_next(namespaces, head)) { | ||
778 | cur_depth = depth; | ||
779 | ref = head->ctl_table; | ||
780 | repeat: | ||
781 | test = sysctl_parent(table, cur_depth); | ||
782 | for (; ref->procname; ref++) { | ||
783 | int match = 0; | ||
784 | if (cur_depth && !ref->child) | ||
785 | continue; | 772 | continue; |
786 | 773 | if (head->attached_to != dir_table) | |
787 | if (test->procname && ref->procname && | 774 | continue; |
788 | (strcmp(test->procname, ref->procname) == 0)) | 775 | error = sysctl_check_table_dups(path, head->attached_by, |
789 | match++; | 776 | table); |
790 | |||
791 | if (match) { | ||
792 | if (cur_depth != 0) { | ||
793 | cur_depth--; | ||
794 | ref = ref->child; | ||
795 | goto repeat; | ||
796 | } | ||
797 | goto out; | ||
798 | } | ||
799 | } | 777 | } |
800 | } | 778 | root = list_entry(root->root_list.next, |
801 | ref = NULL; | 779 | struct ctl_table_root, root_list); |
802 | out: | 780 | } while (root != &sysctl_table_root); |
803 | sysctl_head_finish(head); | 781 | return error; |
804 | return ref; | ||
805 | } | 782 | } |
806 | 783 | ||
807 | static void set_fail(const char **fail, struct ctl_table *table, const char *str) | 784 | static int sysctl_err(const char *path, struct ctl_table *table, char *fmt, ...) |
808 | { | 785 | { |
809 | if (*fail) { | 786 | struct va_format vaf; |
810 | printk(KERN_ERR "sysctl table check failed: "); | 787 | va_list args; |
811 | sysctl_print_path(table); | ||
812 | printk(" %s\n", *fail); | ||
813 | dump_stack(); | ||
814 | } | ||
815 | *fail = str; | ||
816 | } | ||
817 | 788 | ||
818 | static void sysctl_check_leaf(struct nsproxy *namespaces, | 789 | va_start(args, fmt); |
819 | struct ctl_table *table, const char **fail) | 790 | vaf.fmt = fmt; |
820 | { | 791 | vaf.va = &args; |
821 | struct ctl_table *ref; | 792 | |
793 | printk(KERN_ERR "sysctl table check failed: %s/%s %pV\n", | ||
794 | path, table->procname, &vaf); | ||
822 | 795 | ||
823 | ref = sysctl_check_lookup(namespaces, table); | 796 | va_end(args); |
824 | if (ref && (ref != table)) | 797 | return -EINVAL; |
825 | set_fail(fail, table, "Sysctl already exists"); | ||
826 | } | 798 | } |
827 | 799 | ||
828 | static int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | 800 | static int sysctl_check_table(const char *path, struct ctl_table *table) |
829 | { | 801 | { |
830 | int error = 0; | 802 | int err = 0; |
831 | for (; table->procname; table++) { | 803 | for (; table->procname; table++) { |
832 | const char *fail = NULL; | ||
833 | |||
834 | if (table->parent) { | ||
835 | if (!table->parent->procname) | ||
836 | set_fail(&fail, table, "Parent without procname"); | ||
837 | } | ||
838 | if (table->child) { | ||
839 | if (table->data) | ||
840 | set_fail(&fail, table, "Directory with data?"); | ||
841 | if (table->maxlen) | ||
842 | set_fail(&fail, table, "Directory with maxlen?"); | ||
843 | if ((table->mode & (S_IRUGO|S_IXUGO)) != table->mode) | ||
844 | set_fail(&fail, table, "Writable sysctl directory"); | ||
845 | if (table->proc_handler) | ||
846 | set_fail(&fail, table, "Directory with proc_handler"); | ||
847 | if (table->extra1) | ||
848 | set_fail(&fail, table, "Directory with extra1"); | ||
849 | if (table->extra2) | ||
850 | set_fail(&fail, table, "Directory with extra2"); | ||
851 | } else { | ||
852 | if ((table->proc_handler == proc_dostring) || | ||
853 | (table->proc_handler == proc_dointvec) || | ||
854 | (table->proc_handler == proc_dointvec_minmax) || | ||
855 | (table->proc_handler == proc_dointvec_jiffies) || | ||
856 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
857 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
858 | (table->proc_handler == proc_doulongvec_minmax) || | ||
859 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
860 | if (!table->data) | ||
861 | set_fail(&fail, table, "No data"); | ||
862 | if (!table->maxlen) | ||
863 | set_fail(&fail, table, "No maxlen"); | ||
864 | } | ||
865 | #ifdef CONFIG_PROC_SYSCTL | ||
866 | if (!table->proc_handler) | ||
867 | set_fail(&fail, table, "No proc_handler"); | ||
868 | #endif | ||
869 | sysctl_check_leaf(namespaces, table, &fail); | ||
870 | } | ||
871 | if (table->mode > 0777) | ||
872 | set_fail(&fail, table, "bogus .mode"); | ||
873 | if (fail) { | ||
874 | set_fail(&fail, table, NULL); | ||
875 | error = -EINVAL; | ||
876 | } | ||
877 | if (table->child) | 804 | if (table->child) |
878 | error |= sysctl_check_table(namespaces, table->child); | 805 | err = sysctl_err(path, table, "Not a file"); |
806 | |||
807 | if ((table->proc_handler == proc_dostring) || | ||
808 | (table->proc_handler == proc_dointvec) || | ||
809 | (table->proc_handler == proc_dointvec_minmax) || | ||
810 | (table->proc_handler == proc_dointvec_jiffies) || | ||
811 | (table->proc_handler == proc_dointvec_userhz_jiffies) || | ||
812 | (table->proc_handler == proc_dointvec_ms_jiffies) || | ||
813 | (table->proc_handler == proc_doulongvec_minmax) || | ||
814 | (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) { | ||
815 | if (!table->data) | ||
816 | err = sysctl_err(path, table, "No data"); | ||
817 | if (!table->maxlen) | ||
818 | err = sysctl_err(path, table, "No maxlen"); | ||
819 | } | ||
820 | if (!table->proc_handler) | ||
821 | err = sysctl_err(path, table, "No proc_handler"); | ||
822 | |||
823 | if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode) | ||
824 | err = sysctl_err(path, table, "bogus .mode 0%o", | ||
825 | table->mode); | ||
879 | } | 826 | } |
880 | return error; | 827 | return err; |
881 | } | 828 | } |
882 | #endif /* CONFIG_SYSCTL_SYSCALL_CHECK */ | ||
883 | 829 | ||
884 | /** | 830 | /** |
885 | * __register_sysctl_table - register a leaf sysctl table | 831 | * __register_sysctl_table - register a leaf sysctl table |
@@ -1003,12 +949,8 @@ struct ctl_table_header *__register_sysctl_table( | |||
1003 | header->root = root; | 949 | header->root = root; |
1004 | sysctl_set_parent(NULL, header->ctl_table); | 950 | sysctl_set_parent(NULL, header->ctl_table); |
1005 | header->count = 1; | 951 | header->count = 1; |
1006 | #ifdef CONFIG_SYSCTL_SYSCALL_CHECK | 952 | if (sysctl_check_table(path, table)) |
1007 | if (sysctl_check_table(namespaces, header->ctl_table)) { | 953 | goto fail; |
1008 | kfree(header); | ||
1009 | return NULL; | ||
1010 | } | ||
1011 | #endif | ||
1012 | spin_lock(&sysctl_lock); | 954 | spin_lock(&sysctl_lock); |
1013 | header->set = lookup_header_set(root, namespaces); | 955 | header->set = lookup_header_set(root, namespaces); |
1014 | header->attached_by = header->ctl_table; | 956 | header->attached_by = header->ctl_table; |
@@ -1029,11 +971,19 @@ struct ctl_table_header *__register_sysctl_table( | |||
1029 | struct ctl_table_root, root_list); | 971 | struct ctl_table_root, root_list); |
1030 | set = lookup_header_set(root, namespaces); | 972 | set = lookup_header_set(root, namespaces); |
1031 | } | 973 | } |
974 | if (sysctl_check_dups(namespaces, header, path, table)) | ||
975 | goto fail_locked; | ||
1032 | header->parent->count++; | 976 | header->parent->count++; |
1033 | list_add_tail(&header->ctl_entry, &header->set->list); | 977 | list_add_tail(&header->ctl_entry, &header->set->list); |
1034 | spin_unlock(&sysctl_lock); | 978 | spin_unlock(&sysctl_lock); |
1035 | 979 | ||
1036 | return header; | 980 | return header; |
981 | fail_locked: | ||
982 | spin_unlock(&sysctl_lock); | ||
983 | fail: | ||
984 | kfree(header); | ||
985 | dump_stack(); | ||
986 | return NULL; | ||
1037 | } | 987 | } |
1038 | 988 | ||
1039 | static char *append_path(const char *path, char *pos, const char *name) | 989 | static char *append_path(const char *path, char *pos, const char *name) |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8745ac7d1f75..943a6182cdf2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1113,14 +1113,6 @@ config LATENCYTOP | |||
1113 | Enable this option if you want to use the LatencyTOP tool | 1113 | Enable this option if you want to use the LatencyTOP tool |
1114 | to find out which userspace is blocking on what kernel operations. | 1114 | to find out which userspace is blocking on what kernel operations. |
1115 | 1115 | ||
1116 | config SYSCTL_SYSCALL_CHECK | ||
1117 | bool "Sysctl checks" | ||
1118 | depends on SYSCTL | ||
1119 | ---help--- | ||
1120 | sys_sysctl uses binary paths that have been found challenging | ||
1121 | to properly maintain and use. This enables checks that help | ||
1122 | you to keep things correct. | ||
1123 | |||
1124 | source mm/Kconfig.debug | 1116 | source mm/Kconfig.debug |
1125 | source kernel/trace/Kconfig | 1117 | source kernel/trace/Kconfig |
1126 | 1118 | ||