diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 77 | ||||
-rw-r--r-- | kernel/acct.c | 6 | ||||
-rw-r--r-- | kernel/audit.c | 2 | ||||
-rw-r--r-- | kernel/audit.h | 7 | ||||
-rw-r--r-- | kernel/audit_watch.c | 3 | ||||
-rw-r--r-- | kernel/auditfilter.c | 65 | ||||
-rw-r--r-- | kernel/auditsc.c | 217 | ||||
-rw-r--r-- | kernel/debug/debug_core.c | 18 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_bt.c | 2 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_io.c | 33 | ||||
-rw-r--r-- | kernel/debug/kdb/kdb_main.c | 2 | ||||
-rw-r--r-- | kernel/events/core.c | 21 | ||||
-rw-r--r-- | kernel/fork.c | 13 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 33 | ||||
-rw-r--r-- | kernel/kmod.c | 7 | ||||
-rw-r--r-- | kernel/kthread.c | 1 | ||||
-rw-r--r-- | kernel/modsign_pubkey.c | 113 | ||||
-rw-r--r-- | kernel/module-internal.h | 15 | ||||
-rw-r--r-- | kernel/module.c | 157 | ||||
-rw-r--r-- | kernel/module_signing.c | 243 | ||||
-rw-r--r-- | kernel/rcutree.c | 21 | ||||
-rw-r--r-- | kernel/rcutree.h | 6 | ||||
-rw-r--r-- | kernel/sched/core.c | 71 | ||||
-rw-r--r-- | kernel/time.c | 2 | ||||
-rw-r--r-- | kernel/time/Kconfig | 4 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 118 | ||||
-rw-r--r-- | kernel/time/jiffies.c | 32 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 2 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 117 | ||||
-rw-r--r-- | kernel/timer.c | 10 |
30 files changed, 1093 insertions, 325 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 5404911eaee9..0dfeca4324ee 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -54,6 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |||
54 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 54 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
55 | obj-$(CONFIG_UID16) += uid16.o | 55 | obj-$(CONFIG_UID16) += uid16.o |
56 | obj-$(CONFIG_MODULES) += module.o | 56 | obj-$(CONFIG_MODULES) += module.o |
57 | obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o | ||
57 | obj-$(CONFIG_KALLSYMS) += kallsyms.o | 58 | obj-$(CONFIG_KALLSYMS) += kallsyms.o |
58 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o | 59 | obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o |
59 | obj-$(CONFIG_KEXEC) += kexec.o | 60 | obj-$(CONFIG_KEXEC) += kexec.o |
@@ -130,3 +131,79 @@ quiet_cmd_timeconst = TIMEC $@ | |||
130 | targets += timeconst.h | 131 | targets += timeconst.h |
131 | $(obj)/timeconst.h: $(src)/timeconst.pl FORCE | 132 | $(obj)/timeconst.h: $(src)/timeconst.pl FORCE |
132 | $(call if_changed,timeconst) | 133 | $(call if_changed,timeconst) |
134 | |||
135 | ifeq ($(CONFIG_MODULE_SIG),y) | ||
136 | # | ||
137 | # Pull the signing certificate and any extra certificates into the kernel | ||
138 | # | ||
139 | extra_certificates: | ||
140 | touch $@ | ||
141 | |||
142 | kernel/modsign_pubkey.o: signing_key.x509 extra_certificates | ||
143 | |||
144 | ############################################################################### | ||
145 | # | ||
146 | # If module signing is requested, say by allyesconfig, but a key has not been | ||
147 | # supplied, then one will need to be generated to make sure the build does not | ||
148 | # fail and that the kernel may be used afterwards. | ||
149 | # | ||
150 | ############################################################################### | ||
151 | sign_key_with_hash := | ||
152 | ifeq ($(CONFIG_MODULE_SIG_SHA1),y) | ||
153 | sign_key_with_hash := -sha1 | ||
154 | endif | ||
155 | ifeq ($(CONFIG_MODULE_SIG_SHA224),y) | ||
156 | sign_key_with_hash := -sha224 | ||
157 | endif | ||
158 | ifeq ($(CONFIG_MODULE_SIG_SHA256),y) | ||
159 | sign_key_with_hash := -sha256 | ||
160 | endif | ||
161 | ifeq ($(CONFIG_MODULE_SIG_SHA384),y) | ||
162 | sign_key_with_hash := -sha384 | ||
163 | endif | ||
164 | ifeq ($(CONFIG_MODULE_SIG_SHA512),y) | ||
165 | sign_key_with_hash := -sha512 | ||
166 | endif | ||
167 | ifeq ($(sign_key_with_hash),) | ||
168 | $(error Could not determine digest type to use from kernel config) | ||
169 | endif | ||
170 | |||
171 | signing_key.priv signing_key.x509: x509.genkey | ||
172 | @echo "###" | ||
173 | @echo "### Now generating an X.509 key pair to be used for signing modules." | ||
174 | @echo "###" | ||
175 | @echo "### If this takes a long time, you might wish to run rngd in the" | ||
176 | @echo "### background to keep the supply of entropy topped up. It" | ||
177 | @echo "### needs to be run as root, and should use a hardware random" | ||
178 | @echo "### number generator if one is available, eg:" | ||
179 | @echo "###" | ||
180 | @echo "### rngd -r /dev/hwrandom" | ||
181 | @echo "###" | ||
182 | openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \ | ||
183 | -x509 -config x509.genkey \ | ||
184 | -outform DER -out signing_key.x509 \ | ||
185 | -keyout signing_key.priv | ||
186 | @echo "###" | ||
187 | @echo "### Key pair generated." | ||
188 | @echo "###" | ||
189 | |||
190 | x509.genkey: | ||
191 | @echo Generating X.509 key generation config | ||
192 | @echo >x509.genkey "[ req ]" | ||
193 | @echo >>x509.genkey "default_bits = 4096" | ||
194 | @echo >>x509.genkey "distinguished_name = req_distinguished_name" | ||
195 | @echo >>x509.genkey "prompt = no" | ||
196 | @echo >>x509.genkey "string_mask = utf8only" | ||
197 | @echo >>x509.genkey "x509_extensions = myexts" | ||
198 | @echo >>x509.genkey | ||
199 | @echo >>x509.genkey "[ req_distinguished_name ]" | ||
200 | @echo >>x509.genkey "O = Magrathea" | ||
201 | @echo >>x509.genkey "CN = Glacier signing key" | ||
202 | @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2" | ||
203 | @echo >>x509.genkey | ||
204 | @echo >>x509.genkey "[ myexts ]" | ||
205 | @echo >>x509.genkey "basicConstraints=critical,CA:FALSE" | ||
206 | @echo >>x509.genkey "keyUsage=digitalSignature" | ||
207 | @echo >>x509.genkey "subjectKeyIdentifier=hash" | ||
208 | @echo >>x509.genkey "authorityKeyIdentifier=keyid" | ||
209 | endif | ||
diff --git a/kernel/acct.c b/kernel/acct.c index 6cd7529c9e6a..051e071a06e7 100644 --- a/kernel/acct.c +++ b/kernel/acct.c | |||
@@ -193,7 +193,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, | |||
193 | } | 193 | } |
194 | } | 194 | } |
195 | 195 | ||
196 | static int acct_on(char *name) | 196 | static int acct_on(struct filename *pathname) |
197 | { | 197 | { |
198 | struct file *file; | 198 | struct file *file; |
199 | struct vfsmount *mnt; | 199 | struct vfsmount *mnt; |
@@ -201,7 +201,7 @@ static int acct_on(char *name) | |||
201 | struct bsd_acct_struct *acct = NULL; | 201 | struct bsd_acct_struct *acct = NULL; |
202 | 202 | ||
203 | /* Difference from BSD - they don't do O_APPEND */ | 203 | /* Difference from BSD - they don't do O_APPEND */ |
204 | file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); | 204 | file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0); |
205 | if (IS_ERR(file)) | 205 | if (IS_ERR(file)) |
206 | return PTR_ERR(file); | 206 | return PTR_ERR(file); |
207 | 207 | ||
@@ -260,7 +260,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name) | |||
260 | return -EPERM; | 260 | return -EPERM; |
261 | 261 | ||
262 | if (name) { | 262 | if (name) { |
263 | char *tmp = getname(name); | 263 | struct filename *tmp = getname(name); |
264 | if (IS_ERR(tmp)) | 264 | if (IS_ERR(tmp)) |
265 | return (PTR_ERR(tmp)); | 265 | return (PTR_ERR(tmp)); |
266 | error = acct_on(tmp); | 266 | error = acct_on(tmp); |
diff --git a/kernel/audit.c b/kernel/audit.c index 4d0ceede3319..40414e9143db 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -1440,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct path *link) | |||
1440 | 1440 | ||
1441 | ab = audit_log_start(current->audit_context, GFP_KERNEL, | 1441 | ab = audit_log_start(current->audit_context, GFP_KERNEL, |
1442 | AUDIT_ANOM_LINK); | 1442 | AUDIT_ANOM_LINK); |
1443 | if (!ab) | ||
1444 | return; | ||
1443 | audit_log_format(ab, "op=%s action=denied", operation); | 1445 | audit_log_format(ab, "op=%s action=denied", operation); |
1444 | audit_log_format(ab, " pid=%d comm=", current->pid); | 1446 | audit_log_format(ab, " pid=%d comm=", current->pid); |
1445 | audit_log_untrustedstring(ab, current->comm); | 1447 | audit_log_untrustedstring(ab, current->comm); |
diff --git a/kernel/audit.h b/kernel/audit.h index 9eb3d79482b6..d51cba868e1b 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
@@ -74,12 +74,15 @@ static inline int audit_hash_ino(u32 ino) | |||
74 | return (ino & (AUDIT_INODE_BUCKETS-1)); | 74 | return (ino & (AUDIT_INODE_BUCKETS-1)); |
75 | } | 75 | } |
76 | 76 | ||
77 | /* Indicates that audit should log the full pathname. */ | ||
78 | #define AUDIT_NAME_FULL -1 | ||
79 | |||
77 | extern int audit_match_class(int class, unsigned syscall); | 80 | extern int audit_match_class(int class, unsigned syscall); |
78 | extern int audit_comparator(const u32 left, const u32 op, const u32 right); | 81 | extern int audit_comparator(const u32 left, const u32 op, const u32 right); |
79 | extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); | 82 | extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); |
80 | extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); | 83 | extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); |
81 | extern int audit_compare_dname_path(const char *dname, const char *path, | 84 | extern int parent_len(const char *path); |
82 | int *dirlen); | 85 | extern int audit_compare_dname_path(const char *dname, const char *path, int plen); |
83 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | 86 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, |
84 | int done, int multi, | 87 | int done, int multi, |
85 | const void *payload, int size); | 88 | const void *payload, int size); |
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1c22ec3d87bc..9a9ae6e3d290 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c | |||
@@ -265,7 +265,8 @@ static void audit_update_watch(struct audit_parent *parent, | |||
265 | /* Run all of the watches on this parent looking for the one that | 265 | /* Run all of the watches on this parent looking for the one that |
266 | * matches the given dname */ | 266 | * matches the given dname */ |
267 | list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { | 267 | list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { |
268 | if (audit_compare_dname_path(dname, owatch->path, NULL)) | 268 | if (audit_compare_dname_path(dname, owatch->path, |
269 | AUDIT_NAME_FULL)) | ||
269 | continue; | 270 | continue; |
270 | 271 | ||
271 | /* If the update involves invalidating rules, do the inode-based | 272 | /* If the update involves invalidating rules, do the inode-based |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index c4bcdbaf4d4d..7f19f23d38a3 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
@@ -1298,41 +1298,60 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right) | |||
1298 | } | 1298 | } |
1299 | } | 1299 | } |
1300 | 1300 | ||
1301 | /* Compare given dentry name with last component in given path, | 1301 | /** |
1302 | * return of 0 indicates a match. */ | 1302 | * parent_len - find the length of the parent portion of a pathname |
1303 | int audit_compare_dname_path(const char *dname, const char *path, | 1303 | * @path: pathname of which to determine length |
1304 | int *dirlen) | 1304 | */ |
1305 | int parent_len(const char *path) | ||
1305 | { | 1306 | { |
1306 | int dlen, plen; | 1307 | int plen; |
1307 | const char *p; | 1308 | const char *p; |
1308 | 1309 | ||
1309 | if (!dname || !path) | ||
1310 | return 1; | ||
1311 | |||
1312 | dlen = strlen(dname); | ||
1313 | plen = strlen(path); | 1310 | plen = strlen(path); |
1314 | if (plen < dlen) | 1311 | |
1315 | return 1; | 1312 | if (plen == 0) |
1313 | return plen; | ||
1316 | 1314 | ||
1317 | /* disregard trailing slashes */ | 1315 | /* disregard trailing slashes */ |
1318 | p = path + plen - 1; | 1316 | p = path + plen - 1; |
1319 | while ((*p == '/') && (p > path)) | 1317 | while ((*p == '/') && (p > path)) |
1320 | p--; | 1318 | p--; |
1321 | 1319 | ||
1322 | /* find last path component */ | 1320 | /* walk backward until we find the next slash or hit beginning */ |
1323 | p = p - dlen + 1; | 1321 | while ((*p != '/') && (p > path)) |
1324 | if (p < path) | 1322 | p--; |
1323 | |||
1324 | /* did we find a slash? Then increment to include it in path */ | ||
1325 | if (*p == '/') | ||
1326 | p++; | ||
1327 | |||
1328 | return p - path; | ||
1329 | } | ||
1330 | |||
1331 | /** | ||
1332 | * audit_compare_dname_path - compare given dentry name with last component in | ||
1333 | * given path. Return of 0 indicates a match. | ||
1334 | * @dname: dentry name that we're comparing | ||
1335 | * @path: full pathname that we're comparing | ||
1336 | * @parentlen: length of the parent if known. Passing in AUDIT_NAME_FULL | ||
1337 | * here indicates that we must compute this value. | ||
1338 | */ | ||
1339 | int audit_compare_dname_path(const char *dname, const char *path, int parentlen) | ||
1340 | { | ||
1341 | int dlen, pathlen; | ||
1342 | const char *p; | ||
1343 | |||
1344 | dlen = strlen(dname); | ||
1345 | pathlen = strlen(path); | ||
1346 | if (pathlen < dlen) | ||
1325 | return 1; | 1347 | return 1; |
1326 | else if (p > path) { | ||
1327 | if (*--p != '/') | ||
1328 | return 1; | ||
1329 | else | ||
1330 | p++; | ||
1331 | } | ||
1332 | 1348 | ||
1333 | /* return length of path's directory component */ | 1349 | parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen; |
1334 | if (dirlen) | 1350 | if (pathlen - parentlen != dlen) |
1335 | *dirlen = p - path; | 1351 | return 1; |
1352 | |||
1353 | p = path + parentlen; | ||
1354 | |||
1336 | return strncmp(p, dname, dlen); | 1355 | return strncmp(p, dname, dlen); |
1337 | } | 1356 | } |
1338 | 1357 | ||
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f4a7756f999c..2f186ed80c40 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -81,9 +81,6 @@ | |||
81 | * a name dynamically and also add those to the list anchored by names_list. */ | 81 | * a name dynamically and also add those to the list anchored by names_list. */ |
82 | #define AUDIT_NAMES 5 | 82 | #define AUDIT_NAMES 5 |
83 | 83 | ||
84 | /* Indicates that audit should log the full pathname. */ | ||
85 | #define AUDIT_NAME_FULL -1 | ||
86 | |||
87 | /* no execve audit message should be longer than this (userspace limits) */ | 84 | /* no execve audit message should be longer than this (userspace limits) */ |
88 | #define MAX_EXECVE_AUDIT_LEN 7500 | 85 | #define MAX_EXECVE_AUDIT_LEN 7500 |
89 | 86 | ||
@@ -106,27 +103,29 @@ struct audit_cap_data { | |||
106 | * we don't let putname() free it (instead we free all of the saved | 103 | * we don't let putname() free it (instead we free all of the saved |
107 | * pointers at syscall exit time). | 104 | * pointers at syscall exit time). |
108 | * | 105 | * |
109 | * Further, in fs/namei.c:path_lookup() we store the inode and device. */ | 106 | * Further, in fs/namei.c:path_lookup() we store the inode and device. |
107 | */ | ||
110 | struct audit_names { | 108 | struct audit_names { |
111 | struct list_head list; /* audit_context->names_list */ | 109 | struct list_head list; /* audit_context->names_list */ |
112 | const char *name; | 110 | struct filename *name; |
113 | unsigned long ino; | 111 | unsigned long ino; |
114 | dev_t dev; | 112 | dev_t dev; |
115 | umode_t mode; | 113 | umode_t mode; |
116 | kuid_t uid; | 114 | kuid_t uid; |
117 | kgid_t gid; | 115 | kgid_t gid; |
118 | dev_t rdev; | 116 | dev_t rdev; |
119 | u32 osid; | 117 | u32 osid; |
120 | struct audit_cap_data fcap; | 118 | struct audit_cap_data fcap; |
121 | unsigned int fcap_ver; | 119 | unsigned int fcap_ver; |
122 | int name_len; /* number of name's characters to log */ | 120 | int name_len; /* number of name's characters to log */ |
123 | bool name_put; /* call __putname() for this name */ | 121 | unsigned char type; /* record type */ |
122 | bool name_put; /* call __putname() for this name */ | ||
124 | /* | 123 | /* |
125 | * This was an allocated audit_names and not from the array of | 124 | * This was an allocated audit_names and not from the array of |
126 | * names allocated in the task audit context. Thus this name | 125 | * names allocated in the task audit context. Thus this name |
127 | * should be freed on syscall exit | 126 | * should be freed on syscall exit |
128 | */ | 127 | */ |
129 | bool should_free; | 128 | bool should_free; |
130 | }; | 129 | }; |
131 | 130 | ||
132 | struct audit_aux_data { | 131 | struct audit_aux_data { |
@@ -998,7 +997,7 @@ static inline void audit_free_names(struct audit_context *context) | |||
998 | context->ino_count); | 997 | context->ino_count); |
999 | list_for_each_entry(n, &context->names_list, list) { | 998 | list_for_each_entry(n, &context->names_list, list) { |
1000 | printk(KERN_ERR "names[%d] = %p = %s\n", i, | 999 | printk(KERN_ERR "names[%d] = %p = %s\n", i, |
1001 | n->name, n->name ?: "(null)"); | 1000 | n->name, n->name->name ?: "(null)"); |
1002 | } | 1001 | } |
1003 | dump_stack(); | 1002 | dump_stack(); |
1004 | return; | 1003 | return; |
@@ -1555,7 +1554,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
1555 | case AUDIT_NAME_FULL: | 1554 | case AUDIT_NAME_FULL: |
1556 | /* log the full path */ | 1555 | /* log the full path */ |
1557 | audit_log_format(ab, " name="); | 1556 | audit_log_format(ab, " name="); |
1558 | audit_log_untrustedstring(ab, n->name); | 1557 | audit_log_untrustedstring(ab, n->name->name); |
1559 | break; | 1558 | break; |
1560 | case 0: | 1559 | case 0: |
1561 | /* name was specified as a relative path and the | 1560 | /* name was specified as a relative path and the |
@@ -1565,7 +1564,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, | |||
1565 | default: | 1564 | default: |
1566 | /* log the name's directory component */ | 1565 | /* log the name's directory component */ |
1567 | audit_log_format(ab, " name="); | 1566 | audit_log_format(ab, " name="); |
1568 | audit_log_n_untrustedstring(ab, n->name, | 1567 | audit_log_n_untrustedstring(ab, n->name->name, |
1569 | n->name_len); | 1568 | n->name_len); |
1570 | } | 1569 | } |
1571 | } else | 1570 | } else |
@@ -1995,7 +1994,8 @@ retry: | |||
1995 | #endif | 1994 | #endif |
1996 | } | 1995 | } |
1997 | 1996 | ||
1998 | static struct audit_names *audit_alloc_name(struct audit_context *context) | 1997 | static struct audit_names *audit_alloc_name(struct audit_context *context, |
1998 | unsigned char type) | ||
1999 | { | 1999 | { |
2000 | struct audit_names *aname; | 2000 | struct audit_names *aname; |
2001 | 2001 | ||
@@ -2010,6 +2010,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context) | |||
2010 | } | 2010 | } |
2011 | 2011 | ||
2012 | aname->ino = (unsigned long)-1; | 2012 | aname->ino = (unsigned long)-1; |
2013 | aname->type = type; | ||
2013 | list_add_tail(&aname->list, &context->names_list); | 2014 | list_add_tail(&aname->list, &context->names_list); |
2014 | 2015 | ||
2015 | context->name_count++; | 2016 | context->name_count++; |
@@ -2020,13 +2021,36 @@ static struct audit_names *audit_alloc_name(struct audit_context *context) | |||
2020 | } | 2021 | } |
2021 | 2022 | ||
2022 | /** | 2023 | /** |
2024 | * audit_reusename - fill out filename with info from existing entry | ||
2025 | * @uptr: userland ptr to pathname | ||
2026 | * | ||
2027 | * Search the audit_names list for the current audit context. If there is an | ||
2028 | * existing entry with a matching "uptr" then return the filename | ||
2029 | * associated with that audit_name. If not, return NULL. | ||
2030 | */ | ||
2031 | struct filename * | ||
2032 | __audit_reusename(const __user char *uptr) | ||
2033 | { | ||
2034 | struct audit_context *context = current->audit_context; | ||
2035 | struct audit_names *n; | ||
2036 | |||
2037 | list_for_each_entry(n, &context->names_list, list) { | ||
2038 | if (!n->name) | ||
2039 | continue; | ||
2040 | if (n->name->uptr == uptr) | ||
2041 | return n->name; | ||
2042 | } | ||
2043 | return NULL; | ||
2044 | } | ||
2045 | |||
2046 | /** | ||
2023 | * audit_getname - add a name to the list | 2047 | * audit_getname - add a name to the list |
2024 | * @name: name to add | 2048 | * @name: name to add |
2025 | * | 2049 | * |
2026 | * Add a name to the list of audit names for this context. | 2050 | * Add a name to the list of audit names for this context. |
2027 | * Called from fs/namei.c:getname(). | 2051 | * Called from fs/namei.c:getname(). |
2028 | */ | 2052 | */ |
2029 | void __audit_getname(const char *name) | 2053 | void __audit_getname(struct filename *name) |
2030 | { | 2054 | { |
2031 | struct audit_context *context = current->audit_context; | 2055 | struct audit_context *context = current->audit_context; |
2032 | struct audit_names *n; | 2056 | struct audit_names *n; |
@@ -2040,13 +2064,19 @@ void __audit_getname(const char *name) | |||
2040 | return; | 2064 | return; |
2041 | } | 2065 | } |
2042 | 2066 | ||
2043 | n = audit_alloc_name(context); | 2067 | #if AUDIT_DEBUG |
2068 | /* The filename _must_ have a populated ->name */ | ||
2069 | BUG_ON(!name->name); | ||
2070 | #endif | ||
2071 | |||
2072 | n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); | ||
2044 | if (!n) | 2073 | if (!n) |
2045 | return; | 2074 | return; |
2046 | 2075 | ||
2047 | n->name = name; | 2076 | n->name = name; |
2048 | n->name_len = AUDIT_NAME_FULL; | 2077 | n->name_len = AUDIT_NAME_FULL; |
2049 | n->name_put = true; | 2078 | n->name_put = true; |
2079 | name->aname = n; | ||
2050 | 2080 | ||
2051 | if (!context->pwd.dentry) | 2081 | if (!context->pwd.dentry) |
2052 | get_fs_pwd(current->fs, &context->pwd); | 2082 | get_fs_pwd(current->fs, &context->pwd); |
@@ -2059,7 +2089,7 @@ void __audit_getname(const char *name) | |||
2059 | * then we delay the putname until syscall exit. | 2089 | * then we delay the putname until syscall exit. |
2060 | * Called from include/linux/fs.h:putname(). | 2090 | * Called from include/linux/fs.h:putname(). |
2061 | */ | 2091 | */ |
2062 | void audit_putname(const char *name) | 2092 | void audit_putname(struct filename *name) |
2063 | { | 2093 | { |
2064 | struct audit_context *context = current->audit_context; | 2094 | struct audit_context *context = current->audit_context; |
2065 | 2095 | ||
@@ -2074,7 +2104,7 @@ void audit_putname(const char *name) | |||
2074 | 2104 | ||
2075 | list_for_each_entry(n, &context->names_list, list) | 2105 | list_for_each_entry(n, &context->names_list, list) |
2076 | printk(KERN_ERR "name[%d] = %p = %s\n", i, | 2106 | printk(KERN_ERR "name[%d] = %p = %s\n", i, |
2077 | n->name, n->name ?: "(null)"); | 2107 | n->name, n->name->name ?: "(null)"); |
2078 | } | 2108 | } |
2079 | #endif | 2109 | #endif |
2080 | __putname(name); | 2110 | __putname(name); |
@@ -2088,8 +2118,8 @@ void audit_putname(const char *name) | |||
2088 | " put_count=%d\n", | 2118 | " put_count=%d\n", |
2089 | __FILE__, __LINE__, | 2119 | __FILE__, __LINE__, |
2090 | context->serial, context->major, | 2120 | context->serial, context->major, |
2091 | context->in_syscall, name, context->name_count, | 2121 | context->in_syscall, name->name, |
2092 | context->put_count); | 2122 | context->name_count, context->put_count); |
2093 | dump_stack(); | 2123 | dump_stack(); |
2094 | } | 2124 | } |
2095 | } | 2125 | } |
@@ -2132,13 +2162,13 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent | |||
2132 | } | 2162 | } |
2133 | 2163 | ||
2134 | /** | 2164 | /** |
2135 | * audit_inode - store the inode and device from a lookup | 2165 | * __audit_inode - store the inode and device from a lookup |
2136 | * @name: name being audited | 2166 | * @name: name being audited |
2137 | * @dentry: dentry being audited | 2167 | * @dentry: dentry being audited |
2138 | * | 2168 | * @parent: does this dentry represent the parent? |
2139 | * Called from fs/namei.c:path_lookup(). | ||
2140 | */ | 2169 | */ |
2141 | void __audit_inode(const char *name, const struct dentry *dentry) | 2170 | void __audit_inode(struct filename *name, const struct dentry *dentry, |
2171 | unsigned int parent) | ||
2142 | { | 2172 | { |
2143 | struct audit_context *context = current->audit_context; | 2173 | struct audit_context *context = current->audit_context; |
2144 | const struct inode *inode = dentry->d_inode; | 2174 | const struct inode *inode = dentry->d_inode; |
@@ -2147,24 +2177,69 @@ void __audit_inode(const char *name, const struct dentry *dentry) | |||
2147 | if (!context->in_syscall) | 2177 | if (!context->in_syscall) |
2148 | return; | 2178 | return; |
2149 | 2179 | ||
2180 | if (!name) | ||
2181 | goto out_alloc; | ||
2182 | |||
2183 | #if AUDIT_DEBUG | ||
2184 | /* The struct filename _must_ have a populated ->name */ | ||
2185 | BUG_ON(!name->name); | ||
2186 | #endif | ||
2187 | /* | ||
2188 | * If we have a pointer to an audit_names entry already, then we can | ||
2189 | * just use it directly if the type is correct. | ||
2190 | */ | ||
2191 | n = name->aname; | ||
2192 | if (n) { | ||
2193 | if (parent) { | ||
2194 | if (n->type == AUDIT_TYPE_PARENT || | ||
2195 | n->type == AUDIT_TYPE_UNKNOWN) | ||
2196 | goto out; | ||
2197 | } else { | ||
2198 | if (n->type != AUDIT_TYPE_PARENT) | ||
2199 | goto out; | ||
2200 | } | ||
2201 | } | ||
2202 | |||
2150 | list_for_each_entry_reverse(n, &context->names_list, list) { | 2203 | list_for_each_entry_reverse(n, &context->names_list, list) { |
2151 | if (n->name && (n->name == name)) | 2204 | /* does the name pointer match? */ |
2152 | goto out; | 2205 | if (!n->name || n->name->name != name->name) |
2206 | continue; | ||
2207 | |||
2208 | /* match the correct record type */ | ||
2209 | if (parent) { | ||
2210 | if (n->type == AUDIT_TYPE_PARENT || | ||
2211 | n->type == AUDIT_TYPE_UNKNOWN) | ||
2212 | goto out; | ||
2213 | } else { | ||
2214 | if (n->type != AUDIT_TYPE_PARENT) | ||
2215 | goto out; | ||
2216 | } | ||
2153 | } | 2217 | } |
2154 | 2218 | ||
2155 | /* unable to find the name from a previous getname() */ | 2219 | out_alloc: |
2156 | n = audit_alloc_name(context); | 2220 | /* unable to find the name from a previous getname(). Allocate a new |
2221 | * anonymous entry. | ||
2222 | */ | ||
2223 | n = audit_alloc_name(context, AUDIT_TYPE_NORMAL); | ||
2157 | if (!n) | 2224 | if (!n) |
2158 | return; | 2225 | return; |
2159 | out: | 2226 | out: |
2227 | if (parent) { | ||
2228 | n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; | ||
2229 | n->type = AUDIT_TYPE_PARENT; | ||
2230 | } else { | ||
2231 | n->name_len = AUDIT_NAME_FULL; | ||
2232 | n->type = AUDIT_TYPE_NORMAL; | ||
2233 | } | ||
2160 | handle_path(dentry); | 2234 | handle_path(dentry); |
2161 | audit_copy_inode(n, dentry, inode); | 2235 | audit_copy_inode(n, dentry, inode); |
2162 | } | 2236 | } |
2163 | 2237 | ||
2164 | /** | 2238 | /** |
2165 | * audit_inode_child - collect inode info for created/removed objects | 2239 | * __audit_inode_child - collect inode info for created/removed objects |
2166 | * @dentry: dentry being audited | ||
2167 | * @parent: inode of dentry parent | 2240 | * @parent: inode of dentry parent |
2241 | * @dentry: dentry being audited | ||
2242 | * @type: AUDIT_TYPE_* value that we're looking for | ||
2168 | * | 2243 | * |
2169 | * For syscalls that create or remove filesystem objects, audit_inode | 2244 | * For syscalls that create or remove filesystem objects, audit_inode |
2170 | * can only collect information for the filesystem object's parent. | 2245 | * can only collect information for the filesystem object's parent. |
@@ -2174,15 +2249,14 @@ out: | |||
2174 | * must be hooked prior, in order to capture the target inode during | 2249 | * must be hooked prior, in order to capture the target inode during |
2175 | * unsuccessful attempts. | 2250 | * unsuccessful attempts. |
2176 | */ | 2251 | */ |
2177 | void __audit_inode_child(const struct dentry *dentry, | 2252 | void __audit_inode_child(const struct inode *parent, |
2178 | const struct inode *parent) | 2253 | const struct dentry *dentry, |
2254 | const unsigned char type) | ||
2179 | { | 2255 | { |
2180 | struct audit_context *context = current->audit_context; | 2256 | struct audit_context *context = current->audit_context; |
2181 | const char *found_parent = NULL, *found_child = NULL; | ||
2182 | const struct inode *inode = dentry->d_inode; | 2257 | const struct inode *inode = dentry->d_inode; |
2183 | const char *dname = dentry->d_name.name; | 2258 | const char *dname = dentry->d_name.name; |
2184 | struct audit_names *n; | 2259 | struct audit_names *n, *found_parent = NULL, *found_child = NULL; |
2185 | int dirlen = 0; | ||
2186 | 2260 | ||
2187 | if (!context->in_syscall) | 2261 | if (!context->in_syscall) |
2188 | return; | 2262 | return; |
@@ -2190,62 +2264,65 @@ void __audit_inode_child(const struct dentry *dentry, | |||
2190 | if (inode) | 2264 | if (inode) |
2191 | handle_one(inode); | 2265 | handle_one(inode); |
2192 | 2266 | ||
2193 | /* parent is more likely, look for it first */ | 2267 | /* look for a parent entry first */ |
2194 | list_for_each_entry(n, &context->names_list, list) { | 2268 | list_for_each_entry(n, &context->names_list, list) { |
2195 | if (!n->name) | 2269 | if (!n->name || n->type != AUDIT_TYPE_PARENT) |
2196 | continue; | 2270 | continue; |
2197 | 2271 | ||
2198 | if (n->ino == parent->i_ino && | 2272 | if (n->ino == parent->i_ino && |
2199 | !audit_compare_dname_path(dname, n->name, &dirlen)) { | 2273 | !audit_compare_dname_path(dname, n->name->name, n->name_len)) { |
2200 | n->name_len = dirlen; /* update parent data in place */ | 2274 | found_parent = n; |
2201 | found_parent = n->name; | 2275 | break; |
2202 | goto add_names; | ||
2203 | } | 2276 | } |
2204 | } | 2277 | } |
2205 | 2278 | ||
2206 | /* no matching parent, look for matching child */ | 2279 | /* is there a matching child entry? */ |
2207 | list_for_each_entry(n, &context->names_list, list) { | 2280 | list_for_each_entry(n, &context->names_list, list) { |
2208 | if (!n->name) | 2281 | /* can only match entries that have a name */ |
2282 | if (!n->name || n->type != type) | ||
2209 | continue; | 2283 | continue; |
2210 | 2284 | ||
2211 | /* strcmp() is the more likely scenario */ | 2285 | /* if we found a parent, make sure this one is a child of it */ |
2212 | if (!strcmp(dname, n->name) || | 2286 | if (found_parent && (n->name != found_parent->name)) |
2213 | !audit_compare_dname_path(dname, n->name, &dirlen)) { | 2287 | continue; |
2214 | if (inode) | 2288 | |
2215 | audit_copy_inode(n, NULL, inode); | 2289 | if (!strcmp(dname, n->name->name) || |
2216 | else | 2290 | !audit_compare_dname_path(dname, n->name->name, |
2217 | n->ino = (unsigned long)-1; | 2291 | found_parent ? |
2218 | found_child = n->name; | 2292 | found_parent->name_len : |
2219 | goto add_names; | 2293 | AUDIT_NAME_FULL)) { |
2294 | found_child = n; | ||
2295 | break; | ||
2220 | } | 2296 | } |
2221 | } | 2297 | } |
2222 | 2298 | ||
2223 | add_names: | ||
2224 | if (!found_parent) { | 2299 | if (!found_parent) { |
2225 | n = audit_alloc_name(context); | 2300 | /* create a new, "anonymous" parent record */ |
2301 | n = audit_alloc_name(context, AUDIT_TYPE_PARENT); | ||
2226 | if (!n) | 2302 | if (!n) |
2227 | return; | 2303 | return; |
2228 | audit_copy_inode(n, NULL, parent); | 2304 | audit_copy_inode(n, NULL, parent); |
2229 | } | 2305 | } |
2230 | 2306 | ||
2231 | if (!found_child) { | 2307 | if (!found_child) { |
2232 | n = audit_alloc_name(context); | 2308 | found_child = audit_alloc_name(context, type); |
2233 | if (!n) | 2309 | if (!found_child) |
2234 | return; | 2310 | return; |
2235 | 2311 | ||
2236 | /* Re-use the name belonging to the slot for a matching parent | 2312 | /* Re-use the name belonging to the slot for a matching parent |
2237 | * directory. All names for this context are relinquished in | 2313 | * directory. All names for this context are relinquished in |
2238 | * audit_free_names() */ | 2314 | * audit_free_names() */ |
2239 | if (found_parent) { | 2315 | if (found_parent) { |
2240 | n->name = found_parent; | 2316 | found_child->name = found_parent->name; |
2241 | n->name_len = AUDIT_NAME_FULL; | 2317 | found_child->name_len = AUDIT_NAME_FULL; |
2242 | /* don't call __putname() */ | 2318 | /* don't call __putname() */ |
2243 | n->name_put = false; | 2319 | found_child->name_put = false; |
2244 | } | 2320 | } |
2245 | |||
2246 | if (inode) | ||
2247 | audit_copy_inode(n, NULL, inode); | ||
2248 | } | 2321 | } |
2322 | if (inode) | ||
2323 | audit_copy_inode(found_child, dentry, inode); | ||
2324 | else | ||
2325 | found_child->ino = (unsigned long)-1; | ||
2249 | } | 2326 | } |
2250 | EXPORT_SYMBOL_GPL(__audit_inode_child); | 2327 | EXPORT_SYMBOL_GPL(__audit_inode_child); |
2251 | 2328 | ||
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 17e073c309e6..9a61738cefc8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
@@ -696,6 +696,22 @@ out: | |||
696 | return ret; | 696 | return ret; |
697 | } | 697 | } |
698 | 698 | ||
699 | /* | ||
700 | * GDB places a breakpoint at this function to know dynamically | ||
701 | * loaded objects. It's not defined static so that only one instance with this | ||
702 | * name exists in the kernel. | ||
703 | */ | ||
704 | |||
705 | static int module_event(struct notifier_block *self, unsigned long val, | ||
706 | void *data) | ||
707 | { | ||
708 | return 0; | ||
709 | } | ||
710 | |||
711 | static struct notifier_block dbg_module_load_nb = { | ||
712 | .notifier_call = module_event, | ||
713 | }; | ||
714 | |||
699 | int kgdb_nmicallback(int cpu, void *regs) | 715 | int kgdb_nmicallback(int cpu, void *regs) |
700 | { | 716 | { |
701 | #ifdef CONFIG_SMP | 717 | #ifdef CONFIG_SMP |
@@ -824,6 +840,7 @@ static void kgdb_register_callbacks(void) | |||
824 | kgdb_arch_init(); | 840 | kgdb_arch_init(); |
825 | if (!dbg_is_early) | 841 | if (!dbg_is_early) |
826 | kgdb_arch_late(); | 842 | kgdb_arch_late(); |
843 | register_module_notifier(&dbg_module_load_nb); | ||
827 | register_reboot_notifier(&dbg_reboot_notifier); | 844 | register_reboot_notifier(&dbg_reboot_notifier); |
828 | atomic_notifier_chain_register(&panic_notifier_list, | 845 | atomic_notifier_chain_register(&panic_notifier_list, |
829 | &kgdb_panic_event_nb); | 846 | &kgdb_panic_event_nb); |
@@ -847,6 +864,7 @@ static void kgdb_unregister_callbacks(void) | |||
847 | if (kgdb_io_module_registered) { | 864 | if (kgdb_io_module_registered) { |
848 | kgdb_io_module_registered = 0; | 865 | kgdb_io_module_registered = 0; |
849 | unregister_reboot_notifier(&dbg_reboot_notifier); | 866 | unregister_reboot_notifier(&dbg_reboot_notifier); |
867 | unregister_module_notifier(&dbg_module_load_nb); | ||
850 | atomic_notifier_chain_unregister(&panic_notifier_list, | 868 | atomic_notifier_chain_unregister(&panic_notifier_list, |
851 | &kgdb_panic_event_nb); | 869 | &kgdb_panic_event_nb); |
852 | kgdb_arch_exit(); | 870 | kgdb_arch_exit(); |
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 07c9bbb94a0b..b03e0e814e43 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c | |||
@@ -129,6 +129,8 @@ kdb_bt(int argc, const char **argv) | |||
129 | } | 129 | } |
130 | /* Now the inactive tasks */ | 130 | /* Now the inactive tasks */ |
131 | kdb_do_each_thread(g, p) { | 131 | kdb_do_each_thread(g, p) { |
132 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
133 | return 0; | ||
132 | if (task_curr(p)) | 134 | if (task_curr(p)) |
133 | continue; | 135 | continue; |
134 | if (kdb_bt1(p, mask, argcount, btaprompt)) | 136 | if (kdb_bt1(p, mask, argcount, btaprompt)) |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 0a69d2adc4f3..14ff4849262c 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
@@ -552,6 +552,7 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
552 | { | 552 | { |
553 | int diag; | 553 | int diag; |
554 | int linecount; | 554 | int linecount; |
555 | int colcount; | ||
555 | int logging, saved_loglevel = 0; | 556 | int logging, saved_loglevel = 0; |
556 | int saved_trap_printk; | 557 | int saved_trap_printk; |
557 | int got_printf_lock = 0; | 558 | int got_printf_lock = 0; |
@@ -584,6 +585,10 @@ int vkdb_printf(const char *fmt, va_list ap) | |||
584 | if (diag || linecount <= 1) | 585 | if (diag || linecount <= 1) |
585 | linecount = 24; | 586 | linecount = 24; |
586 | 587 | ||
588 | diag = kdbgetintenv("COLUMNS", &colcount); | ||
589 | if (diag || colcount <= 1) | ||
590 | colcount = 80; | ||
591 | |||
587 | diag = kdbgetintenv("LOGGING", &logging); | 592 | diag = kdbgetintenv("LOGGING", &logging); |
588 | if (diag) | 593 | if (diag) |
589 | logging = 0; | 594 | logging = 0; |
@@ -690,7 +695,7 @@ kdb_printit: | |||
690 | gdbstub_msg_write(kdb_buffer, retlen); | 695 | gdbstub_msg_write(kdb_buffer, retlen); |
691 | } else { | 696 | } else { |
692 | if (dbg_io_ops && !dbg_io_ops->is_console) { | 697 | if (dbg_io_ops && !dbg_io_ops->is_console) { |
693 | len = strlen(kdb_buffer); | 698 | len = retlen; |
694 | cp = kdb_buffer; | 699 | cp = kdb_buffer; |
695 | while (len--) { | 700 | while (len--) { |
696 | dbg_io_ops->write_char(*cp); | 701 | dbg_io_ops->write_char(*cp); |
@@ -709,11 +714,29 @@ kdb_printit: | |||
709 | printk(KERN_INFO "%s", kdb_buffer); | 714 | printk(KERN_INFO "%s", kdb_buffer); |
710 | } | 715 | } |
711 | 716 | ||
712 | if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n')) | 717 | if (KDB_STATE(PAGER)) { |
713 | kdb_nextline++; | 718 | /* |
719 | * Check printed string to decide how to bump the | ||
720 | * kdb_nextline to control when the more prompt should | ||
721 | * show up. | ||
722 | */ | ||
723 | int got = 0; | ||
724 | len = retlen; | ||
725 | while (len--) { | ||
726 | if (kdb_buffer[len] == '\n') { | ||
727 | kdb_nextline++; | ||
728 | got = 0; | ||
729 | } else if (kdb_buffer[len] == '\r') { | ||
730 | got = 0; | ||
731 | } else { | ||
732 | got++; | ||
733 | } | ||
734 | } | ||
735 | kdb_nextline += got / (colcount + 1); | ||
736 | } | ||
714 | 737 | ||
715 | /* check for having reached the LINES number of printed lines */ | 738 | /* check for having reached the LINES number of printed lines */ |
716 | if (kdb_nextline == linecount) { | 739 | if (kdb_nextline >= linecount) { |
717 | char buf1[16] = ""; | 740 | char buf1[16] = ""; |
718 | 741 | ||
719 | /* Watch out for recursion here. Any routine that calls | 742 | /* Watch out for recursion here. Any routine that calls |
@@ -765,7 +788,7 @@ kdb_printit: | |||
765 | kdb_grepping_flag = 0; | 788 | kdb_grepping_flag = 0; |
766 | kdb_printf("\n"); | 789 | kdb_printf("\n"); |
767 | } else if (buf1[0] == ' ') { | 790 | } else if (buf1[0] == ' ') { |
768 | kdb_printf("\n"); | 791 | kdb_printf("\r"); |
769 | suspend_grep = 1; /* for this recursion */ | 792 | suspend_grep = 1; /* for this recursion */ |
770 | } else if (buf1[0] == '\n') { | 793 | } else if (buf1[0] == '\n') { |
771 | kdb_nextline = linecount - 1; | 794 | kdb_nextline = linecount - 1; |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 1261dc7eaeb9..4d5f8d5612f3 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -2101,6 +2101,8 @@ static int kdb_dmesg(int argc, const char **argv) | |||
2101 | } | 2101 | } |
2102 | if (!lines--) | 2102 | if (!lines--) |
2103 | break; | 2103 | break; |
2104 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
2105 | return 0; | ||
2104 | 2106 | ||
2105 | kdb_printf("%.*s\n", (int)len - 1, buf); | 2107 | kdb_printf("%.*s\n", (int)len - 1, buf); |
2106 | } | 2108 | } |
diff --git a/kernel/events/core.c b/kernel/events/core.c index cda3ebd49e86..dbccf83c134d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -372,6 +372,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
372 | 372 | ||
373 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 373 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
374 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 374 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
375 | if (cpuctx->unique_pmu != pmu) | ||
376 | continue; /* ensure we process each cpuctx once */ | ||
375 | 377 | ||
376 | /* | 378 | /* |
377 | * perf_cgroup_events says at least one | 379 | * perf_cgroup_events says at least one |
@@ -395,9 +397,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
395 | 397 | ||
396 | if (mode & PERF_CGROUP_SWIN) { | 398 | if (mode & PERF_CGROUP_SWIN) { |
397 | WARN_ON_ONCE(cpuctx->cgrp); | 399 | WARN_ON_ONCE(cpuctx->cgrp); |
398 | /* set cgrp before ctxsw in to | 400 | /* |
399 | * allow event_filter_match() to not | 401 | * set cgrp before ctxsw in to allow |
400 | * have to pass task around | 402 | * event_filter_match() to not have to pass |
403 | * task around | ||
401 | */ | 404 | */ |
402 | cpuctx->cgrp = perf_cgroup_from_task(task); | 405 | cpuctx->cgrp = perf_cgroup_from_task(task); |
403 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); | 406 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); |
@@ -4412,7 +4415,7 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
4412 | rcu_read_lock(); | 4415 | rcu_read_lock(); |
4413 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4416 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4414 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4417 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4415 | if (cpuctx->active_pmu != pmu) | 4418 | if (cpuctx->unique_pmu != pmu) |
4416 | goto next; | 4419 | goto next; |
4417 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 4420 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
4418 | 4421 | ||
@@ -4558,7 +4561,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
4558 | rcu_read_lock(); | 4561 | rcu_read_lock(); |
4559 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4562 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4560 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4563 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4561 | if (cpuctx->active_pmu != pmu) | 4564 | if (cpuctx->unique_pmu != pmu) |
4562 | goto next; | 4565 | goto next; |
4563 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 4566 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
4564 | 4567 | ||
@@ -4754,7 +4757,7 @@ got_name: | |||
4754 | rcu_read_lock(); | 4757 | rcu_read_lock(); |
4755 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4758 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4756 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4759 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4757 | if (cpuctx->active_pmu != pmu) | 4760 | if (cpuctx->unique_pmu != pmu) |
4758 | goto next; | 4761 | goto next; |
4759 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4762 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4760 | vma->vm_flags & VM_EXEC); | 4763 | vma->vm_flags & VM_EXEC); |
@@ -5855,8 +5858,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) | |||
5855 | 5858 | ||
5856 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5859 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
5857 | 5860 | ||
5858 | if (cpuctx->active_pmu == old_pmu) | 5861 | if (cpuctx->unique_pmu == old_pmu) |
5859 | cpuctx->active_pmu = pmu; | 5862 | cpuctx->unique_pmu = pmu; |
5860 | } | 5863 | } |
5861 | } | 5864 | } |
5862 | 5865 | ||
@@ -5991,7 +5994,7 @@ skip_type: | |||
5991 | cpuctx->ctx.pmu = pmu; | 5994 | cpuctx->ctx.pmu = pmu; |
5992 | cpuctx->jiffies_interval = 1; | 5995 | cpuctx->jiffies_interval = 1; |
5993 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5996 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
5994 | cpuctx->active_pmu = pmu; | 5997 | cpuctx->unique_pmu = pmu; |
5995 | } | 5998 | } |
5996 | 5999 | ||
5997 | got_cpu_context: | 6000 | got_cpu_context: |
diff --git a/kernel/fork.c b/kernel/fork.c index 1cd7d581b3b2..8b20ab7d3aa2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1584,7 +1584,7 @@ long do_fork(unsigned long clone_flags, | |||
1584 | * requested, no event is reported; otherwise, report if the event | 1584 | * requested, no event is reported; otherwise, report if the event |
1585 | * for the type of forking is enabled. | 1585 | * for the type of forking is enabled. |
1586 | */ | 1586 | */ |
1587 | if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { | 1587 | if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) { |
1588 | if (clone_flags & CLONE_VFORK) | 1588 | if (clone_flags & CLONE_VFORK) |
1589 | trace = PTRACE_EVENT_VFORK; | 1589 | trace = PTRACE_EVENT_VFORK; |
1590 | else if ((clone_flags & CSIGNAL) != SIGCHLD) | 1590 | else if ((clone_flags & CSIGNAL) != SIGCHLD) |
@@ -1634,6 +1634,17 @@ long do_fork(unsigned long clone_flags, | |||
1634 | return nr; | 1634 | return nr; |
1635 | } | 1635 | } |
1636 | 1636 | ||
1637 | #ifdef CONFIG_GENERIC_KERNEL_THREAD | ||
1638 | /* | ||
1639 | * Create a kernel thread. | ||
1640 | */ | ||
1641 | pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
1642 | { | ||
1643 | return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL, | ||
1644 | (unsigned long)arg, NULL, NULL); | ||
1645 | } | ||
1646 | #endif | ||
1647 | |||
1637 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN | 1648 | #ifndef ARCH_MIN_MMSTRUCT_ALIGN |
1638 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 | 1649 | #define ARCH_MIN_MMSTRUCT_ALIGN 0 |
1639 | #endif | 1650 | #endif |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 49a77727db42..4e69e24d3d7d 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -148,7 +148,8 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | |||
148 | * @host_data: Controller private data pointer | 148 | * @host_data: Controller private data pointer |
149 | * | 149 | * |
150 | * Allocates a legacy irq_domain if irq_base is positive or a linear | 150 | * Allocates a legacy irq_domain if irq_base is positive or a linear |
151 | * domain otherwise. | 151 | * domain otherwise. For the legacy domain, IRQ descriptors will also |
152 | * be allocated. | ||
152 | * | 153 | * |
153 | * This is intended to implement the expected behaviour for most | 154 | * This is intended to implement the expected behaviour for most |
154 | * interrupt controllers which is that a linear mapping should | 155 | * interrupt controllers which is that a linear mapping should |
@@ -162,11 +163,33 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | |||
162 | const struct irq_domain_ops *ops, | 163 | const struct irq_domain_ops *ops, |
163 | void *host_data) | 164 | void *host_data) |
164 | { | 165 | { |
165 | if (first_irq > 0) | 166 | if (first_irq > 0) { |
166 | return irq_domain_add_legacy(of_node, size, first_irq, 0, | 167 | int irq_base; |
168 | |||
169 | if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { | ||
170 | /* | ||
171 | * Set the descriptor allocator to search for a | ||
172 | * 1-to-1 mapping, such as irq_alloc_desc_at(). | ||
173 | * Use of_node_to_nid() which is defined to | ||
174 | * numa_node_id() on platforms that have no custom | ||
175 | * implementation. | ||
176 | */ | ||
177 | irq_base = irq_alloc_descs(first_irq, first_irq, size, | ||
178 | of_node_to_nid(of_node)); | ||
179 | if (irq_base < 0) { | ||
180 | WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", | ||
181 | first_irq); | ||
182 | irq_base = first_irq; | ||
183 | } | ||
184 | } else | ||
185 | irq_base = first_irq; | ||
186 | |||
187 | return irq_domain_add_legacy(of_node, size, irq_base, 0, | ||
167 | ops, host_data); | 188 | ops, host_data); |
168 | else | 189 | } |
169 | return irq_domain_add_linear(of_node, size, ops, host_data); | 190 | |
191 | /* A linear domain is the default */ | ||
192 | return irq_domain_add_linear(of_node, size, ops, host_data); | ||
170 | } | 193 | } |
171 | 194 | ||
172 | /** | 195 | /** |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 6f99aead66c6..1c317e386831 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/notifier.h> | 37 | #include <linux/notifier.h> |
38 | #include <linux/suspend.h> | 38 | #include <linux/suspend.h> |
39 | #include <linux/rwsem.h> | 39 | #include <linux/rwsem.h> |
40 | #include <linux/ptrace.h> | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | 42 | ||
42 | #include <trace/events/module.h> | 43 | #include <trace/events/module.h> |
@@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data) | |||
221 | retval = kernel_execve(sub_info->path, | 222 | retval = kernel_execve(sub_info->path, |
222 | (const char *const *)sub_info->argv, | 223 | (const char *const *)sub_info->argv, |
223 | (const char *const *)sub_info->envp); | 224 | (const char *const *)sub_info->envp); |
225 | if (!retval) | ||
226 | return 0; | ||
224 | 227 | ||
225 | /* Exec failed? */ | 228 | /* Exec failed? */ |
226 | fail: | 229 | fail: |
227 | sub_info->retval = retval; | 230 | sub_info->retval = retval; |
228 | return 0; | 231 | do_exit(0); |
229 | } | 232 | } |
230 | 233 | ||
231 | static int call_helper(void *data) | 234 | static int call_helper(void *data) |
@@ -292,7 +295,7 @@ static int wait_for_helper(void *data) | |||
292 | } | 295 | } |
293 | 296 | ||
294 | umh_complete(sub_info); | 297 | umh_complete(sub_info); |
295 | return 0; | 298 | do_exit(0); |
296 | } | 299 | } |
297 | 300 | ||
298 | /* This is run by khelper thread */ | 301 | /* This is run by khelper thread */ |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 146a6fa96825..29fb60caecb5 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
19 | #include <linux/ptrace.h> | ||
19 | #include <trace/events/sched.h> | 20 | #include <trace/events/sched.h> |
20 | 21 | ||
21 | static DEFINE_SPINLOCK(kthread_create_lock); | 22 | static DEFINE_SPINLOCK(kthread_create_lock); |
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c new file mode 100644 index 000000000000..4646eb2c3820 --- /dev/null +++ b/kernel/modsign_pubkey.c | |||
@@ -0,0 +1,113 @@ | |||
1 | /* Public keys for module signature verification | ||
2 | * | ||
3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/cred.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <keys/asymmetric-type.h> | ||
17 | #include "module-internal.h" | ||
18 | |||
19 | struct key *modsign_keyring; | ||
20 | |||
21 | extern __initdata const u8 modsign_certificate_list[]; | ||
22 | extern __initdata const u8 modsign_certificate_list_end[]; | ||
23 | asm(".section .init.data,\"aw\"\n" | ||
24 | "modsign_certificate_list:\n" | ||
25 | ".incbin \"signing_key.x509\"\n" | ||
26 | ".incbin \"extra_certificates\"\n" | ||
27 | "modsign_certificate_list_end:" | ||
28 | ); | ||
29 | |||
30 | /* | ||
31 | * We need to make sure ccache doesn't cache the .o file as it doesn't notice | ||
32 | * if modsign.pub changes. | ||
33 | */ | ||
34 | static __initdata const char annoy_ccache[] = __TIME__ "foo"; | ||
35 | |||
36 | /* | ||
37 | * Load the compiled-in keys | ||
38 | */ | ||
39 | static __init int module_verify_init(void) | ||
40 | { | ||
41 | pr_notice("Initialise module verification\n"); | ||
42 | |||
43 | modsign_keyring = key_alloc(&key_type_keyring, ".module_sign", | ||
44 | KUIDT_INIT(0), KGIDT_INIT(0), | ||
45 | current_cred(), | ||
46 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
47 | KEY_USR_VIEW | KEY_USR_READ, | ||
48 | KEY_ALLOC_NOT_IN_QUOTA); | ||
49 | if (IS_ERR(modsign_keyring)) | ||
50 | panic("Can't allocate module signing keyring\n"); | ||
51 | |||
52 | if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0) | ||
53 | panic("Can't instantiate module signing keyring\n"); | ||
54 | |||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * Must be initialised before we try and load the keys into the keyring. | ||
60 | */ | ||
61 | device_initcall(module_verify_init); | ||
62 | |||
63 | /* | ||
64 | * Load the compiled-in keys | ||
65 | */ | ||
66 | static __init int load_module_signing_keys(void) | ||
67 | { | ||
68 | key_ref_t key; | ||
69 | const u8 *p, *end; | ||
70 | size_t plen; | ||
71 | |||
72 | pr_notice("Loading module verification certificates\n"); | ||
73 | |||
74 | end = modsign_certificate_list_end; | ||
75 | p = modsign_certificate_list; | ||
76 | while (p < end) { | ||
77 | /* Each cert begins with an ASN.1 SEQUENCE tag and must be more | ||
78 | * than 256 bytes in size. | ||
79 | */ | ||
80 | if (end - p < 4) | ||
81 | goto dodgy_cert; | ||
82 | if (p[0] != 0x30 && | ||
83 | p[1] != 0x82) | ||
84 | goto dodgy_cert; | ||
85 | plen = (p[2] << 8) | p[3]; | ||
86 | plen += 4; | ||
87 | if (plen > end - p) | ||
88 | goto dodgy_cert; | ||
89 | |||
90 | key = key_create_or_update(make_key_ref(modsign_keyring, 1), | ||
91 | "asymmetric", | ||
92 | NULL, | ||
93 | p, | ||
94 | plen, | ||
95 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
96 | KEY_USR_VIEW, | ||
97 | KEY_ALLOC_NOT_IN_QUOTA); | ||
98 | if (IS_ERR(key)) | ||
99 | pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n", | ||
100 | PTR_ERR(key)); | ||
101 | else | ||
102 | pr_notice("MODSIGN: Loaded cert '%s'\n", | ||
103 | key_ref_to_ptr(key)->description); | ||
104 | p += plen; | ||
105 | } | ||
106 | |||
107 | return 0; | ||
108 | |||
109 | dodgy_cert: | ||
110 | pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n"); | ||
111 | return 0; | ||
112 | } | ||
113 | late_initcall(load_module_signing_keys); | ||
diff --git a/kernel/module-internal.h b/kernel/module-internal.h new file mode 100644 index 000000000000..6114a13419bd --- /dev/null +++ b/kernel/module-internal.h | |||
@@ -0,0 +1,15 @@ | |||
1 | /* Module internals | ||
2 | * | ||
3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | extern struct key *modsign_keyring; | ||
13 | |||
14 | extern int mod_verify_sig(const void *mod, unsigned long modlen, | ||
15 | const void *sig, unsigned long siglen); | ||
diff --git a/kernel/module.c b/kernel/module.c index 4edbd9c11aca..0e2da8695f8e 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -58,6 +58,8 @@ | |||
58 | #include <linux/jump_label.h> | 58 | #include <linux/jump_label.h> |
59 | #include <linux/pfn.h> | 59 | #include <linux/pfn.h> |
60 | #include <linux/bsearch.h> | 60 | #include <linux/bsearch.h> |
61 | #include <linux/fips.h> | ||
62 | #include "module-internal.h" | ||
61 | 63 | ||
62 | #define CREATE_TRACE_POINTS | 64 | #define CREATE_TRACE_POINTS |
63 | #include <trace/events/module.h> | 65 | #include <trace/events/module.h> |
@@ -102,6 +104,43 @@ static LIST_HEAD(modules); | |||
102 | struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ | 104 | struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ |
103 | #endif /* CONFIG_KGDB_KDB */ | 105 | #endif /* CONFIG_KGDB_KDB */ |
104 | 106 | ||
107 | #ifdef CONFIG_MODULE_SIG | ||
108 | #ifdef CONFIG_MODULE_SIG_FORCE | ||
109 | static bool sig_enforce = true; | ||
110 | #else | ||
111 | static bool sig_enforce = false; | ||
112 | |||
113 | static int param_set_bool_enable_only(const char *val, | ||
114 | const struct kernel_param *kp) | ||
115 | { | ||
116 | int err; | ||
117 | bool test; | ||
118 | struct kernel_param dummy_kp = *kp; | ||
119 | |||
120 | dummy_kp.arg = &test; | ||
121 | |||
122 | err = param_set_bool(val, &dummy_kp); | ||
123 | if (err) | ||
124 | return err; | ||
125 | |||
126 | /* Don't let them unset it once it's set! */ | ||
127 | if (!test && sig_enforce) | ||
128 | return -EROFS; | ||
129 | |||
130 | if (test) | ||
131 | sig_enforce = true; | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static const struct kernel_param_ops param_ops_bool_enable_only = { | ||
136 | .set = param_set_bool_enable_only, | ||
137 | .get = param_get_bool, | ||
138 | }; | ||
139 | #define param_check_bool_enable_only param_check_bool | ||
140 | |||
141 | module_param(sig_enforce, bool_enable_only, 0644); | ||
142 | #endif /* !CONFIG_MODULE_SIG_FORCE */ | ||
143 | #endif /* CONFIG_MODULE_SIG */ | ||
105 | 144 | ||
106 | /* Block module loading/unloading? */ | 145 | /* Block module loading/unloading? */ |
107 | int modules_disabled = 0; | 146 | int modules_disabled = 0; |
@@ -136,6 +175,7 @@ struct load_info { | |||
136 | unsigned long symoffs, stroffs; | 175 | unsigned long symoffs, stroffs; |
137 | struct _ddebug *debug; | 176 | struct _ddebug *debug; |
138 | unsigned int num_debug; | 177 | unsigned int num_debug; |
178 | bool sig_ok; | ||
139 | struct { | 179 | struct { |
140 | unsigned int sym, str, mod, vers, info, pcpu; | 180 | unsigned int sym, str, mod, vers, info, pcpu; |
141 | } index; | 181 | } index; |
@@ -1949,26 +1989,6 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) | |||
1949 | return ret; | 1989 | return ret; |
1950 | } | 1990 | } |
1951 | 1991 | ||
1952 | int __weak apply_relocate(Elf_Shdr *sechdrs, | ||
1953 | const char *strtab, | ||
1954 | unsigned int symindex, | ||
1955 | unsigned int relsec, | ||
1956 | struct module *me) | ||
1957 | { | ||
1958 | pr_err("module %s: REL relocation unsupported\n", me->name); | ||
1959 | return -ENOEXEC; | ||
1960 | } | ||
1961 | |||
1962 | int __weak apply_relocate_add(Elf_Shdr *sechdrs, | ||
1963 | const char *strtab, | ||
1964 | unsigned int symindex, | ||
1965 | unsigned int relsec, | ||
1966 | struct module *me) | ||
1967 | { | ||
1968 | pr_err("module %s: RELA relocation unsupported\n", me->name); | ||
1969 | return -ENOEXEC; | ||
1970 | } | ||
1971 | |||
1972 | static int apply_relocations(struct module *mod, const struct load_info *info) | 1992 | static int apply_relocations(struct module *mod, const struct load_info *info) |
1973 | { | 1993 | { |
1974 | unsigned int i; | 1994 | unsigned int i; |
@@ -2399,7 +2419,52 @@ static inline void kmemleak_load_module(const struct module *mod, | |||
2399 | } | 2419 | } |
2400 | #endif | 2420 | #endif |
2401 | 2421 | ||
2402 | /* Sets info->hdr and info->len. */ | 2422 | #ifdef CONFIG_MODULE_SIG |
2423 | static int module_sig_check(struct load_info *info, | ||
2424 | const void *mod, unsigned long *len) | ||
2425 | { | ||
2426 | int err = -ENOKEY; | ||
2427 | const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; | ||
2428 | const void *p = mod, *end = mod + *len; | ||
2429 | |||
2430 | /* Poor man's memmem. */ | ||
2431 | while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) { | ||
2432 | if (p + markerlen > end) | ||
2433 | break; | ||
2434 | |||
2435 | if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) { | ||
2436 | const void *sig = p + markerlen; | ||
2437 | /* Truncate module up to signature. */ | ||
2438 | *len = p - mod; | ||
2439 | err = mod_verify_sig(mod, *len, sig, end - sig); | ||
2440 | break; | ||
2441 | } | ||
2442 | p++; | ||
2443 | } | ||
2444 | |||
2445 | if (!err) { | ||
2446 | info->sig_ok = true; | ||
2447 | return 0; | ||
2448 | } | ||
2449 | |||
2450 | /* Not having a signature is only an error if we're strict. */ | ||
2451 | if (err < 0 && fips_enabled) | ||
2452 | panic("Module verification failed with error %d in FIPS mode\n", | ||
2453 | err); | ||
2454 | if (err == -ENOKEY && !sig_enforce) | ||
2455 | err = 0; | ||
2456 | |||
2457 | return err; | ||
2458 | } | ||
2459 | #else /* !CONFIG_MODULE_SIG */ | ||
2460 | static int module_sig_check(struct load_info *info, | ||
2461 | void *mod, unsigned long *len) | ||
2462 | { | ||
2463 | return 0; | ||
2464 | } | ||
2465 | #endif /* !CONFIG_MODULE_SIG */ | ||
2466 | |||
2467 | /* Sets info->hdr, info->len and info->sig_ok. */ | ||
2403 | static int copy_and_check(struct load_info *info, | 2468 | static int copy_and_check(struct load_info *info, |
2404 | const void __user *umod, unsigned long len, | 2469 | const void __user *umod, unsigned long len, |
2405 | const char __user *uargs) | 2470 | const char __user *uargs) |
@@ -2419,6 +2484,10 @@ static int copy_and_check(struct load_info *info, | |||
2419 | goto free_hdr; | 2484 | goto free_hdr; |
2420 | } | 2485 | } |
2421 | 2486 | ||
2487 | err = module_sig_check(info, hdr, &len); | ||
2488 | if (err) | ||
2489 | goto free_hdr; | ||
2490 | |||
2422 | /* Sanity checks against insmoding binaries or wrong arch, | 2491 | /* Sanity checks against insmoding binaries or wrong arch, |
2423 | weird elf version */ | 2492 | weird elf version */ |
2424 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 | 2493 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 |
@@ -2730,6 +2799,10 @@ static int check_module_license_and_versions(struct module *mod) | |||
2730 | if (strcmp(mod->name, "driverloader") == 0) | 2799 | if (strcmp(mod->name, "driverloader") == 0) |
2731 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | 2800 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); |
2732 | 2801 | ||
2802 | /* lve claims to be GPL but upstream won't provide source */ | ||
2803 | if (strcmp(mod->name, "lve") == 0) | ||
2804 | add_taint_module(mod, TAINT_PROPRIETARY_MODULE); | ||
2805 | |||
2733 | #ifdef CONFIG_MODVERSIONS | 2806 | #ifdef CONFIG_MODVERSIONS |
2734 | if ((mod->num_syms && !mod->crcs) | 2807 | if ((mod->num_syms && !mod->crcs) |
2735 | || (mod->num_gpl_syms && !mod->gpl_crcs) | 2808 | || (mod->num_gpl_syms && !mod->gpl_crcs) |
@@ -2861,6 +2934,20 @@ static int post_relocation(struct module *mod, const struct load_info *info) | |||
2861 | return module_finalize(info->hdr, info->sechdrs, mod); | 2934 | return module_finalize(info->hdr, info->sechdrs, mod); |
2862 | } | 2935 | } |
2863 | 2936 | ||
2937 | /* Is this module of this name done loading? No locks held. */ | ||
2938 | static bool finished_loading(const char *name) | ||
2939 | { | ||
2940 | struct module *mod; | ||
2941 | bool ret; | ||
2942 | |||
2943 | mutex_lock(&module_mutex); | ||
2944 | mod = find_module(name); | ||
2945 | ret = !mod || mod->state != MODULE_STATE_COMING; | ||
2946 | mutex_unlock(&module_mutex); | ||
2947 | |||
2948 | return ret; | ||
2949 | } | ||
2950 | |||
2864 | /* Allocate and load the module: note that size of section 0 is always | 2951 | /* Allocate and load the module: note that size of section 0 is always |
2865 | zero, and we rely on this for optional sections. */ | 2952 | zero, and we rely on this for optional sections. */ |
2866 | static struct module *load_module(void __user *umod, | 2953 | static struct module *load_module(void __user *umod, |
@@ -2868,7 +2955,7 @@ static struct module *load_module(void __user *umod, | |||
2868 | const char __user *uargs) | 2955 | const char __user *uargs) |
2869 | { | 2956 | { |
2870 | struct load_info info = { NULL, }; | 2957 | struct load_info info = { NULL, }; |
2871 | struct module *mod; | 2958 | struct module *mod, *old; |
2872 | long err; | 2959 | long err; |
2873 | 2960 | ||
2874 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", | 2961 | pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", |
@@ -2886,6 +2973,12 @@ static struct module *load_module(void __user *umod, | |||
2886 | goto free_copy; | 2973 | goto free_copy; |
2887 | } | 2974 | } |
2888 | 2975 | ||
2976 | #ifdef CONFIG_MODULE_SIG | ||
2977 | mod->sig_ok = info.sig_ok; | ||
2978 | if (!mod->sig_ok) | ||
2979 | add_taint_module(mod, TAINT_FORCED_MODULE); | ||
2980 | #endif | ||
2981 | |||
2889 | /* Now module is in final location, initialize linked lists, etc. */ | 2982 | /* Now module is in final location, initialize linked lists, etc. */ |
2890 | err = module_unload_init(mod); | 2983 | err = module_unload_init(mod); |
2891 | if (err) | 2984 | if (err) |
@@ -2934,8 +3027,18 @@ static struct module *load_module(void __user *umod, | |||
2934 | * function to insert in a way safe to concurrent readers. | 3027 | * function to insert in a way safe to concurrent readers. |
2935 | * The mutex protects against concurrent writers. | 3028 | * The mutex protects against concurrent writers. |
2936 | */ | 3029 | */ |
3030 | again: | ||
2937 | mutex_lock(&module_mutex); | 3031 | mutex_lock(&module_mutex); |
2938 | if (find_module(mod->name)) { | 3032 | if ((old = find_module(mod->name)) != NULL) { |
3033 | if (old->state == MODULE_STATE_COMING) { | ||
3034 | /* Wait in case it fails to load. */ | ||
3035 | mutex_unlock(&module_mutex); | ||
3036 | err = wait_event_interruptible(module_wq, | ||
3037 | finished_loading(mod->name)); | ||
3038 | if (err) | ||
3039 | goto free_arch_cleanup; | ||
3040 | goto again; | ||
3041 | } | ||
2939 | err = -EEXIST; | 3042 | err = -EEXIST; |
2940 | goto unlock; | 3043 | goto unlock; |
2941 | } | 3044 | } |
@@ -2975,7 +3078,7 @@ static struct module *load_module(void __user *umod, | |||
2975 | /* Unlink carefully: kallsyms could be walking list. */ | 3078 | /* Unlink carefully: kallsyms could be walking list. */ |
2976 | list_del_rcu(&mod->list); | 3079 | list_del_rcu(&mod->list); |
2977 | module_bug_cleanup(mod); | 3080 | module_bug_cleanup(mod); |
2978 | 3081 | wake_up_all(&module_wq); | |
2979 | ddebug: | 3082 | ddebug: |
2980 | dynamic_debug_remove(info.debug); | 3083 | dynamic_debug_remove(info.debug); |
2981 | unlock: | 3084 | unlock: |
@@ -3050,7 +3153,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
3050 | blocking_notifier_call_chain(&module_notify_list, | 3153 | blocking_notifier_call_chain(&module_notify_list, |
3051 | MODULE_STATE_GOING, mod); | 3154 | MODULE_STATE_GOING, mod); |
3052 | free_module(mod); | 3155 | free_module(mod); |
3053 | wake_up(&module_wq); | 3156 | wake_up_all(&module_wq); |
3054 | return ret; | 3157 | return ret; |
3055 | } | 3158 | } |
3056 | if (ret > 0) { | 3159 | if (ret > 0) { |
@@ -3062,9 +3165,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
3062 | dump_stack(); | 3165 | dump_stack(); |
3063 | } | 3166 | } |
3064 | 3167 | ||
3065 | /* Now it's a first class citizen! Wake up anyone waiting for it. */ | 3168 | /* Now it's a first class citizen! */ |
3066 | mod->state = MODULE_STATE_LIVE; | 3169 | mod->state = MODULE_STATE_LIVE; |
3067 | wake_up(&module_wq); | ||
3068 | blocking_notifier_call_chain(&module_notify_list, | 3170 | blocking_notifier_call_chain(&module_notify_list, |
3069 | MODULE_STATE_LIVE, mod); | 3171 | MODULE_STATE_LIVE, mod); |
3070 | 3172 | ||
@@ -3087,6 +3189,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
3087 | mod->init_ro_size = 0; | 3189 | mod->init_ro_size = 0; |
3088 | mod->init_text_size = 0; | 3190 | mod->init_text_size = 0; |
3089 | mutex_unlock(&module_mutex); | 3191 | mutex_unlock(&module_mutex); |
3192 | wake_up_all(&module_wq); | ||
3090 | 3193 | ||
3091 | return 0; | 3194 | return 0; |
3092 | } | 3195 | } |
diff --git a/kernel/module_signing.c b/kernel/module_signing.c new file mode 100644 index 000000000000..6b09f6983ac0 --- /dev/null +++ b/kernel/module_signing.c | |||
@@ -0,0 +1,243 @@ | |||
1 | /* Module signature checker | ||
2 | * | ||
3 | * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/err.h> | ||
14 | #include <crypto/public_key.h> | ||
15 | #include <crypto/hash.h> | ||
16 | #include <keys/asymmetric-type.h> | ||
17 | #include "module-internal.h" | ||
18 | |||
19 | /* | ||
20 | * Module signature information block. | ||
21 | * | ||
22 | * The constituents of the signature section are, in order: | ||
23 | * | ||
24 | * - Signer's name | ||
25 | * - Key identifier | ||
26 | * - Signature data | ||
27 | * - Information block | ||
28 | */ | ||
29 | struct module_signature { | ||
30 | enum pkey_algo algo : 8; /* Public-key crypto algorithm */ | ||
31 | enum pkey_hash_algo hash : 8; /* Digest algorithm */ | ||
32 | enum pkey_id_type id_type : 8; /* Key identifier type */ | ||
33 | u8 signer_len; /* Length of signer's name */ | ||
34 | u8 key_id_len; /* Length of key identifier */ | ||
35 | u8 __pad[3]; | ||
36 | __be32 sig_len; /* Length of signature data */ | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * Digest the module contents. | ||
41 | */ | ||
42 | static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash, | ||
43 | const void *mod, | ||
44 | unsigned long modlen) | ||
45 | { | ||
46 | struct public_key_signature *pks; | ||
47 | struct crypto_shash *tfm; | ||
48 | struct shash_desc *desc; | ||
49 | size_t digest_size, desc_size; | ||
50 | int ret; | ||
51 | |||
52 | pr_devel("==>%s()\n", __func__); | ||
53 | |||
54 | /* Allocate the hashing algorithm we're going to need and find out how | ||
55 | * big the hash operational data will be. | ||
56 | */ | ||
57 | tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0); | ||
58 | if (IS_ERR(tfm)) | ||
59 | return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm); | ||
60 | |||
61 | desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); | ||
62 | digest_size = crypto_shash_digestsize(tfm); | ||
63 | |||
64 | /* We allocate the hash operational data storage on the end of our | ||
65 | * context data and the digest output buffer on the end of that. | ||
66 | */ | ||
67 | ret = -ENOMEM; | ||
68 | pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL); | ||
69 | if (!pks) | ||
70 | goto error_no_pks; | ||
71 | |||
72 | pks->pkey_hash_algo = hash; | ||
73 | pks->digest = (u8 *)pks + sizeof(*pks) + desc_size; | ||
74 | pks->digest_size = digest_size; | ||
75 | |||
76 | desc = (void *)pks + sizeof(*pks); | ||
77 | desc->tfm = tfm; | ||
78 | desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; | ||
79 | |||
80 | ret = crypto_shash_init(desc); | ||
81 | if (ret < 0) | ||
82 | goto error; | ||
83 | |||
84 | ret = crypto_shash_finup(desc, mod, modlen, pks->digest); | ||
85 | if (ret < 0) | ||
86 | goto error; | ||
87 | |||
88 | crypto_free_shash(tfm); | ||
89 | pr_devel("<==%s() = ok\n", __func__); | ||
90 | return pks; | ||
91 | |||
92 | error: | ||
93 | kfree(pks); | ||
94 | error_no_pks: | ||
95 | crypto_free_shash(tfm); | ||
96 | pr_devel("<==%s() = %d\n", __func__, ret); | ||
97 | return ERR_PTR(ret); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Extract an MPI array from the signature data. This represents the actual | ||
102 | * signature. Each raw MPI is prefaced by a BE 2-byte value indicating the | ||
103 | * size of the MPI in bytes. | ||
104 | * | ||
105 | * RSA signatures only have one MPI, so currently we only read one. | ||
106 | */ | ||
107 | static int mod_extract_mpi_array(struct public_key_signature *pks, | ||
108 | const void *data, size_t len) | ||
109 | { | ||
110 | size_t nbytes; | ||
111 | MPI mpi; | ||
112 | |||
113 | if (len < 3) | ||
114 | return -EBADMSG; | ||
115 | nbytes = ((const u8 *)data)[0] << 8 | ((const u8 *)data)[1]; | ||
116 | data += 2; | ||
117 | len -= 2; | ||
118 | if (len != nbytes) | ||
119 | return -EBADMSG; | ||
120 | |||
121 | mpi = mpi_read_raw_data(data, nbytes); | ||
122 | if (!mpi) | ||
123 | return -ENOMEM; | ||
124 | pks->mpi[0] = mpi; | ||
125 | pks->nr_mpi = 1; | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Request an asymmetric key. | ||
131 | */ | ||
132 | static struct key *request_asymmetric_key(const char *signer, size_t signer_len, | ||
133 | const u8 *key_id, size_t key_id_len) | ||
134 | { | ||
135 | key_ref_t key; | ||
136 | size_t i; | ||
137 | char *id, *q; | ||
138 | |||
139 | pr_devel("==>%s(,%zu,,%zu)\n", __func__, signer_len, key_id_len); | ||
140 | |||
141 | /* Construct an identifier. */ | ||
142 | id = kmalloc(signer_len + 2 + key_id_len * 2 + 1, GFP_KERNEL); | ||
143 | if (!id) | ||
144 | return ERR_PTR(-ENOKEY); | ||
145 | |||
146 | memcpy(id, signer, signer_len); | ||
147 | |||
148 | q = id + signer_len; | ||
149 | *q++ = ':'; | ||
150 | *q++ = ' '; | ||
151 | for (i = 0; i < key_id_len; i++) { | ||
152 | *q++ = hex_asc[*key_id >> 4]; | ||
153 | *q++ = hex_asc[*key_id++ & 0x0f]; | ||
154 | } | ||
155 | |||
156 | *q = 0; | ||
157 | |||
158 | pr_debug("Look up: \"%s\"\n", id); | ||
159 | |||
160 | key = keyring_search(make_key_ref(modsign_keyring, 1), | ||
161 | &key_type_asymmetric, id); | ||
162 | if (IS_ERR(key)) | ||
163 | pr_warn("Request for unknown module key '%s' err %ld\n", | ||
164 | id, PTR_ERR(key)); | ||
165 | kfree(id); | ||
166 | |||
167 | if (IS_ERR(key)) { | ||
168 | switch (PTR_ERR(key)) { | ||
169 | /* Hide some search errors */ | ||
170 | case -EACCES: | ||
171 | case -ENOTDIR: | ||
172 | case -EAGAIN: | ||
173 | return ERR_PTR(-ENOKEY); | ||
174 | default: | ||
175 | return ERR_CAST(key); | ||
176 | } | ||
177 | } | ||
178 | |||
179 | pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key))); | ||
180 | return key_ref_to_ptr(key); | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Verify the signature on a module. | ||
185 | */ | ||
186 | int mod_verify_sig(const void *mod, unsigned long modlen, | ||
187 | const void *sig, unsigned long siglen) | ||
188 | { | ||
189 | struct public_key_signature *pks; | ||
190 | struct module_signature ms; | ||
191 | struct key *key; | ||
192 | size_t sig_len; | ||
193 | int ret; | ||
194 | |||
195 | pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen); | ||
196 | |||
197 | if (siglen <= sizeof(ms)) | ||
198 | return -EBADMSG; | ||
199 | |||
200 | memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms)); | ||
201 | siglen -= sizeof(ms); | ||
202 | |||
203 | sig_len = be32_to_cpu(ms.sig_len); | ||
204 | if (sig_len >= siglen || | ||
205 | siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len) | ||
206 | return -EBADMSG; | ||
207 | |||
208 | /* For the moment, only support RSA and X.509 identifiers */ | ||
209 | if (ms.algo != PKEY_ALGO_RSA || | ||
210 | ms.id_type != PKEY_ID_X509) | ||
211 | return -ENOPKG; | ||
212 | |||
213 | if (ms.hash >= PKEY_HASH__LAST || | ||
214 | !pkey_hash_algo[ms.hash]) | ||
215 | return -ENOPKG; | ||
216 | |||
217 | key = request_asymmetric_key(sig, ms.signer_len, | ||
218 | sig + ms.signer_len, ms.key_id_len); | ||
219 | if (IS_ERR(key)) | ||
220 | return PTR_ERR(key); | ||
221 | |||
222 | pks = mod_make_digest(ms.hash, mod, modlen); | ||
223 | if (IS_ERR(pks)) { | ||
224 | ret = PTR_ERR(pks); | ||
225 | goto error_put_key; | ||
226 | } | ||
227 | |||
228 | ret = mod_extract_mpi_array(pks, sig + ms.signer_len + ms.key_id_len, | ||
229 | sig_len); | ||
230 | if (ret < 0) | ||
231 | goto error_free_pks; | ||
232 | |||
233 | ret = verify_signature(key, pks); | ||
234 | pr_devel("verify_signature() = %d\n", ret); | ||
235 | |||
236 | error_free_pks: | ||
237 | mpi_free(pks->rsa.s); | ||
238 | kfree(pks); | ||
239 | error_put_key: | ||
240 | key_put(key); | ||
241 | pr_devel("<==%s() = %d\n", __func__, ret); | ||
242 | return ret; | ||
243 | } | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4fb2376ddf06..74df86bd9204 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; | |||
74 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ | 74 | .orphan_nxttail = &sname##_state.orphan_nxtlist, \ |
75 | .orphan_donetail = &sname##_state.orphan_donelist, \ | 75 | .orphan_donetail = &sname##_state.orphan_donelist, \ |
76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 76 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
77 | .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ | ||
77 | .name = #sname, \ | 78 | .name = #sname, \ |
78 | } | 79 | } |
79 | 80 | ||
@@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1197 | raw_spin_unlock_irq(&rnp->lock); | 1198 | raw_spin_unlock_irq(&rnp->lock); |
1198 | 1199 | ||
1199 | /* Exclude any concurrent CPU-hotplug operations. */ | 1200 | /* Exclude any concurrent CPU-hotplug operations. */ |
1200 | get_online_cpus(); | 1201 | mutex_lock(&rsp->onoff_mutex); |
1201 | 1202 | ||
1202 | /* | 1203 | /* |
1203 | * Set the quiescent-state-needed bits in all the rcu_node | 1204 | * Set the quiescent-state-needed bits in all the rcu_node |
@@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1234 | cond_resched(); | 1235 | cond_resched(); |
1235 | } | 1236 | } |
1236 | 1237 | ||
1237 | put_online_cpus(); | 1238 | mutex_unlock(&rsp->onoff_mutex); |
1238 | return 1; | 1239 | return 1; |
1239 | } | 1240 | } |
1240 | 1241 | ||
@@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1700 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ | 1701 | /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ |
1701 | 1702 | ||
1702 | /* Exclude any attempts to start a new grace period. */ | 1703 | /* Exclude any attempts to start a new grace period. */ |
1704 | mutex_lock(&rsp->onoff_mutex); | ||
1703 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 1705 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
1704 | 1706 | ||
1705 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ | 1707 | /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ |
@@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) | |||
1744 | init_callback_list(rdp); | 1746 | init_callback_list(rdp); |
1745 | /* Disallow further callbacks on this CPU. */ | 1747 | /* Disallow further callbacks on this CPU. */ |
1746 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | 1748 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; |
1749 | mutex_unlock(&rsp->onoff_mutex); | ||
1747 | } | 1750 | } |
1748 | 1751 | ||
1749 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1752 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
@@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2648 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2651 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
2649 | struct rcu_node *rnp = rcu_get_root(rsp); | 2652 | struct rcu_node *rnp = rcu_get_root(rsp); |
2650 | 2653 | ||
2654 | /* Exclude new grace periods. */ | ||
2655 | mutex_lock(&rsp->onoff_mutex); | ||
2656 | |||
2651 | /* Set up local state, ensuring consistent view of global state. */ | 2657 | /* Set up local state, ensuring consistent view of global state. */ |
2652 | raw_spin_lock_irqsave(&rnp->lock, flags); | 2658 | raw_spin_lock_irqsave(&rnp->lock, flags); |
2653 | rdp->beenonline = 1; /* We have now been online. */ | 2659 | rdp->beenonline = 1; /* We have now been online. */ |
@@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2662 | rcu_prepare_for_idle_init(cpu); | 2668 | rcu_prepare_for_idle_init(cpu); |
2663 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2669 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
2664 | 2670 | ||
2665 | /* | ||
2666 | * A new grace period might start here. If so, we won't be part | ||
2667 | * of it, but that is OK, as we are currently in a quiescent state. | ||
2668 | */ | ||
2669 | |||
2670 | /* Exclude any attempts to start a new GP on large systems. */ | ||
2671 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | ||
2672 | |||
2673 | /* Add CPU to rcu_node bitmasks. */ | 2671 | /* Add CPU to rcu_node bitmasks. */ |
2674 | rnp = rdp->mynode; | 2672 | rnp = rdp->mynode; |
2675 | mask = rdp->grpmask; | 2673 | mask = rdp->grpmask; |
@@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2693 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ | 2691 | raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ |
2694 | rnp = rnp->parent; | 2692 | rnp = rnp->parent; |
2695 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); | 2693 | } while (rnp != NULL && !(rnp->qsmaskinit & mask)); |
2694 | local_irq_restore(flags); | ||
2696 | 2695 | ||
2697 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 2696 | mutex_unlock(&rsp->onoff_mutex); |
2698 | } | 2697 | } |
2699 | 2698 | ||
2700 | static void __cpuinit rcu_prepare_cpu(int cpu) | 2699 | static void __cpuinit rcu_prepare_cpu(int cpu) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5faf05d68326..a240f032848e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -394,11 +394,17 @@ struct rcu_state { | |||
394 | struct rcu_head **orphan_donetail; /* Tail of above. */ | 394 | struct rcu_head **orphan_donetail; /* Tail of above. */ |
395 | long qlen_lazy; /* Number of lazy callbacks. */ | 395 | long qlen_lazy; /* Number of lazy callbacks. */ |
396 | long qlen; /* Total number of callbacks. */ | 396 | long qlen; /* Total number of callbacks. */ |
397 | /* End of fields guarded by onofflock. */ | ||
398 | |||
399 | struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */ | ||
400 | |||
397 | struct mutex barrier_mutex; /* Guards barrier fields. */ | 401 | struct mutex barrier_mutex; /* Guards barrier fields. */ |
398 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ | 402 | atomic_t barrier_cpu_count; /* # CPUs waiting on. */ |
399 | struct completion barrier_completion; /* Wake at barrier end. */ | 403 | struct completion barrier_completion; /* Wake at barrier end. */ |
400 | unsigned long n_barrier_done; /* ++ at start and end of */ | 404 | unsigned long n_barrier_done; /* ++ at start and end of */ |
401 | /* _rcu_barrier(). */ | 405 | /* _rcu_barrier(). */ |
406 | /* End of fields guarded by barrier_mutex. */ | ||
407 | |||
402 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 408 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
403 | /* force_quiescent_state(). */ | 409 | /* force_quiescent_state(). */ |
404 | unsigned long n_force_qs; /* Number of calls to */ | 410 | unsigned long n_force_qs; /* Number of calls to */ |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c17747236438..2d8927fda712 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -505,7 +505,7 @@ static inline void init_hrtick(void) | |||
505 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
506 | 506 | ||
507 | #ifndef tsk_is_polling | 507 | #ifndef tsk_is_polling |
508 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | 508 | #define tsk_is_polling(t) 0 |
509 | #endif | 509 | #endif |
510 | 510 | ||
511 | void resched_task(struct task_struct *p) | 511 | void resched_task(struct task_struct *p) |
@@ -6122,6 +6122,17 @@ static void sched_init_numa(void) | |||
6122 | * numbers. | 6122 | * numbers. |
6123 | */ | 6123 | */ |
6124 | 6124 | ||
6125 | /* | ||
6126 | * Here, we should temporarily reset sched_domains_numa_levels to 0. | ||
6127 | * If it fails to allocate memory for array sched_domains_numa_masks[][], | ||
6128 | * the array will contain less then 'level' members. This could be | ||
6129 | * dangerous when we use it to iterate array sched_domains_numa_masks[][] | ||
6130 | * in other functions. | ||
6131 | * | ||
6132 | * We reset it to 'level' at the end of this function. | ||
6133 | */ | ||
6134 | sched_domains_numa_levels = 0; | ||
6135 | |||
6125 | sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); | 6136 | sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); |
6126 | if (!sched_domains_numa_masks) | 6137 | if (!sched_domains_numa_masks) |
6127 | return; | 6138 | return; |
@@ -6176,11 +6187,68 @@ static void sched_init_numa(void) | |||
6176 | } | 6187 | } |
6177 | 6188 | ||
6178 | sched_domain_topology = tl; | 6189 | sched_domain_topology = tl; |
6190 | |||
6191 | sched_domains_numa_levels = level; | ||
6192 | } | ||
6193 | |||
6194 | static void sched_domains_numa_masks_set(int cpu) | ||
6195 | { | ||
6196 | int i, j; | ||
6197 | int node = cpu_to_node(cpu); | ||
6198 | |||
6199 | for (i = 0; i < sched_domains_numa_levels; i++) { | ||
6200 | for (j = 0; j < nr_node_ids; j++) { | ||
6201 | if (node_distance(j, node) <= sched_domains_numa_distance[i]) | ||
6202 | cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); | ||
6203 | } | ||
6204 | } | ||
6205 | } | ||
6206 | |||
6207 | static void sched_domains_numa_masks_clear(int cpu) | ||
6208 | { | ||
6209 | int i, j; | ||
6210 | for (i = 0; i < sched_domains_numa_levels; i++) { | ||
6211 | for (j = 0; j < nr_node_ids; j++) | ||
6212 | cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); | ||
6213 | } | ||
6214 | } | ||
6215 | |||
6216 | /* | ||
6217 | * Update sched_domains_numa_masks[level][node] array when new cpus | ||
6218 | * are onlined. | ||
6219 | */ | ||
6220 | static int sched_domains_numa_masks_update(struct notifier_block *nfb, | ||
6221 | unsigned long action, | ||
6222 | void *hcpu) | ||
6223 | { | ||
6224 | int cpu = (long)hcpu; | ||
6225 | |||
6226 | switch (action & ~CPU_TASKS_FROZEN) { | ||
6227 | case CPU_ONLINE: | ||
6228 | sched_domains_numa_masks_set(cpu); | ||
6229 | break; | ||
6230 | |||
6231 | case CPU_DEAD: | ||
6232 | sched_domains_numa_masks_clear(cpu); | ||
6233 | break; | ||
6234 | |||
6235 | default: | ||
6236 | return NOTIFY_DONE; | ||
6237 | } | ||
6238 | |||
6239 | return NOTIFY_OK; | ||
6179 | } | 6240 | } |
6180 | #else | 6241 | #else |
6181 | static inline void sched_init_numa(void) | 6242 | static inline void sched_init_numa(void) |
6182 | { | 6243 | { |
6183 | } | 6244 | } |
6245 | |||
6246 | static int sched_domains_numa_masks_update(struct notifier_block *nfb, | ||
6247 | unsigned long action, | ||
6248 | void *hcpu) | ||
6249 | { | ||
6250 | return 0; | ||
6251 | } | ||
6184 | #endif /* CONFIG_NUMA */ | 6252 | #endif /* CONFIG_NUMA */ |
6185 | 6253 | ||
6186 | static int __sdt_alloc(const struct cpumask *cpu_map) | 6254 | static int __sdt_alloc(const struct cpumask *cpu_map) |
@@ -6629,6 +6697,7 @@ void __init sched_init_smp(void) | |||
6629 | mutex_unlock(&sched_domains_mutex); | 6697 | mutex_unlock(&sched_domains_mutex); |
6630 | put_online_cpus(); | 6698 | put_online_cpus(); |
6631 | 6699 | ||
6700 | hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); | ||
6632 | hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); | 6701 | hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); |
6633 | hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); | 6702 | hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); |
6634 | 6703 | ||
diff --git a/kernel/time.c b/kernel/time.c index ba744cf80696..d226c6a3fd28 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -30,7 +30,7 @@ | |||
30 | #include <linux/export.h> | 30 | #include <linux/export.h> |
31 | #include <linux/timex.h> | 31 | #include <linux/timex.h> |
32 | #include <linux/capability.h> | 32 | #include <linux/capability.h> |
33 | #include <linux/clocksource.h> | 33 | #include <linux/timekeeper_internal.h> |
34 | #include <linux/errno.h> | 34 | #include <linux/errno.h> |
35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
36 | #include <linux/security.h> | 36 | #include <linux/security.h> |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index fd42bd452b75..8601f0db1261 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
@@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA | |||
16 | config GENERIC_TIME_VSYSCALL | 16 | config GENERIC_TIME_VSYSCALL |
17 | bool | 17 | bool |
18 | 18 | ||
19 | # Timekeeping vsyscall support | ||
20 | config GENERIC_TIME_VSYSCALL_OLD | ||
21 | bool | ||
22 | |||
19 | # ktime_t scalar 64bit nsec representation | 23 | # ktime_t scalar 64bit nsec representation |
20 | config KTIME_SCALAR | 24 | config KTIME_SCALAR |
21 | bool | 25 | bool |
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index aa27d391bfc8..f11d83b12949 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c | |||
@@ -37,7 +37,6 @@ | |||
37 | static struct alarm_base { | 37 | static struct alarm_base { |
38 | spinlock_t lock; | 38 | spinlock_t lock; |
39 | struct timerqueue_head timerqueue; | 39 | struct timerqueue_head timerqueue; |
40 | struct hrtimer timer; | ||
41 | ktime_t (*gettime)(void); | 40 | ktime_t (*gettime)(void); |
42 | clockid_t base_clockid; | 41 | clockid_t base_clockid; |
43 | } alarm_bases[ALARM_NUMTYPE]; | 42 | } alarm_bases[ALARM_NUMTYPE]; |
@@ -46,6 +45,8 @@ static struct alarm_base { | |||
46 | static ktime_t freezer_delta; | 45 | static ktime_t freezer_delta; |
47 | static DEFINE_SPINLOCK(freezer_delta_lock); | 46 | static DEFINE_SPINLOCK(freezer_delta_lock); |
48 | 47 | ||
48 | static struct wakeup_source *ws; | ||
49 | |||
49 | #ifdef CONFIG_RTC_CLASS | 50 | #ifdef CONFIG_RTC_CLASS |
50 | /* rtc timer and device for setting alarm wakeups at suspend */ | 51 | /* rtc timer and device for setting alarm wakeups at suspend */ |
51 | static struct rtc_timer rtctimer; | 52 | static struct rtc_timer rtctimer; |
@@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { } | |||
130 | * @base: pointer to the base where the timer is being run | 131 | * @base: pointer to the base where the timer is being run |
131 | * @alarm: pointer to alarm being enqueued. | 132 | * @alarm: pointer to alarm being enqueued. |
132 | * | 133 | * |
133 | * Adds alarm to a alarm_base timerqueue and if necessary sets | 134 | * Adds alarm to a alarm_base timerqueue |
134 | * an hrtimer to run. | ||
135 | * | 135 | * |
136 | * Must hold base->lock when calling. | 136 | * Must hold base->lock when calling. |
137 | */ | 137 | */ |
138 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) | 138 | static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) |
139 | { | 139 | { |
140 | if (alarm->state & ALARMTIMER_STATE_ENQUEUED) | ||
141 | timerqueue_del(&base->timerqueue, &alarm->node); | ||
142 | |||
140 | timerqueue_add(&base->timerqueue, &alarm->node); | 143 | timerqueue_add(&base->timerqueue, &alarm->node); |
141 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; | 144 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; |
142 | |||
143 | if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { | ||
144 | hrtimer_try_to_cancel(&base->timer); | ||
145 | hrtimer_start(&base->timer, alarm->node.expires, | ||
146 | HRTIMER_MODE_ABS); | ||
147 | } | ||
148 | } | 145 | } |
149 | 146 | ||
150 | /** | 147 | /** |
151 | * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue | 148 | * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue |
152 | * @base: pointer to the base where the timer is running | 149 | * @base: pointer to the base where the timer is running |
153 | * @alarm: pointer to alarm being removed | 150 | * @alarm: pointer to alarm being removed |
154 | * | 151 | * |
155 | * Removes alarm to a alarm_base timerqueue and if necessary sets | 152 | * Removes alarm to a alarm_base timerqueue |
156 | * a new timer to run. | ||
157 | * | 153 | * |
158 | * Must hold base->lock when calling. | 154 | * Must hold base->lock when calling. |
159 | */ | 155 | */ |
160 | static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) | 156 | static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) |
161 | { | 157 | { |
162 | struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); | ||
163 | |||
164 | if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) | 158 | if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) |
165 | return; | 159 | return; |
166 | 160 | ||
167 | timerqueue_del(&base->timerqueue, &alarm->node); | 161 | timerqueue_del(&base->timerqueue, &alarm->node); |
168 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; | 162 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; |
169 | |||
170 | if (next == &alarm->node) { | ||
171 | hrtimer_try_to_cancel(&base->timer); | ||
172 | next = timerqueue_getnext(&base->timerqueue); | ||
173 | if (!next) | ||
174 | return; | ||
175 | hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); | ||
176 | } | ||
177 | } | 163 | } |
178 | 164 | ||
179 | 165 | ||
@@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) | |||
188 | */ | 174 | */ |
189 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) | 175 | static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) |
190 | { | 176 | { |
191 | struct alarm_base *base = container_of(timer, struct alarm_base, timer); | 177 | struct alarm *alarm = container_of(timer, struct alarm, timer); |
192 | struct timerqueue_node *next; | 178 | struct alarm_base *base = &alarm_bases[alarm->type]; |
193 | unsigned long flags; | 179 | unsigned long flags; |
194 | ktime_t now; | ||
195 | int ret = HRTIMER_NORESTART; | 180 | int ret = HRTIMER_NORESTART; |
196 | int restart = ALARMTIMER_NORESTART; | 181 | int restart = ALARMTIMER_NORESTART; |
197 | 182 | ||
198 | spin_lock_irqsave(&base->lock, flags); | 183 | spin_lock_irqsave(&base->lock, flags); |
199 | now = base->gettime(); | 184 | alarmtimer_dequeue(base, alarm); |
200 | while ((next = timerqueue_getnext(&base->timerqueue))) { | 185 | spin_unlock_irqrestore(&base->lock, flags); |
201 | struct alarm *alarm; | ||
202 | ktime_t expired = next->expires; | ||
203 | |||
204 | if (expired.tv64 > now.tv64) | ||
205 | break; | ||
206 | |||
207 | alarm = container_of(next, struct alarm, node); | ||
208 | |||
209 | timerqueue_del(&base->timerqueue, &alarm->node); | ||
210 | alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; | ||
211 | |||
212 | alarm->state |= ALARMTIMER_STATE_CALLBACK; | ||
213 | spin_unlock_irqrestore(&base->lock, flags); | ||
214 | if (alarm->function) | ||
215 | restart = alarm->function(alarm, now); | ||
216 | spin_lock_irqsave(&base->lock, flags); | ||
217 | alarm->state &= ~ALARMTIMER_STATE_CALLBACK; | ||
218 | 186 | ||
219 | if (restart != ALARMTIMER_NORESTART) { | 187 | if (alarm->function) |
220 | timerqueue_add(&base->timerqueue, &alarm->node); | 188 | restart = alarm->function(alarm, base->gettime()); |
221 | alarm->state |= ALARMTIMER_STATE_ENQUEUED; | ||
222 | } | ||
223 | } | ||
224 | 189 | ||
225 | if (next) { | 190 | spin_lock_irqsave(&base->lock, flags); |
226 | hrtimer_set_expires(&base->timer, next->expires); | 191 | if (restart != ALARMTIMER_NORESTART) { |
192 | hrtimer_set_expires(&alarm->timer, alarm->node.expires); | ||
193 | alarmtimer_enqueue(base, alarm); | ||
227 | ret = HRTIMER_RESTART; | 194 | ret = HRTIMER_RESTART; |
228 | } | 195 | } |
229 | spin_unlock_irqrestore(&base->lock, flags); | 196 | spin_unlock_irqrestore(&base->lock, flags); |
@@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev) | |||
250 | unsigned long flags; | 217 | unsigned long flags; |
251 | struct rtc_device *rtc; | 218 | struct rtc_device *rtc; |
252 | int i; | 219 | int i; |
220 | int ret; | ||
253 | 221 | ||
254 | spin_lock_irqsave(&freezer_delta_lock, flags); | 222 | spin_lock_irqsave(&freezer_delta_lock, flags); |
255 | min = freezer_delta; | 223 | min = freezer_delta; |
@@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev) | |||
279 | if (min.tv64 == 0) | 247 | if (min.tv64 == 0) |
280 | return 0; | 248 | return 0; |
281 | 249 | ||
282 | /* XXX - Should we enforce a minimum sleep time? */ | 250 | if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { |
283 | WARN_ON(min.tv64 < NSEC_PER_SEC); | 251 | __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); |
252 | return -EBUSY; | ||
253 | } | ||
284 | 254 | ||
285 | /* Setup an rtc timer to fire that far in the future */ | 255 | /* Setup an rtc timer to fire that far in the future */ |
286 | rtc_timer_cancel(rtc, &rtctimer); | 256 | rtc_timer_cancel(rtc, &rtctimer); |
@@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev) | |||
288 | now = rtc_tm_to_ktime(tm); | 258 | now = rtc_tm_to_ktime(tm); |
289 | now = ktime_add(now, min); | 259 | now = ktime_add(now, min); |
290 | 260 | ||
291 | rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); | 261 | /* Set alarm, if in the past reject suspend briefly to handle */ |
292 | 262 | ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); | |
293 | return 0; | 263 | if (ret < 0) |
264 | __pm_wakeup_event(ws, MSEC_PER_SEC); | ||
265 | return ret; | ||
294 | } | 266 | } |
295 | #else | 267 | #else |
296 | static int alarmtimer_suspend(struct device *dev) | 268 | static int alarmtimer_suspend(struct device *dev) |
@@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
324 | enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) | 296 | enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) |
325 | { | 297 | { |
326 | timerqueue_init(&alarm->node); | 298 | timerqueue_init(&alarm->node); |
299 | hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, | ||
300 | HRTIMER_MODE_ABS); | ||
301 | alarm->timer.function = alarmtimer_fired; | ||
327 | alarm->function = function; | 302 | alarm->function = function; |
328 | alarm->type = type; | 303 | alarm->type = type; |
329 | alarm->state = ALARMTIMER_STATE_INACTIVE; | 304 | alarm->state = ALARMTIMER_STATE_INACTIVE; |
@@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, | |||
334 | * @alarm: ptr to alarm to set | 309 | * @alarm: ptr to alarm to set |
335 | * @start: time to run the alarm | 310 | * @start: time to run the alarm |
336 | */ | 311 | */ |
337 | void alarm_start(struct alarm *alarm, ktime_t start) | 312 | int alarm_start(struct alarm *alarm, ktime_t start) |
338 | { | 313 | { |
339 | struct alarm_base *base = &alarm_bases[alarm->type]; | 314 | struct alarm_base *base = &alarm_bases[alarm->type]; |
340 | unsigned long flags; | 315 | unsigned long flags; |
316 | int ret; | ||
341 | 317 | ||
342 | spin_lock_irqsave(&base->lock, flags); | 318 | spin_lock_irqsave(&base->lock, flags); |
343 | if (alarmtimer_active(alarm)) | ||
344 | alarmtimer_remove(base, alarm); | ||
345 | alarm->node.expires = start; | 319 | alarm->node.expires = start; |
346 | alarmtimer_enqueue(base, alarm); | 320 | alarmtimer_enqueue(base, alarm); |
321 | ret = hrtimer_start(&alarm->timer, alarm->node.expires, | ||
322 | HRTIMER_MODE_ABS); | ||
347 | spin_unlock_irqrestore(&base->lock, flags); | 323 | spin_unlock_irqrestore(&base->lock, flags); |
324 | return ret; | ||
348 | } | 325 | } |
349 | 326 | ||
350 | /** | 327 | /** |
@@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm) | |||
358 | { | 335 | { |
359 | struct alarm_base *base = &alarm_bases[alarm->type]; | 336 | struct alarm_base *base = &alarm_bases[alarm->type]; |
360 | unsigned long flags; | 337 | unsigned long flags; |
361 | int ret = -1; | 338 | int ret; |
362 | spin_lock_irqsave(&base->lock, flags); | ||
363 | |||
364 | if (alarmtimer_callback_running(alarm)) | ||
365 | goto out; | ||
366 | 339 | ||
367 | if (alarmtimer_is_queued(alarm)) { | 340 | spin_lock_irqsave(&base->lock, flags); |
368 | alarmtimer_remove(base, alarm); | 341 | ret = hrtimer_try_to_cancel(&alarm->timer); |
369 | ret = 1; | 342 | if (ret >= 0) |
370 | } else | 343 | alarmtimer_dequeue(base, alarm); |
371 | ret = 0; | ||
372 | out: | ||
373 | spin_unlock_irqrestore(&base->lock, flags); | 344 | spin_unlock_irqrestore(&base->lock, flags); |
374 | return ret; | 345 | return ret; |
375 | } | 346 | } |
@@ -802,10 +773,6 @@ static int __init alarmtimer_init(void) | |||
802 | for (i = 0; i < ALARM_NUMTYPE; i++) { | 773 | for (i = 0; i < ALARM_NUMTYPE; i++) { |
803 | timerqueue_init_head(&alarm_bases[i].timerqueue); | 774 | timerqueue_init_head(&alarm_bases[i].timerqueue); |
804 | spin_lock_init(&alarm_bases[i].lock); | 775 | spin_lock_init(&alarm_bases[i].lock); |
805 | hrtimer_init(&alarm_bases[i].timer, | ||
806 | alarm_bases[i].base_clockid, | ||
807 | HRTIMER_MODE_ABS); | ||
808 | alarm_bases[i].timer.function = alarmtimer_fired; | ||
809 | } | 776 | } |
810 | 777 | ||
811 | error = alarmtimer_rtc_interface_setup(); | 778 | error = alarmtimer_rtc_interface_setup(); |
@@ -821,6 +788,7 @@ static int __init alarmtimer_init(void) | |||
821 | error = PTR_ERR(pdev); | 788 | error = PTR_ERR(pdev); |
822 | goto out_drv; | 789 | goto out_drv; |
823 | } | 790 | } |
791 | ws = wakeup_source_register("alarmtimer"); | ||
824 | return 0; | 792 | return 0; |
825 | 793 | ||
826 | out_drv: | 794 | out_drv: |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 46da0537c10b..6629bf7b5285 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -37,7 +37,7 @@ | |||
37 | * requested HZ value. It is also not recommended | 37 | * requested HZ value. It is also not recommended |
38 | * for "tick-less" systems. | 38 | * for "tick-less" systems. |
39 | */ | 39 | */ |
40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ)) | 40 | #define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ) |
41 | 41 | ||
42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier | 42 | /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier |
43 | * conversion, the .shift value could be zero. However | 43 | * conversion, the .shift value could be zero. However |
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void) | |||
95 | { | 95 | { |
96 | return &clocksource_jiffies; | 96 | return &clocksource_jiffies; |
97 | } | 97 | } |
98 | |||
99 | struct clocksource refined_jiffies; | ||
100 | |||
101 | int register_refined_jiffies(long cycles_per_second) | ||
102 | { | ||
103 | u64 nsec_per_tick, shift_hz; | ||
104 | long cycles_per_tick; | ||
105 | |||
106 | |||
107 | |||
108 | refined_jiffies = clocksource_jiffies; | ||
109 | refined_jiffies.name = "refined-jiffies"; | ||
110 | refined_jiffies.rating++; | ||
111 | |||
112 | /* Calc cycles per tick */ | ||
113 | cycles_per_tick = (cycles_per_second + HZ/2)/HZ; | ||
114 | /* shift_hz stores hz<<8 for extra accuracy */ | ||
115 | shift_hz = (u64)cycles_per_second << 8; | ||
116 | shift_hz += cycles_per_tick/2; | ||
117 | do_div(shift_hz, cycles_per_tick); | ||
118 | /* Calculate nsec_per_tick using shift_hz */ | ||
119 | nsec_per_tick = (u64)NSEC_PER_SEC << 8; | ||
120 | nsec_per_tick += (u32)shift_hz/2; | ||
121 | do_div(nsec_per_tick, (u32)shift_hz); | ||
122 | |||
123 | refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; | ||
124 | |||
125 | clocksource_register(&refined_jiffies); | ||
126 | return 0; | ||
127 | } | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f423bdd035c2..a40260885265 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -835,7 +835,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | |||
835 | */ | 835 | */ |
836 | if (ts->tick_stopped) { | 836 | if (ts->tick_stopped) { |
837 | touch_softlockup_watchdog(); | 837 | touch_softlockup_watchdog(); |
838 | if (idle_cpu(cpu)) | 838 | if (is_idle_task(current)) |
839 | ts->idle_jiffies++; | 839 | ts->idle_jiffies++; |
840 | } | 840 | } |
841 | update_process_times(user_mode(regs)); | 841 | update_process_times(user_mode(regs)); |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5ce06a3fa91e..e424970bb562 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * | 8 | * |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/timekeeper_internal.h> | ||
11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
12 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
13 | #include <linux/percpu.h> | 14 | #include <linux/percpu.h> |
@@ -21,61 +22,6 @@ | |||
21 | #include <linux/tick.h> | 22 | #include <linux/tick.h> |
22 | #include <linux/stop_machine.h> | 23 | #include <linux/stop_machine.h> |
23 | 24 | ||
24 | /* Structure holding internal timekeeping values. */ | ||
25 | struct timekeeper { | ||
26 | /* Current clocksource used for timekeeping. */ | ||
27 | struct clocksource *clock; | ||
28 | /* NTP adjusted clock multiplier */ | ||
29 | u32 mult; | ||
30 | /* The shift value of the current clocksource. */ | ||
31 | u32 shift; | ||
32 | /* Number of clock cycles in one NTP interval. */ | ||
33 | cycle_t cycle_interval; | ||
34 | /* Number of clock shifted nano seconds in one NTP interval. */ | ||
35 | u64 xtime_interval; | ||
36 | /* shifted nano seconds left over when rounding cycle_interval */ | ||
37 | s64 xtime_remainder; | ||
38 | /* Raw nano seconds accumulated per NTP interval. */ | ||
39 | u32 raw_interval; | ||
40 | |||
41 | /* Current CLOCK_REALTIME time in seconds */ | ||
42 | u64 xtime_sec; | ||
43 | /* Clock shifted nano seconds */ | ||
44 | u64 xtime_nsec; | ||
45 | |||
46 | /* Difference between accumulated time and NTP time in ntp | ||
47 | * shifted nano seconds. */ | ||
48 | s64 ntp_error; | ||
49 | /* Shift conversion between clock shifted nano seconds and | ||
50 | * ntp shifted nano seconds. */ | ||
51 | u32 ntp_error_shift; | ||
52 | |||
53 | /* | ||
54 | * wall_to_monotonic is what we need to add to xtime (or xtime corrected | ||
55 | * for sub jiffie times) to get to monotonic time. Monotonic is pegged | ||
56 | * at zero at system boot time, so wall_to_monotonic will be negative, | ||
57 | * however, we will ALWAYS keep the tv_nsec part positive so we can use | ||
58 | * the usual normalization. | ||
59 | * | ||
60 | * wall_to_monotonic is moved after resume from suspend for the | ||
61 | * monotonic time not to jump. We need to add total_sleep_time to | ||
62 | * wall_to_monotonic to get the real boot based time offset. | ||
63 | * | ||
64 | * - wall_to_monotonic is no longer the boot time, getboottime must be | ||
65 | * used instead. | ||
66 | */ | ||
67 | struct timespec wall_to_monotonic; | ||
68 | /* Offset clock monotonic -> clock realtime */ | ||
69 | ktime_t offs_real; | ||
70 | /* time spent in suspend */ | ||
71 | struct timespec total_sleep_time; | ||
72 | /* Offset clock monotonic -> clock boottime */ | ||
73 | ktime_t offs_boot; | ||
74 | /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ | ||
75 | struct timespec raw_time; | ||
76 | /* Seqlock for all timekeeper values */ | ||
77 | seqlock_t lock; | ||
78 | }; | ||
79 | 25 | ||
80 | static struct timekeeper timekeeper; | 26 | static struct timekeeper timekeeper; |
81 | 27 | ||
@@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) | |||
96 | } | 42 | } |
97 | } | 43 | } |
98 | 44 | ||
99 | static struct timespec tk_xtime(struct timekeeper *tk) | ||
100 | { | ||
101 | struct timespec ts; | ||
102 | |||
103 | ts.tv_sec = tk->xtime_sec; | ||
104 | ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); | ||
105 | return ts; | ||
106 | } | ||
107 | |||
108 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) | 45 | static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) |
109 | { | 46 | { |
110 | tk->xtime_sec = ts->tv_sec; | 47 | tk->xtime_sec = ts->tv_sec; |
@@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) | |||
246 | /* must hold write on timekeeper.lock */ | 183 | /* must hold write on timekeeper.lock */ |
247 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) | 184 | static void timekeeping_update(struct timekeeper *tk, bool clearntp) |
248 | { | 185 | { |
249 | struct timespec xt; | ||
250 | |||
251 | if (clearntp) { | 186 | if (clearntp) { |
252 | tk->ntp_error = 0; | 187 | tk->ntp_error = 0; |
253 | ntp_clear(); | 188 | ntp_clear(); |
254 | } | 189 | } |
255 | xt = tk_xtime(tk); | 190 | update_vsyscall(tk); |
256 | update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); | ||
257 | } | 191 | } |
258 | 192 | ||
259 | /** | 193 | /** |
@@ -1113,7 +1047,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, | |||
1113 | accumulate_nsecs_to_secs(tk); | 1047 | accumulate_nsecs_to_secs(tk); |
1114 | 1048 | ||
1115 | /* Accumulate raw time */ | 1049 | /* Accumulate raw time */ |
1116 | raw_nsecs = tk->raw_interval << shift; | 1050 | raw_nsecs = (u64)tk->raw_interval << shift; |
1117 | raw_nsecs += tk->raw_time.tv_nsec; | 1051 | raw_nsecs += tk->raw_time.tv_nsec; |
1118 | if (raw_nsecs >= NSEC_PER_SEC) { | 1052 | if (raw_nsecs >= NSEC_PER_SEC) { |
1119 | u64 raw_secs = raw_nsecs; | 1053 | u64 raw_secs = raw_nsecs; |
@@ -1130,6 +1064,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, | |||
1130 | return offset; | 1064 | return offset; |
1131 | } | 1065 | } |
1132 | 1066 | ||
1067 | #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
1068 | static inline void old_vsyscall_fixup(struct timekeeper *tk) | ||
1069 | { | ||
1070 | s64 remainder; | ||
1071 | |||
1072 | /* | ||
1073 | * Store only full nanoseconds into xtime_nsec after rounding | ||
1074 | * it up and add the remainder to the error difference. | ||
1075 | * XXX - This is necessary to avoid small 1ns inconsistnecies caused | ||
1076 | * by truncating the remainder in vsyscalls. However, it causes | ||
1077 | * additional work to be done in timekeeping_adjust(). Once | ||
1078 | * the vsyscall implementations are converted to use xtime_nsec | ||
1079 | * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD | ||
1080 | * users are removed, this can be killed. | ||
1081 | */ | ||
1082 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); | ||
1083 | tk->xtime_nsec -= remainder; | ||
1084 | tk->xtime_nsec += 1ULL << tk->shift; | ||
1085 | tk->ntp_error += remainder << tk->ntp_error_shift; | ||
1086 | |||
1087 | } | ||
1088 | #else | ||
1089 | #define old_vsyscall_fixup(tk) | ||
1090 | #endif | ||
1091 | |||
1092 | |||
1093 | |||
1133 | /** | 1094 | /** |
1134 | * update_wall_time - Uses the current clocksource to increment the wall time | 1095 | * update_wall_time - Uses the current clocksource to increment the wall time |
1135 | * | 1096 | * |
@@ -1141,7 +1102,6 @@ static void update_wall_time(void) | |||
1141 | cycle_t offset; | 1102 | cycle_t offset; |
1142 | int shift = 0, maxshift; | 1103 | int shift = 0, maxshift; |
1143 | unsigned long flags; | 1104 | unsigned long flags; |
1144 | s64 remainder; | ||
1145 | 1105 | ||
1146 | write_seqlock_irqsave(&tk->lock, flags); | 1106 | write_seqlock_irqsave(&tk->lock, flags); |
1147 | 1107 | ||
@@ -1183,20 +1143,11 @@ static void update_wall_time(void) | |||
1183 | /* correct the clock when NTP error is too big */ | 1143 | /* correct the clock when NTP error is too big */ |
1184 | timekeeping_adjust(tk, offset); | 1144 | timekeeping_adjust(tk, offset); |
1185 | 1145 | ||
1186 | |||
1187 | /* | 1146 | /* |
1188 | * Store only full nanoseconds into xtime_nsec after rounding | 1147 | * XXX This can be killed once everyone converts |
1189 | * it up and add the remainder to the error difference. | 1148 | * to the new update_vsyscall. |
1190 | * XXX - This is necessary to avoid small 1ns inconsistnecies caused | 1149 | */ |
1191 | * by truncating the remainder in vsyscalls. However, it causes | 1150 | old_vsyscall_fixup(tk); |
1192 | * additional work to be done in timekeeping_adjust(). Once | ||
1193 | * the vsyscall implementations are converted to use xtime_nsec | ||
1194 | * (shifted nanoseconds), this can be killed. | ||
1195 | */ | ||
1196 | remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); | ||
1197 | tk->xtime_nsec -= remainder; | ||
1198 | tk->xtime_nsec += 1ULL << tk->shift; | ||
1199 | tk->ntp_error += remainder << tk->ntp_error_shift; | ||
1200 | 1151 | ||
1201 | /* | 1152 | /* |
1202 | * Finally, make sure that after the rounding | 1153 | * Finally, make sure that after the rounding |
diff --git a/kernel/timer.c b/kernel/timer.c index d5de1b2292aa..367d00858482 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64); | |||
63 | #define TVR_SIZE (1 << TVR_BITS) | 63 | #define TVR_SIZE (1 << TVR_BITS) |
64 | #define TVN_MASK (TVN_SIZE - 1) | 64 | #define TVN_MASK (TVN_SIZE - 1) |
65 | #define TVR_MASK (TVR_SIZE - 1) | 65 | #define TVR_MASK (TVR_SIZE - 1) |
66 | #define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) | ||
66 | 67 | ||
67 | struct tvec { | 68 | struct tvec { |
68 | struct list_head vec[TVN_SIZE]; | 69 | struct list_head vec[TVN_SIZE]; |
@@ -359,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) | |||
359 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); | 360 | vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); |
360 | } else { | 361 | } else { |
361 | int i; | 362 | int i; |
362 | /* If the timeout is larger than 0xffffffff on 64-bit | 363 | /* If the timeout is larger than MAX_TVAL (on 64-bit |
363 | * architectures then we use the maximum timeout: | 364 | * architectures or with CONFIG_BASE_SMALL=1) then we |
365 | * use the maximum timeout. | ||
364 | */ | 366 | */ |
365 | if (idx > 0xffffffffUL) { | 367 | if (idx > MAX_TVAL) { |
366 | idx = 0xffffffffUL; | 368 | idx = MAX_TVAL; |
367 | expires = idx + base->timer_jiffies; | 369 | expires = idx + base->timer_jiffies; |
368 | } | 370 | } |
369 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; | 371 | i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; |