aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile77
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/audit.h7
-rw-r--r--kernel/audit_watch.c3
-rw-r--r--kernel/auditfilter.c65
-rw-r--r--kernel/auditsc.c217
-rw-r--r--kernel/debug/debug_core.c18
-rw-r--r--kernel/debug/kdb/kdb_bt.c2
-rw-r--r--kernel/debug/kdb/kdb_io.c33
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/irq/irqdomain.c33
-rw-r--r--kernel/kmod.c7
-rw-r--r--kernel/kthread.c1
-rw-r--r--kernel/modsign_pubkey.c113
-rw-r--r--kernel/module-internal.h15
-rw-r--r--kernel/module.c157
-rw-r--r--kernel/module_signing.c243
-rw-r--r--kernel/rcutree.c21
-rw-r--r--kernel/rcutree.h6
-rw-r--r--kernel/sched/core.c71
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/time/Kconfig4
-rw-r--r--kernel/time/alarmtimer.c118
-rw-r--r--kernel/time/jiffies.c32
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/timekeeping.c117
-rw-r--r--kernel/timer.c10
30 files changed, 1093 insertions, 325 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 5404911eaee9..0dfeca4324ee 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
54obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 54obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
55obj-$(CONFIG_UID16) += uid16.o 55obj-$(CONFIG_UID16) += uid16.o
56obj-$(CONFIG_MODULES) += module.o 56obj-$(CONFIG_MODULES) += module.o
57obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
57obj-$(CONFIG_KALLSYMS) += kallsyms.o 58obj-$(CONFIG_KALLSYMS) += kallsyms.o
58obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o 59obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
59obj-$(CONFIG_KEXEC) += kexec.o 60obj-$(CONFIG_KEXEC) += kexec.o
@@ -130,3 +131,79 @@ quiet_cmd_timeconst = TIMEC $@
130targets += timeconst.h 131targets += timeconst.h
131$(obj)/timeconst.h: $(src)/timeconst.pl FORCE 132$(obj)/timeconst.h: $(src)/timeconst.pl FORCE
132 $(call if_changed,timeconst) 133 $(call if_changed,timeconst)
134
135ifeq ($(CONFIG_MODULE_SIG),y)
136#
137# Pull the signing certificate and any extra certificates into the kernel
138#
139extra_certificates:
140 touch $@
141
142kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
143
144###############################################################################
145#
146# If module signing is requested, say by allyesconfig, but a key has not been
147# supplied, then one will need to be generated to make sure the build does not
148# fail and that the kernel may be used afterwards.
149#
150###############################################################################
151sign_key_with_hash :=
152ifeq ($(CONFIG_MODULE_SIG_SHA1),y)
153sign_key_with_hash := -sha1
154endif
155ifeq ($(CONFIG_MODULE_SIG_SHA224),y)
156sign_key_with_hash := -sha224
157endif
158ifeq ($(CONFIG_MODULE_SIG_SHA256),y)
159sign_key_with_hash := -sha256
160endif
161ifeq ($(CONFIG_MODULE_SIG_SHA384),y)
162sign_key_with_hash := -sha384
163endif
164ifeq ($(CONFIG_MODULE_SIG_SHA512),y)
165sign_key_with_hash := -sha512
166endif
167ifeq ($(sign_key_with_hash),)
168$(error Could not determine digest type to use from kernel config)
169endif
170
171signing_key.priv signing_key.x509: x509.genkey
172 @echo "###"
173 @echo "### Now generating an X.509 key pair to be used for signing modules."
174 @echo "###"
175 @echo "### If this takes a long time, you might wish to run rngd in the"
176 @echo "### background to keep the supply of entropy topped up. It"
177 @echo "### needs to be run as root, and should use a hardware random"
178 @echo "### number generator if one is available, eg:"
179 @echo "###"
180 @echo "### rngd -r /dev/hwrandom"
181 @echo "###"
182 openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \
183 -x509 -config x509.genkey \
184 -outform DER -out signing_key.x509 \
185 -keyout signing_key.priv
186 @echo "###"
187 @echo "### Key pair generated."
188 @echo "###"
189
190x509.genkey:
191 @echo Generating X.509 key generation config
192 @echo >x509.genkey "[ req ]"
193 @echo >>x509.genkey "default_bits = 4096"
194 @echo >>x509.genkey "distinguished_name = req_distinguished_name"
195 @echo >>x509.genkey "prompt = no"
196 @echo >>x509.genkey "string_mask = utf8only"
197 @echo >>x509.genkey "x509_extensions = myexts"
198 @echo >>x509.genkey
199 @echo >>x509.genkey "[ req_distinguished_name ]"
200 @echo >>x509.genkey "O = Magrathea"
201 @echo >>x509.genkey "CN = Glacier signing key"
202 @echo >>x509.genkey "emailAddress = slartibartfast@magrathea.h2g2"
203 @echo >>x509.genkey
204 @echo >>x509.genkey "[ myexts ]"
205 @echo >>x509.genkey "basicConstraints=critical,CA:FALSE"
206 @echo >>x509.genkey "keyUsage=digitalSignature"
207 @echo >>x509.genkey "subjectKeyIdentifier=hash"
208 @echo >>x509.genkey "authorityKeyIdentifier=keyid"
209endif
diff --git a/kernel/acct.c b/kernel/acct.c
index 6cd7529c9e6a..051e071a06e7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -193,7 +193,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
193 } 193 }
194} 194}
195 195
196static int acct_on(char *name) 196static int acct_on(struct filename *pathname)
197{ 197{
198 struct file *file; 198 struct file *file;
199 struct vfsmount *mnt; 199 struct vfsmount *mnt;
@@ -201,7 +201,7 @@ static int acct_on(char *name)
201 struct bsd_acct_struct *acct = NULL; 201 struct bsd_acct_struct *acct = NULL;
202 202
203 /* Difference from BSD - they don't do O_APPEND */ 203 /* Difference from BSD - they don't do O_APPEND */
204 file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); 204 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
205 if (IS_ERR(file)) 205 if (IS_ERR(file))
206 return PTR_ERR(file); 206 return PTR_ERR(file);
207 207
@@ -260,7 +260,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
260 return -EPERM; 260 return -EPERM;
261 261
262 if (name) { 262 if (name) {
263 char *tmp = getname(name); 263 struct filename *tmp = getname(name);
264 if (IS_ERR(tmp)) 264 if (IS_ERR(tmp))
265 return (PTR_ERR(tmp)); 265 return (PTR_ERR(tmp));
266 error = acct_on(tmp); 266 error = acct_on(tmp);
diff --git a/kernel/audit.c b/kernel/audit.c
index 4d0ceede3319..40414e9143db 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1440,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct path *link)
1440 1440
1441 ab = audit_log_start(current->audit_context, GFP_KERNEL, 1441 ab = audit_log_start(current->audit_context, GFP_KERNEL,
1442 AUDIT_ANOM_LINK); 1442 AUDIT_ANOM_LINK);
1443 if (!ab)
1444 return;
1443 audit_log_format(ab, "op=%s action=denied", operation); 1445 audit_log_format(ab, "op=%s action=denied", operation);
1444 audit_log_format(ab, " pid=%d comm=", current->pid); 1446 audit_log_format(ab, " pid=%d comm=", current->pid);
1445 audit_log_untrustedstring(ab, current->comm); 1447 audit_log_untrustedstring(ab, current->comm);
diff --git a/kernel/audit.h b/kernel/audit.h
index 9eb3d79482b6..d51cba868e1b 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -74,12 +74,15 @@ static inline int audit_hash_ino(u32 ino)
74 return (ino & (AUDIT_INODE_BUCKETS-1)); 74 return (ino & (AUDIT_INODE_BUCKETS-1));
75} 75}
76 76
77/* Indicates that audit should log the full pathname. */
78#define AUDIT_NAME_FULL -1
79
77extern int audit_match_class(int class, unsigned syscall); 80extern int audit_match_class(int class, unsigned syscall);
78extern int audit_comparator(const u32 left, const u32 op, const u32 right); 81extern int audit_comparator(const u32 left, const u32 op, const u32 right);
79extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); 82extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
80extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); 83extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
81extern int audit_compare_dname_path(const char *dname, const char *path, 84extern int parent_len(const char *path);
82 int *dirlen); 85extern int audit_compare_dname_path(const char *dname, const char *path, int plen);
83extern struct sk_buff * audit_make_reply(int pid, int seq, int type, 86extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
84 int done, int multi, 87 int done, int multi,
85 const void *payload, int size); 88 const void *payload, int size);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1c22ec3d87bc..9a9ae6e3d290 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -265,7 +265,8 @@ static void audit_update_watch(struct audit_parent *parent,
265 /* Run all of the watches on this parent looking for the one that 265 /* Run all of the watches on this parent looking for the one that
266 * matches the given dname */ 266 * matches the given dname */
267 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { 267 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
268 if (audit_compare_dname_path(dname, owatch->path, NULL)) 268 if (audit_compare_dname_path(dname, owatch->path,
269 AUDIT_NAME_FULL))
269 continue; 270 continue;
270 271
271 /* If the update involves invalidating rules, do the inode-based 272 /* If the update involves invalidating rules, do the inode-based
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4bcdbaf4d4d..7f19f23d38a3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1298,41 +1298,60 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
1298 } 1298 }
1299} 1299}
1300 1300
1301/* Compare given dentry name with last component in given path, 1301/**
1302 * return of 0 indicates a match. */ 1302 * parent_len - find the length of the parent portion of a pathname
1303int audit_compare_dname_path(const char *dname, const char *path, 1303 * @path: pathname of which to determine length
1304 int *dirlen) 1304 */
1305int parent_len(const char *path)
1305{ 1306{
1306 int dlen, plen; 1307 int plen;
1307 const char *p; 1308 const char *p;
1308 1309
1309 if (!dname || !path)
1310 return 1;
1311
1312 dlen = strlen(dname);
1313 plen = strlen(path); 1310 plen = strlen(path);
1314 if (plen < dlen) 1311
1315 return 1; 1312 if (plen == 0)
1313 return plen;
1316 1314
1317 /* disregard trailing slashes */ 1315 /* disregard trailing slashes */
1318 p = path + plen - 1; 1316 p = path + plen - 1;
1319 while ((*p == '/') && (p > path)) 1317 while ((*p == '/') && (p > path))
1320 p--; 1318 p--;
1321 1319
1322 /* find last path component */ 1320 /* walk backward until we find the next slash or hit beginning */
1323 p = p - dlen + 1; 1321 while ((*p != '/') && (p > path))
1324 if (p < path) 1322 p--;
1323
1324 /* did we find a slash? Then increment to include it in path */
1325 if (*p == '/')
1326 p++;
1327
1328 return p - path;
1329}
1330
1331/**
1332 * audit_compare_dname_path - compare given dentry name with last component in
1333 * given path. Return of 0 indicates a match.
1334 * @dname: dentry name that we're comparing
1335 * @path: full pathname that we're comparing
1336 * @parentlen: length of the parent if known. Passing in AUDIT_NAME_FULL
1337 * here indicates that we must compute this value.
1338 */
1339int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
1340{
1341 int dlen, pathlen;
1342 const char *p;
1343
1344 dlen = strlen(dname);
1345 pathlen = strlen(path);
1346 if (pathlen < dlen)
1325 return 1; 1347 return 1;
1326 else if (p > path) {
1327 if (*--p != '/')
1328 return 1;
1329 else
1330 p++;
1331 }
1332 1348
1333 /* return length of path's directory component */ 1349 parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen;
1334 if (dirlen) 1350 if (pathlen - parentlen != dlen)
1335 *dirlen = p - path; 1351 return 1;
1352
1353 p = path + parentlen;
1354
1336 return strncmp(p, dname, dlen); 1355 return strncmp(p, dname, dlen);
1337} 1356}
1338 1357
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f4a7756f999c..2f186ed80c40 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -81,9 +81,6 @@
81 * a name dynamically and also add those to the list anchored by names_list. */ 81 * a name dynamically and also add those to the list anchored by names_list. */
82#define AUDIT_NAMES 5 82#define AUDIT_NAMES 5
83 83
84/* Indicates that audit should log the full pathname. */
85#define AUDIT_NAME_FULL -1
86
87/* no execve audit message should be longer than this (userspace limits) */ 84/* no execve audit message should be longer than this (userspace limits) */
88#define MAX_EXECVE_AUDIT_LEN 7500 85#define MAX_EXECVE_AUDIT_LEN 7500
89 86
@@ -106,27 +103,29 @@ struct audit_cap_data {
106 * we don't let putname() free it (instead we free all of the saved 103 * we don't let putname() free it (instead we free all of the saved
107 * pointers at syscall exit time). 104 * pointers at syscall exit time).
108 * 105 *
109 * Further, in fs/namei.c:path_lookup() we store the inode and device. */ 106 * Further, in fs/namei.c:path_lookup() we store the inode and device.
107 */
110struct audit_names { 108struct audit_names {
111 struct list_head list; /* audit_context->names_list */ 109 struct list_head list; /* audit_context->names_list */
112 const char *name; 110 struct filename *name;
113 unsigned long ino; 111 unsigned long ino;
114 dev_t dev; 112 dev_t dev;
115 umode_t mode; 113 umode_t mode;
116 kuid_t uid; 114 kuid_t uid;
117 kgid_t gid; 115 kgid_t gid;
118 dev_t rdev; 116 dev_t rdev;
119 u32 osid; 117 u32 osid;
120 struct audit_cap_data fcap; 118 struct audit_cap_data fcap;
121 unsigned int fcap_ver; 119 unsigned int fcap_ver;
122 int name_len; /* number of name's characters to log */ 120 int name_len; /* number of name's characters to log */
123 bool name_put; /* call __putname() for this name */ 121 unsigned char type; /* record type */
122 bool name_put; /* call __putname() for this name */
124 /* 123 /*
125 * This was an allocated audit_names and not from the array of 124 * This was an allocated audit_names and not from the array of
126 * names allocated in the task audit context. Thus this name 125 * names allocated in the task audit context. Thus this name
127 * should be freed on syscall exit 126 * should be freed on syscall exit
128 */ 127 */
129 bool should_free; 128 bool should_free;
130}; 129};
131 130
132struct audit_aux_data { 131struct audit_aux_data {
@@ -998,7 +997,7 @@ static inline void audit_free_names(struct audit_context *context)
998 context->ino_count); 997 context->ino_count);
999 list_for_each_entry(n, &context->names_list, list) { 998 list_for_each_entry(n, &context->names_list, list) {
1000 printk(KERN_ERR "names[%d] = %p = %s\n", i, 999 printk(KERN_ERR "names[%d] = %p = %s\n", i,
1001 n->name, n->name ?: "(null)"); 1000 n->name, n->name->name ?: "(null)");
1002 } 1001 }
1003 dump_stack(); 1002 dump_stack();
1004 return; 1003 return;
@@ -1555,7 +1554,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
1555 case AUDIT_NAME_FULL: 1554 case AUDIT_NAME_FULL:
1556 /* log the full path */ 1555 /* log the full path */
1557 audit_log_format(ab, " name="); 1556 audit_log_format(ab, " name=");
1558 audit_log_untrustedstring(ab, n->name); 1557 audit_log_untrustedstring(ab, n->name->name);
1559 break; 1558 break;
1560 case 0: 1559 case 0:
1561 /* name was specified as a relative path and the 1560 /* name was specified as a relative path and the
@@ -1565,7 +1564,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
1565 default: 1564 default:
1566 /* log the name's directory component */ 1565 /* log the name's directory component */
1567 audit_log_format(ab, " name="); 1566 audit_log_format(ab, " name=");
1568 audit_log_n_untrustedstring(ab, n->name, 1567 audit_log_n_untrustedstring(ab, n->name->name,
1569 n->name_len); 1568 n->name_len);
1570 } 1569 }
1571 } else 1570 } else
@@ -1995,7 +1994,8 @@ retry:
1995#endif 1994#endif
1996} 1995}
1997 1996
1998static struct audit_names *audit_alloc_name(struct audit_context *context) 1997static struct audit_names *audit_alloc_name(struct audit_context *context,
1998 unsigned char type)
1999{ 1999{
2000 struct audit_names *aname; 2000 struct audit_names *aname;
2001 2001
@@ -2010,6 +2010,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
2010 } 2010 }
2011 2011
2012 aname->ino = (unsigned long)-1; 2012 aname->ino = (unsigned long)-1;
2013 aname->type = type;
2013 list_add_tail(&aname->list, &context->names_list); 2014 list_add_tail(&aname->list, &context->names_list);
2014 2015
2015 context->name_count++; 2016 context->name_count++;
@@ -2020,13 +2021,36 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
2020} 2021}
2021 2022
2022/** 2023/**
2024 * audit_reusename - fill out filename with info from existing entry
2025 * @uptr: userland ptr to pathname
2026 *
2027 * Search the audit_names list for the current audit context. If there is an
2028 * existing entry with a matching "uptr" then return the filename
2029 * associated with that audit_name. If not, return NULL.
2030 */
2031struct filename *
2032__audit_reusename(const __user char *uptr)
2033{
2034 struct audit_context *context = current->audit_context;
2035 struct audit_names *n;
2036
2037 list_for_each_entry(n, &context->names_list, list) {
2038 if (!n->name)
2039 continue;
2040 if (n->name->uptr == uptr)
2041 return n->name;
2042 }
2043 return NULL;
2044}
2045
2046/**
2023 * audit_getname - add a name to the list 2047 * audit_getname - add a name to the list
2024 * @name: name to add 2048 * @name: name to add
2025 * 2049 *
2026 * Add a name to the list of audit names for this context. 2050 * Add a name to the list of audit names for this context.
2027 * Called from fs/namei.c:getname(). 2051 * Called from fs/namei.c:getname().
2028 */ 2052 */
2029void __audit_getname(const char *name) 2053void __audit_getname(struct filename *name)
2030{ 2054{
2031 struct audit_context *context = current->audit_context; 2055 struct audit_context *context = current->audit_context;
2032 struct audit_names *n; 2056 struct audit_names *n;
@@ -2040,13 +2064,19 @@ void __audit_getname(const char *name)
2040 return; 2064 return;
2041 } 2065 }
2042 2066
2043 n = audit_alloc_name(context); 2067#if AUDIT_DEBUG
2068 /* The filename _must_ have a populated ->name */
2069 BUG_ON(!name->name);
2070#endif
2071
2072 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
2044 if (!n) 2073 if (!n)
2045 return; 2074 return;
2046 2075
2047 n->name = name; 2076 n->name = name;
2048 n->name_len = AUDIT_NAME_FULL; 2077 n->name_len = AUDIT_NAME_FULL;
2049 n->name_put = true; 2078 n->name_put = true;
2079 name->aname = n;
2050 2080
2051 if (!context->pwd.dentry) 2081 if (!context->pwd.dentry)
2052 get_fs_pwd(current->fs, &context->pwd); 2082 get_fs_pwd(current->fs, &context->pwd);
@@ -2059,7 +2089,7 @@ void __audit_getname(const char *name)
2059 * then we delay the putname until syscall exit. 2089 * then we delay the putname until syscall exit.
2060 * Called from include/linux/fs.h:putname(). 2090 * Called from include/linux/fs.h:putname().
2061 */ 2091 */
2062void audit_putname(const char *name) 2092void audit_putname(struct filename *name)
2063{ 2093{
2064 struct audit_context *context = current->audit_context; 2094 struct audit_context *context = current->audit_context;
2065 2095
@@ -2074,7 +2104,7 @@ void audit_putname(const char *name)
2074 2104
2075 list_for_each_entry(n, &context->names_list, list) 2105 list_for_each_entry(n, &context->names_list, list)
2076 printk(KERN_ERR "name[%d] = %p = %s\n", i, 2106 printk(KERN_ERR "name[%d] = %p = %s\n", i,
2077 n->name, n->name ?: "(null)"); 2107 n->name, n->name->name ?: "(null)");
2078 } 2108 }
2079#endif 2109#endif
2080 __putname(name); 2110 __putname(name);
@@ -2088,8 +2118,8 @@ void audit_putname(const char *name)
2088 " put_count=%d\n", 2118 " put_count=%d\n",
2089 __FILE__, __LINE__, 2119 __FILE__, __LINE__,
2090 context->serial, context->major, 2120 context->serial, context->major,
2091 context->in_syscall, name, context->name_count, 2121 context->in_syscall, name->name,
2092 context->put_count); 2122 context->name_count, context->put_count);
2093 dump_stack(); 2123 dump_stack();
2094 } 2124 }
2095 } 2125 }
@@ -2132,13 +2162,13 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent
2132} 2162}
2133 2163
2134/** 2164/**
2135 * audit_inode - store the inode and device from a lookup 2165 * __audit_inode - store the inode and device from a lookup
2136 * @name: name being audited 2166 * @name: name being audited
2137 * @dentry: dentry being audited 2167 * @dentry: dentry being audited
2138 * 2168 * @parent: does this dentry represent the parent?
2139 * Called from fs/namei.c:path_lookup().
2140 */ 2169 */
2141void __audit_inode(const char *name, const struct dentry *dentry) 2170void __audit_inode(struct filename *name, const struct dentry *dentry,
2171 unsigned int parent)
2142{ 2172{
2143 struct audit_context *context = current->audit_context; 2173 struct audit_context *context = current->audit_context;
2144 const struct inode *inode = dentry->d_inode; 2174 const struct inode *inode = dentry->d_inode;
@@ -2147,24 +2177,69 @@ void __audit_inode(const char *name, const struct dentry *dentry)
2147 if (!context->in_syscall) 2177 if (!context->in_syscall)
2148 return; 2178 return;
2149 2179
2180 if (!name)
2181 goto out_alloc;
2182
2183#if AUDIT_DEBUG
2184 /* The struct filename _must_ have a populated ->name */
2185 BUG_ON(!name->name);
2186#endif
2187 /*
2188 * If we have a pointer to an audit_names entry already, then we can
2189 * just use it directly if the type is correct.
2190 */
2191 n = name->aname;
2192 if (n) {
2193 if (parent) {
2194 if (n->type == AUDIT_TYPE_PARENT ||
2195 n->type == AUDIT_TYPE_UNKNOWN)
2196 goto out;
2197 } else {
2198 if (n->type != AUDIT_TYPE_PARENT)
2199 goto out;
2200 }
2201 }
2202
2150 list_for_each_entry_reverse(n, &context->names_list, list) { 2203 list_for_each_entry_reverse(n, &context->names_list, list) {
2151 if (n->name && (n->name == name)) 2204 /* does the name pointer match? */
2152 goto out; 2205 if (!n->name || n->name->name != name->name)
2206 continue;
2207
2208 /* match the correct record type */
2209 if (parent) {
2210 if (n->type == AUDIT_TYPE_PARENT ||
2211 n->type == AUDIT_TYPE_UNKNOWN)
2212 goto out;
2213 } else {
2214 if (n->type != AUDIT_TYPE_PARENT)
2215 goto out;
2216 }
2153 } 2217 }
2154 2218
2155 /* unable to find the name from a previous getname() */ 2219out_alloc:
2156 n = audit_alloc_name(context); 2220 /* unable to find the name from a previous getname(). Allocate a new
2221 * anonymous entry.
2222 */
2223 n = audit_alloc_name(context, AUDIT_TYPE_NORMAL);
2157 if (!n) 2224 if (!n)
2158 return; 2225 return;
2159out: 2226out:
2227 if (parent) {
2228 n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
2229 n->type = AUDIT_TYPE_PARENT;
2230 } else {
2231 n->name_len = AUDIT_NAME_FULL;
2232 n->type = AUDIT_TYPE_NORMAL;
2233 }
2160 handle_path(dentry); 2234 handle_path(dentry);
2161 audit_copy_inode(n, dentry, inode); 2235 audit_copy_inode(n, dentry, inode);
2162} 2236}
2163 2237
2164/** 2238/**
2165 * audit_inode_child - collect inode info for created/removed objects 2239 * __audit_inode_child - collect inode info for created/removed objects
2166 * @dentry: dentry being audited
2167 * @parent: inode of dentry parent 2240 * @parent: inode of dentry parent
2241 * @dentry: dentry being audited
2242 * @type: AUDIT_TYPE_* value that we're looking for
2168 * 2243 *
2169 * For syscalls that create or remove filesystem objects, audit_inode 2244 * For syscalls that create or remove filesystem objects, audit_inode
2170 * can only collect information for the filesystem object's parent. 2245 * can only collect information for the filesystem object's parent.
@@ -2174,15 +2249,14 @@ out:
2174 * must be hooked prior, in order to capture the target inode during 2249 * must be hooked prior, in order to capture the target inode during
2175 * unsuccessful attempts. 2250 * unsuccessful attempts.
2176 */ 2251 */
2177void __audit_inode_child(const struct dentry *dentry, 2252void __audit_inode_child(const struct inode *parent,
2178 const struct inode *parent) 2253 const struct dentry *dentry,
2254 const unsigned char type)
2179{ 2255{
2180 struct audit_context *context = current->audit_context; 2256 struct audit_context *context = current->audit_context;
2181 const char *found_parent = NULL, *found_child = NULL;
2182 const struct inode *inode = dentry->d_inode; 2257 const struct inode *inode = dentry->d_inode;
2183 const char *dname = dentry->d_name.name; 2258 const char *dname = dentry->d_name.name;
2184 struct audit_names *n; 2259 struct audit_names *n, *found_parent = NULL, *found_child = NULL;
2185 int dirlen = 0;
2186 2260
2187 if (!context->in_syscall) 2261 if (!context->in_syscall)
2188 return; 2262 return;
@@ -2190,62 +2264,65 @@ void __audit_inode_child(const struct dentry *dentry,
2190 if (inode) 2264 if (inode)
2191 handle_one(inode); 2265 handle_one(inode);
2192 2266
2193 /* parent is more likely, look for it first */ 2267 /* look for a parent entry first */
2194 list_for_each_entry(n, &context->names_list, list) { 2268 list_for_each_entry(n, &context->names_list, list) {
2195 if (!n->name) 2269 if (!n->name || n->type != AUDIT_TYPE_PARENT)
2196 continue; 2270 continue;
2197 2271
2198 if (n->ino == parent->i_ino && 2272 if (n->ino == parent->i_ino &&
2199 !audit_compare_dname_path(dname, n->name, &dirlen)) { 2273 !audit_compare_dname_path(dname, n->name->name, n->name_len)) {
2200 n->name_len = dirlen; /* update parent data in place */ 2274 found_parent = n;
2201 found_parent = n->name; 2275 break;
2202 goto add_names;
2203 } 2276 }
2204 } 2277 }
2205 2278
2206 /* no matching parent, look for matching child */ 2279 /* is there a matching child entry? */
2207 list_for_each_entry(n, &context->names_list, list) { 2280 list_for_each_entry(n, &context->names_list, list) {
2208 if (!n->name) 2281 /* can only match entries that have a name */
2282 if (!n->name || n->type != type)
2209 continue; 2283 continue;
2210 2284
2211 /* strcmp() is the more likely scenario */ 2285 /* if we found a parent, make sure this one is a child of it */
2212 if (!strcmp(dname, n->name) || 2286 if (found_parent && (n->name != found_parent->name))
2213 !audit_compare_dname_path(dname, n->name, &dirlen)) { 2287 continue;
2214 if (inode) 2288
2215 audit_copy_inode(n, NULL, inode); 2289 if (!strcmp(dname, n->name->name) ||
2216 else 2290 !audit_compare_dname_path(dname, n->name->name,
2217 n->ino = (unsigned long)-1; 2291 found_parent ?
2218 found_child = n->name; 2292 found_parent->name_len :
2219 goto add_names; 2293 AUDIT_NAME_FULL)) {
2294 found_child = n;
2295 break;
2220 } 2296 }
2221 } 2297 }
2222 2298
2223add_names:
2224 if (!found_parent) { 2299 if (!found_parent) {
2225 n = audit_alloc_name(context); 2300 /* create a new, "anonymous" parent record */
2301 n = audit_alloc_name(context, AUDIT_TYPE_PARENT);
2226 if (!n) 2302 if (!n)
2227 return; 2303 return;
2228 audit_copy_inode(n, NULL, parent); 2304 audit_copy_inode(n, NULL, parent);
2229 } 2305 }
2230 2306
2231 if (!found_child) { 2307 if (!found_child) {
2232 n = audit_alloc_name(context); 2308 found_child = audit_alloc_name(context, type);
2233 if (!n) 2309 if (!found_child)
2234 return; 2310 return;
2235 2311
2236 /* Re-use the name belonging to the slot for a matching parent 2312 /* Re-use the name belonging to the slot for a matching parent
2237 * directory. All names for this context are relinquished in 2313 * directory. All names for this context are relinquished in
2238 * audit_free_names() */ 2314 * audit_free_names() */
2239 if (found_parent) { 2315 if (found_parent) {
2240 n->name = found_parent; 2316 found_child->name = found_parent->name;
2241 n->name_len = AUDIT_NAME_FULL; 2317 found_child->name_len = AUDIT_NAME_FULL;
2242 /* don't call __putname() */ 2318 /* don't call __putname() */
2243 n->name_put = false; 2319 found_child->name_put = false;
2244 } 2320 }
2245
2246 if (inode)
2247 audit_copy_inode(n, NULL, inode);
2248 } 2321 }
2322 if (inode)
2323 audit_copy_inode(found_child, dentry, inode);
2324 else
2325 found_child->ino = (unsigned long)-1;
2249} 2326}
2250EXPORT_SYMBOL_GPL(__audit_inode_child); 2327EXPORT_SYMBOL_GPL(__audit_inode_child);
2251 2328
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 17e073c309e6..9a61738cefc8 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -696,6 +696,22 @@ out:
696 return ret; 696 return ret;
697} 697}
698 698
699/*
700 * GDB places a breakpoint at this function to know dynamically
701 * loaded objects. It's not defined static so that only one instance with this
702 * name exists in the kernel.
703 */
704
705static int module_event(struct notifier_block *self, unsigned long val,
706 void *data)
707{
708 return 0;
709}
710
711static struct notifier_block dbg_module_load_nb = {
712 .notifier_call = module_event,
713};
714
699int kgdb_nmicallback(int cpu, void *regs) 715int kgdb_nmicallback(int cpu, void *regs)
700{ 716{
701#ifdef CONFIG_SMP 717#ifdef CONFIG_SMP
@@ -824,6 +840,7 @@ static void kgdb_register_callbacks(void)
824 kgdb_arch_init(); 840 kgdb_arch_init();
825 if (!dbg_is_early) 841 if (!dbg_is_early)
826 kgdb_arch_late(); 842 kgdb_arch_late();
843 register_module_notifier(&dbg_module_load_nb);
827 register_reboot_notifier(&dbg_reboot_notifier); 844 register_reboot_notifier(&dbg_reboot_notifier);
828 atomic_notifier_chain_register(&panic_notifier_list, 845 atomic_notifier_chain_register(&panic_notifier_list,
829 &kgdb_panic_event_nb); 846 &kgdb_panic_event_nb);
@@ -847,6 +864,7 @@ static void kgdb_unregister_callbacks(void)
847 if (kgdb_io_module_registered) { 864 if (kgdb_io_module_registered) {
848 kgdb_io_module_registered = 0; 865 kgdb_io_module_registered = 0;
849 unregister_reboot_notifier(&dbg_reboot_notifier); 866 unregister_reboot_notifier(&dbg_reboot_notifier);
867 unregister_module_notifier(&dbg_module_load_nb);
850 atomic_notifier_chain_unregister(&panic_notifier_list, 868 atomic_notifier_chain_unregister(&panic_notifier_list,
851 &kgdb_panic_event_nb); 869 &kgdb_panic_event_nb);
852 kgdb_arch_exit(); 870 kgdb_arch_exit();
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 07c9bbb94a0b..b03e0e814e43 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -129,6 +129,8 @@ kdb_bt(int argc, const char **argv)
129 } 129 }
130 /* Now the inactive tasks */ 130 /* Now the inactive tasks */
131 kdb_do_each_thread(g, p) { 131 kdb_do_each_thread(g, p) {
132 if (KDB_FLAG(CMD_INTERRUPT))
133 return 0;
132 if (task_curr(p)) 134 if (task_curr(p))
133 continue; 135 continue;
134 if (kdb_bt1(p, mask, argcount, btaprompt)) 136 if (kdb_bt1(p, mask, argcount, btaprompt))
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 0a69d2adc4f3..14ff4849262c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -552,6 +552,7 @@ int vkdb_printf(const char *fmt, va_list ap)
552{ 552{
553 int diag; 553 int diag;
554 int linecount; 554 int linecount;
555 int colcount;
555 int logging, saved_loglevel = 0; 556 int logging, saved_loglevel = 0;
556 int saved_trap_printk; 557 int saved_trap_printk;
557 int got_printf_lock = 0; 558 int got_printf_lock = 0;
@@ -584,6 +585,10 @@ int vkdb_printf(const char *fmt, va_list ap)
584 if (diag || linecount <= 1) 585 if (diag || linecount <= 1)
585 linecount = 24; 586 linecount = 24;
586 587
588 diag = kdbgetintenv("COLUMNS", &colcount);
589 if (diag || colcount <= 1)
590 colcount = 80;
591
587 diag = kdbgetintenv("LOGGING", &logging); 592 diag = kdbgetintenv("LOGGING", &logging);
588 if (diag) 593 if (diag)
589 logging = 0; 594 logging = 0;
@@ -690,7 +695,7 @@ kdb_printit:
690 gdbstub_msg_write(kdb_buffer, retlen); 695 gdbstub_msg_write(kdb_buffer, retlen);
691 } else { 696 } else {
692 if (dbg_io_ops && !dbg_io_ops->is_console) { 697 if (dbg_io_ops && !dbg_io_ops->is_console) {
693 len = strlen(kdb_buffer); 698 len = retlen;
694 cp = kdb_buffer; 699 cp = kdb_buffer;
695 while (len--) { 700 while (len--) {
696 dbg_io_ops->write_char(*cp); 701 dbg_io_ops->write_char(*cp);
@@ -709,11 +714,29 @@ kdb_printit:
709 printk(KERN_INFO "%s", kdb_buffer); 714 printk(KERN_INFO "%s", kdb_buffer);
710 } 715 }
711 716
712 if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n')) 717 if (KDB_STATE(PAGER)) {
713 kdb_nextline++; 718 /*
719 * Check printed string to decide how to bump the
720 * kdb_nextline to control when the more prompt should
721 * show up.
722 */
723 int got = 0;
724 len = retlen;
725 while (len--) {
726 if (kdb_buffer[len] == '\n') {
727 kdb_nextline++;
728 got = 0;
729 } else if (kdb_buffer[len] == '\r') {
730 got = 0;
731 } else {
732 got++;
733 }
734 }
735 kdb_nextline += got / (colcount + 1);
736 }
714 737
715 /* check for having reached the LINES number of printed lines */ 738 /* check for having reached the LINES number of printed lines */
716 if (kdb_nextline == linecount) { 739 if (kdb_nextline >= linecount) {
717 char buf1[16] = ""; 740 char buf1[16] = "";
718 741
719 /* Watch out for recursion here. Any routine that calls 742 /* Watch out for recursion here. Any routine that calls
@@ -765,7 +788,7 @@ kdb_printit:
765 kdb_grepping_flag = 0; 788 kdb_grepping_flag = 0;
766 kdb_printf("\n"); 789 kdb_printf("\n");
767 } else if (buf1[0] == ' ') { 790 } else if (buf1[0] == ' ') {
768 kdb_printf("\n"); 791 kdb_printf("\r");
769 suspend_grep = 1; /* for this recursion */ 792 suspend_grep = 1; /* for this recursion */
770 } else if (buf1[0] == '\n') { 793 } else if (buf1[0] == '\n') {
771 kdb_nextline = linecount - 1; 794 kdb_nextline = linecount - 1;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1261dc7eaeb9..4d5f8d5612f3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2101,6 +2101,8 @@ static int kdb_dmesg(int argc, const char **argv)
2101 } 2101 }
2102 if (!lines--) 2102 if (!lines--)
2103 break; 2103 break;
2104 if (KDB_FLAG(CMD_INTERRUPT))
2105 return 0;
2104 2106
2105 kdb_printf("%.*s\n", (int)len - 1, buf); 2107 kdb_printf("%.*s\n", (int)len - 1, buf);
2106 } 2108 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cda3ebd49e86..dbccf83c134d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -372,6 +372,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
372 372
373 list_for_each_entry_rcu(pmu, &pmus, entry) { 373 list_for_each_entry_rcu(pmu, &pmus, entry) {
374 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 374 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
375 if (cpuctx->unique_pmu != pmu)
376 continue; /* ensure we process each cpuctx once */
375 377
376 /* 378 /*
377 * perf_cgroup_events says at least one 379 * perf_cgroup_events says at least one
@@ -395,9 +397,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
395 397
396 if (mode & PERF_CGROUP_SWIN) { 398 if (mode & PERF_CGROUP_SWIN) {
397 WARN_ON_ONCE(cpuctx->cgrp); 399 WARN_ON_ONCE(cpuctx->cgrp);
398 /* set cgrp before ctxsw in to 400 /*
399 * allow event_filter_match() to not 401 * set cgrp before ctxsw in to allow
400 * have to pass task around 402 * event_filter_match() to not have to pass
403 * task around
401 */ 404 */
402 cpuctx->cgrp = perf_cgroup_from_task(task); 405 cpuctx->cgrp = perf_cgroup_from_task(task);
403 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); 406 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -4412,7 +4415,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
4412 rcu_read_lock(); 4415 rcu_read_lock();
4413 list_for_each_entry_rcu(pmu, &pmus, entry) { 4416 list_for_each_entry_rcu(pmu, &pmus, entry) {
4414 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4417 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4415 if (cpuctx->active_pmu != pmu) 4418 if (cpuctx->unique_pmu != pmu)
4416 goto next; 4419 goto next;
4417 perf_event_task_ctx(&cpuctx->ctx, task_event); 4420 perf_event_task_ctx(&cpuctx->ctx, task_event);
4418 4421
@@ -4558,7 +4561,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
4558 rcu_read_lock(); 4561 rcu_read_lock();
4559 list_for_each_entry_rcu(pmu, &pmus, entry) { 4562 list_for_each_entry_rcu(pmu, &pmus, entry) {
4560 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4563 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4561 if (cpuctx->active_pmu != pmu) 4564 if (cpuctx->unique_pmu != pmu)
4562 goto next; 4565 goto next;
4563 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 4566 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
4564 4567
@@ -4754,7 +4757,7 @@ got_name:
4754 rcu_read_lock(); 4757 rcu_read_lock();
4755 list_for_each_entry_rcu(pmu, &pmus, entry) { 4758 list_for_each_entry_rcu(pmu, &pmus, entry) {
4756 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4759 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4757 if (cpuctx->active_pmu != pmu) 4760 if (cpuctx->unique_pmu != pmu)
4758 goto next; 4761 goto next;
4759 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, 4762 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4760 vma->vm_flags & VM_EXEC); 4763 vma->vm_flags & VM_EXEC);
@@ -5855,8 +5858,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
5855 5858
5856 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 5859 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5857 5860
5858 if (cpuctx->active_pmu == old_pmu) 5861 if (cpuctx->unique_pmu == old_pmu)
5859 cpuctx->active_pmu = pmu; 5862 cpuctx->unique_pmu = pmu;
5860 } 5863 }
5861} 5864}
5862 5865
@@ -5991,7 +5994,7 @@ skip_type:
5991 cpuctx->ctx.pmu = pmu; 5994 cpuctx->ctx.pmu = pmu;
5992 cpuctx->jiffies_interval = 1; 5995 cpuctx->jiffies_interval = 1;
5993 INIT_LIST_HEAD(&cpuctx->rotation_list); 5996 INIT_LIST_HEAD(&cpuctx->rotation_list);
5994 cpuctx->active_pmu = pmu; 5997 cpuctx->unique_pmu = pmu;
5995 } 5998 }
5996 5999
5997got_cpu_context: 6000got_cpu_context:
diff --git a/kernel/fork.c b/kernel/fork.c
index 1cd7d581b3b2..8b20ab7d3aa2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1584,7 +1584,7 @@ long do_fork(unsigned long clone_flags,
1584 * requested, no event is reported; otherwise, report if the event 1584 * requested, no event is reported; otherwise, report if the event
1585 * for the type of forking is enabled. 1585 * for the type of forking is enabled.
1586 */ 1586 */
1587 if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { 1587 if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
1588 if (clone_flags & CLONE_VFORK) 1588 if (clone_flags & CLONE_VFORK)
1589 trace = PTRACE_EVENT_VFORK; 1589 trace = PTRACE_EVENT_VFORK;
1590 else if ((clone_flags & CSIGNAL) != SIGCHLD) 1590 else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1634,6 +1634,17 @@ long do_fork(unsigned long clone_flags,
1634 return nr; 1634 return nr;
1635} 1635}
1636 1636
1637#ifdef CONFIG_GENERIC_KERNEL_THREAD
1638/*
1639 * Create a kernel thread.
1640 */
1641pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
1642{
1643 return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
1644 (unsigned long)arg, NULL, NULL);
1645}
1646#endif
1647
1637#ifndef ARCH_MIN_MMSTRUCT_ALIGN 1648#ifndef ARCH_MIN_MMSTRUCT_ALIGN
1638#define ARCH_MIN_MMSTRUCT_ALIGN 0 1649#define ARCH_MIN_MMSTRUCT_ALIGN 0
1639#endif 1650#endif
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 49a77727db42..4e69e24d3d7d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -148,7 +148,8 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
148 * @host_data: Controller private data pointer 148 * @host_data: Controller private data pointer
149 * 149 *
150 * Allocates a legacy irq_domain if irq_base is positive or a linear 150 * Allocates a legacy irq_domain if irq_base is positive or a linear
151 * domain otherwise. 151 * domain otherwise. For the legacy domain, IRQ descriptors will also
152 * be allocated.
152 * 153 *
153 * This is intended to implement the expected behaviour for most 154 * This is intended to implement the expected behaviour for most
154 * interrupt controllers which is that a linear mapping should 155 * interrupt controllers which is that a linear mapping should
@@ -162,11 +163,33 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
162 const struct irq_domain_ops *ops, 163 const struct irq_domain_ops *ops,
163 void *host_data) 164 void *host_data)
164{ 165{
165 if (first_irq > 0) 166 if (first_irq > 0) {
166 return irq_domain_add_legacy(of_node, size, first_irq, 0, 167 int irq_base;
168
169 if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
170 /*
171 * Set the descriptor allocator to search for a
172 * 1-to-1 mapping, such as irq_alloc_desc_at().
173 * Use of_node_to_nid() which is defined to
174 * numa_node_id() on platforms that have no custom
175 * implementation.
176 */
177 irq_base = irq_alloc_descs(first_irq, first_irq, size,
178 of_node_to_nid(of_node));
179 if (irq_base < 0) {
180 WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
181 first_irq);
182 irq_base = first_irq;
183 }
184 } else
185 irq_base = first_irq;
186
187 return irq_domain_add_legacy(of_node, size, irq_base, 0,
167 ops, host_data); 188 ops, host_data);
168 else 189 }
169 return irq_domain_add_linear(of_node, size, ops, host_data); 190
191 /* A linear domain is the default */
192 return irq_domain_add_linear(of_node, size, ops, host_data);
170} 193}
171 194
172/** 195/**
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6f99aead66c6..1c317e386831 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -37,6 +37,7 @@
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/suspend.h> 38#include <linux/suspend.h>
39#include <linux/rwsem.h> 39#include <linux/rwsem.h>
40#include <linux/ptrace.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42#include <trace/events/module.h> 43#include <trace/events/module.h>
@@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data)
221 retval = kernel_execve(sub_info->path, 222 retval = kernel_execve(sub_info->path,
222 (const char *const *)sub_info->argv, 223 (const char *const *)sub_info->argv,
223 (const char *const *)sub_info->envp); 224 (const char *const *)sub_info->envp);
225 if (!retval)
226 return 0;
224 227
225 /* Exec failed? */ 228 /* Exec failed? */
226fail: 229fail:
227 sub_info->retval = retval; 230 sub_info->retval = retval;
228 return 0; 231 do_exit(0);
229} 232}
230 233
231static int call_helper(void *data) 234static int call_helper(void *data)
@@ -292,7 +295,7 @@ static int wait_for_helper(void *data)
292 } 295 }
293 296
294 umh_complete(sub_info); 297 umh_complete(sub_info);
295 return 0; 298 do_exit(0);
296} 299}
297 300
298/* This is run by khelper thread */ 301/* This is run by khelper thread */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 146a6fa96825..29fb60caecb5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -16,6 +16,7 @@
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/ptrace.h>
19#include <trace/events/sched.h> 20#include <trace/events/sched.h>
20 21
21static DEFINE_SPINLOCK(kthread_create_lock); 22static DEFINE_SPINLOCK(kthread_create_lock);
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
new file mode 100644
index 000000000000..4646eb2c3820
--- /dev/null
+++ b/kernel/modsign_pubkey.c
@@ -0,0 +1,113 @@
1/* Public keys for module signature verification
2 *
3 * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/sched.h>
14#include <linux/cred.h>
15#include <linux/err.h>
16#include <keys/asymmetric-type.h>
17#include "module-internal.h"
18
19struct key *modsign_keyring;
20
21extern __initdata const u8 modsign_certificate_list[];
22extern __initdata const u8 modsign_certificate_list_end[];
23asm(".section .init.data,\"aw\"\n"
24 "modsign_certificate_list:\n"
25 ".incbin \"signing_key.x509\"\n"
26 ".incbin \"extra_certificates\"\n"
27 "modsign_certificate_list_end:"
28 );
29
30/*
31 * We need to make sure ccache doesn't cache the .o file as it doesn't notice
32 * if modsign.pub changes.
33 */
34static __initdata const char annoy_ccache[] = __TIME__ "foo";
35
/*
 * Allocate and instantiate the keyring that module signatures will be
 * checked against.  (The keys themselves are loaded later, by
 * load_module_signing_keys() at late_initcall time.)
 */
static __init int module_verify_init(void)
{
	pr_notice("Initialise module verification\n");

	/* Root-owned keyring, viewable and readable from userspace but not
	 * writable (no KEY_POS_SETATTR, no KEY_USR_WRITE); exempt from the
	 * key quota. */
	modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
				    KUIDT_INIT(0), KGIDT_INIT(0),
				    current_cred(),
				    (KEY_POS_ALL & ~KEY_POS_SETATTR) |
				    KEY_USR_VIEW | KEY_USR_READ,
				    KEY_ALLOC_NOT_IN_QUOTA);
	if (IS_ERR(modsign_keyring))
		panic("Can't allocate module signing keyring\n");

	/* A keyring carries no payload; instantiating with NULL data just
	 * makes it usable for linking keys into. */
	if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
		panic("Can't instantiate module signing keyring\n");

	return 0;
}
57
58/*
59 * Must be initialised before we try and load the keys into the keyring.
60 */
61device_initcall(module_verify_init);
62
63/*
64 * Load the compiled-in keys
65 */
66static __init int load_module_signing_keys(void)
67{
68 key_ref_t key;
69 const u8 *p, *end;
70 size_t plen;
71
72 pr_notice("Loading module verification certificates\n");
73
74 end = modsign_certificate_list_end;
75 p = modsign_certificate_list;
76 while (p < end) {
77 /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
78 * than 256 bytes in size.
79 */
80 if (end - p < 4)
81 goto dodgy_cert;
82 if (p[0] != 0x30 &&
83 p[1] != 0x82)
84 goto dodgy_cert;
85 plen = (p[2] << 8) | p[3];
86 plen += 4;
87 if (plen > end - p)
88 goto dodgy_cert;
89
90 key = key_create_or_update(make_key_ref(modsign_keyring, 1),
91 "asymmetric",
92 NULL,
93 p,
94 plen,
95 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
96 KEY_USR_VIEW,
97 KEY_ALLOC_NOT_IN_QUOTA);
98 if (IS_ERR(key))
99 pr_err("MODSIGN: Problem loading in-kernel X.509 certificate (%ld)\n",
100 PTR_ERR(key));
101 else
102 pr_notice("MODSIGN: Loaded cert '%s'\n",
103 key_ref_to_ptr(key)->description);
104 p += plen;
105 }
106
107 return 0;
108
109dodgy_cert:
110 pr_err("MODSIGN: Problem parsing in-kernel X.509 certificate list\n");
111 return 0;
112}
113late_initcall(load_module_signing_keys);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
new file mode 100644
index 000000000000..6114a13419bd
--- /dev/null
+++ b/kernel/module-internal.h
@@ -0,0 +1,15 @@
1/* Module internals
2 *
3 * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12extern struct key *modsign_keyring;
13
14extern int mod_verify_sig(const void *mod, unsigned long modlen,
15 const void *sig, unsigned long siglen);
diff --git a/kernel/module.c b/kernel/module.c
index 4edbd9c11aca..0e2da8695f8e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -58,6 +58,8 @@
58#include <linux/jump_label.h> 58#include <linux/jump_label.h>
59#include <linux/pfn.h> 59#include <linux/pfn.h>
60#include <linux/bsearch.h> 60#include <linux/bsearch.h>
61#include <linux/fips.h>
62#include "module-internal.h"
61 63
62#define CREATE_TRACE_POINTS 64#define CREATE_TRACE_POINTS
63#include <trace/events/module.h> 65#include <trace/events/module.h>
@@ -102,6 +104,43 @@ static LIST_HEAD(modules);
102struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ 104struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
103#endif /* CONFIG_KGDB_KDB */ 105#endif /* CONFIG_KGDB_KDB */
104 106
107#ifdef CONFIG_MODULE_SIG
108#ifdef CONFIG_MODULE_SIG_FORCE
109static bool sig_enforce = true;
110#else
111static bool sig_enforce = false;
112
/*
 * Module-parameter setter for "sig_enforce" that behaves like a one-way
 * latch: the flag may be turned on at any time but can never be turned
 * back off.
 */
static int param_set_bool_enable_only(const char *val,
				      const struct kernel_param *kp)
{
	int err;
	bool test;
	/* Parse into a local dummy first so a rejected value never touches
	 * the real flag through kp->arg. */
	struct kernel_param dummy_kp = *kp;

	dummy_kp.arg = &test;

	err = param_set_bool(val, &dummy_kp);
	if (err)
		return err;

	/* Don't let them unset it once it's set! */
	if (!test && sig_enforce)
		return -EROFS;

	if (test)
		sig_enforce = true;
	return 0;
}
134
135static const struct kernel_param_ops param_ops_bool_enable_only = {
136 .set = param_set_bool_enable_only,
137 .get = param_get_bool,
138};
139#define param_check_bool_enable_only param_check_bool
140
141module_param(sig_enforce, bool_enable_only, 0644);
142#endif /* !CONFIG_MODULE_SIG_FORCE */
143#endif /* CONFIG_MODULE_SIG */
105 144
106/* Block module loading/unloading? */ 145/* Block module loading/unloading? */
107int modules_disabled = 0; 146int modules_disabled = 0;
@@ -136,6 +175,7 @@ struct load_info {
136 unsigned long symoffs, stroffs; 175 unsigned long symoffs, stroffs;
137 struct _ddebug *debug; 176 struct _ddebug *debug;
138 unsigned int num_debug; 177 unsigned int num_debug;
178 bool sig_ok;
139 struct { 179 struct {
140 unsigned int sym, str, mod, vers, info, pcpu; 180 unsigned int sym, str, mod, vers, info, pcpu;
141 } index; 181 } index;
@@ -1949,26 +1989,6 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
1949 return ret; 1989 return ret;
1950} 1990}
1951 1991
1952int __weak apply_relocate(Elf_Shdr *sechdrs,
1953 const char *strtab,
1954 unsigned int symindex,
1955 unsigned int relsec,
1956 struct module *me)
1957{
1958 pr_err("module %s: REL relocation unsupported\n", me->name);
1959 return -ENOEXEC;
1960}
1961
1962int __weak apply_relocate_add(Elf_Shdr *sechdrs,
1963 const char *strtab,
1964 unsigned int symindex,
1965 unsigned int relsec,
1966 struct module *me)
1967{
1968 pr_err("module %s: RELA relocation unsupported\n", me->name);
1969 return -ENOEXEC;
1970}
1971
1972static int apply_relocations(struct module *mod, const struct load_info *info) 1992static int apply_relocations(struct module *mod, const struct load_info *info)
1973{ 1993{
1974 unsigned int i; 1994 unsigned int i;
@@ -2399,7 +2419,52 @@ static inline void kmemleak_load_module(const struct module *mod,
2399} 2419}
2400#endif 2420#endif
2401 2421
2402/* Sets info->hdr and info->len. */ 2422#ifdef CONFIG_MODULE_SIG
2423static int module_sig_check(struct load_info *info,
2424 const void *mod, unsigned long *len)
2425{
2426 int err = -ENOKEY;
2427 const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
2428 const void *p = mod, *end = mod + *len;
2429
2430 /* Poor man's memmem. */
2431 while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) {
2432 if (p + markerlen > end)
2433 break;
2434
2435 if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) {
2436 const void *sig = p + markerlen;
2437 /* Truncate module up to signature. */
2438 *len = p - mod;
2439 err = mod_verify_sig(mod, *len, sig, end - sig);
2440 break;
2441 }
2442 p++;
2443 }
2444
2445 if (!err) {
2446 info->sig_ok = true;
2447 return 0;
2448 }
2449
2450 /* Not having a signature is only an error if we're strict. */
2451 if (err < 0 && fips_enabled)
2452 panic("Module verification failed with error %d in FIPS mode\n",
2453 err);
2454 if (err == -ENOKEY && !sig_enforce)
2455 err = 0;
2456
2457 return err;
2458}
2459#else /* !CONFIG_MODULE_SIG */
2460static int module_sig_check(struct load_info *info,
2461 void *mod, unsigned long *len)
2462{
2463 return 0;
2464}
2465#endif /* !CONFIG_MODULE_SIG */
2466
2467/* Sets info->hdr, info->len and info->sig_ok. */
2403static int copy_and_check(struct load_info *info, 2468static int copy_and_check(struct load_info *info,
2404 const void __user *umod, unsigned long len, 2469 const void __user *umod, unsigned long len,
2405 const char __user *uargs) 2470 const char __user *uargs)
@@ -2419,6 +2484,10 @@ static int copy_and_check(struct load_info *info,
2419 goto free_hdr; 2484 goto free_hdr;
2420 } 2485 }
2421 2486
2487 err = module_sig_check(info, hdr, &len);
2488 if (err)
2489 goto free_hdr;
2490
2422 /* Sanity checks against insmoding binaries or wrong arch, 2491 /* Sanity checks against insmoding binaries or wrong arch,
2423 weird elf version */ 2492 weird elf version */
2424 if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 2493 if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
@@ -2730,6 +2799,10 @@ static int check_module_license_and_versions(struct module *mod)
2730 if (strcmp(mod->name, "driverloader") == 0) 2799 if (strcmp(mod->name, "driverloader") == 0)
2731 add_taint_module(mod, TAINT_PROPRIETARY_MODULE); 2800 add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
2732 2801
2802 /* lve claims to be GPL but upstream won't provide source */
2803 if (strcmp(mod->name, "lve") == 0)
2804 add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
2805
2733#ifdef CONFIG_MODVERSIONS 2806#ifdef CONFIG_MODVERSIONS
2734 if ((mod->num_syms && !mod->crcs) 2807 if ((mod->num_syms && !mod->crcs)
2735 || (mod->num_gpl_syms && !mod->gpl_crcs) 2808 || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2861,6 +2934,20 @@ static int post_relocation(struct module *mod, const struct load_info *info)
2861 return module_finalize(info->hdr, info->sechdrs, mod); 2934 return module_finalize(info->hdr, info->sechdrs, mod);
2862} 2935}
2863 2936
2937/* Is this module of this name done loading? No locks held. */
2938static bool finished_loading(const char *name)
2939{
2940 struct module *mod;
2941 bool ret;
2942
2943 mutex_lock(&module_mutex);
2944 mod = find_module(name);
2945 ret = !mod || mod->state != MODULE_STATE_COMING;
2946 mutex_unlock(&module_mutex);
2947
2948 return ret;
2949}
2950
2864/* Allocate and load the module: note that size of section 0 is always 2951/* Allocate and load the module: note that size of section 0 is always
2865 zero, and we rely on this for optional sections. */ 2952 zero, and we rely on this for optional sections. */
2866static struct module *load_module(void __user *umod, 2953static struct module *load_module(void __user *umod,
@@ -2868,7 +2955,7 @@ static struct module *load_module(void __user *umod,
2868 const char __user *uargs) 2955 const char __user *uargs)
2869{ 2956{
2870 struct load_info info = { NULL, }; 2957 struct load_info info = { NULL, };
2871 struct module *mod; 2958 struct module *mod, *old;
2872 long err; 2959 long err;
2873 2960
2874 pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", 2961 pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2886,6 +2973,12 @@ static struct module *load_module(void __user *umod,
2886 goto free_copy; 2973 goto free_copy;
2887 } 2974 }
2888 2975
2976#ifdef CONFIG_MODULE_SIG
2977 mod->sig_ok = info.sig_ok;
2978 if (!mod->sig_ok)
2979 add_taint_module(mod, TAINT_FORCED_MODULE);
2980#endif
2981
2889 /* Now module is in final location, initialize linked lists, etc. */ 2982 /* Now module is in final location, initialize linked lists, etc. */
2890 err = module_unload_init(mod); 2983 err = module_unload_init(mod);
2891 if (err) 2984 if (err)
@@ -2934,8 +3027,18 @@ static struct module *load_module(void __user *umod,
2934 * function to insert in a way safe to concurrent readers. 3027 * function to insert in a way safe to concurrent readers.
2935 * The mutex protects against concurrent writers. 3028 * The mutex protects against concurrent writers.
2936 */ 3029 */
3030again:
2937 mutex_lock(&module_mutex); 3031 mutex_lock(&module_mutex);
2938 if (find_module(mod->name)) { 3032 if ((old = find_module(mod->name)) != NULL) {
3033 if (old->state == MODULE_STATE_COMING) {
3034 /* Wait in case it fails to load. */
3035 mutex_unlock(&module_mutex);
3036 err = wait_event_interruptible(module_wq,
3037 finished_loading(mod->name));
3038 if (err)
3039 goto free_arch_cleanup;
3040 goto again;
3041 }
2939 err = -EEXIST; 3042 err = -EEXIST;
2940 goto unlock; 3043 goto unlock;
2941 } 3044 }
@@ -2975,7 +3078,7 @@ static struct module *load_module(void __user *umod,
2975 /* Unlink carefully: kallsyms could be walking list. */ 3078 /* Unlink carefully: kallsyms could be walking list. */
2976 list_del_rcu(&mod->list); 3079 list_del_rcu(&mod->list);
2977 module_bug_cleanup(mod); 3080 module_bug_cleanup(mod);
2978 3081 wake_up_all(&module_wq);
2979 ddebug: 3082 ddebug:
2980 dynamic_debug_remove(info.debug); 3083 dynamic_debug_remove(info.debug);
2981 unlock: 3084 unlock:
@@ -3050,7 +3153,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
3050 blocking_notifier_call_chain(&module_notify_list, 3153 blocking_notifier_call_chain(&module_notify_list,
3051 MODULE_STATE_GOING, mod); 3154 MODULE_STATE_GOING, mod);
3052 free_module(mod); 3155 free_module(mod);
3053 wake_up(&module_wq); 3156 wake_up_all(&module_wq);
3054 return ret; 3157 return ret;
3055 } 3158 }
3056 if (ret > 0) { 3159 if (ret > 0) {
@@ -3062,9 +3165,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
3062 dump_stack(); 3165 dump_stack();
3063 } 3166 }
3064 3167
3065 /* Now it's a first class citizen! Wake up anyone waiting for it. */ 3168 /* Now it's a first class citizen! */
3066 mod->state = MODULE_STATE_LIVE; 3169 mod->state = MODULE_STATE_LIVE;
3067 wake_up(&module_wq);
3068 blocking_notifier_call_chain(&module_notify_list, 3170 blocking_notifier_call_chain(&module_notify_list,
3069 MODULE_STATE_LIVE, mod); 3171 MODULE_STATE_LIVE, mod);
3070 3172
@@ -3087,6 +3189,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
3087 mod->init_ro_size = 0; 3189 mod->init_ro_size = 0;
3088 mod->init_text_size = 0; 3190 mod->init_text_size = 0;
3089 mutex_unlock(&module_mutex); 3191 mutex_unlock(&module_mutex);
3192 wake_up_all(&module_wq);
3090 3193
3091 return 0; 3194 return 0;
3092} 3195}
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
new file mode 100644
index 000000000000..6b09f6983ac0
--- /dev/null
+++ b/kernel/module_signing.c
@@ -0,0 +1,243 @@
1/* Module signature checker
2 *
3 * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/kernel.h>
13#include <linux/err.h>
14#include <crypto/public_key.h>
15#include <crypto/hash.h>
16#include <keys/asymmetric-type.h>
17#include "module-internal.h"
18
/*
 * Module signature information block.
 *
 * The constituents of the signature section are, in order:
 *
 *	- Signer's name
 *	- Key identifier
 *	- Signature data
 *	- Information block
 *
 * This struct is the trailing information block: it is read from the
 * very end of the signature section and describes how to carve up the
 * bytes that precede it.
 */
struct module_signature {
	enum pkey_algo algo : 8;	/* Public-key crypto algorithm */
	enum pkey_hash_algo hash : 8;	/* Digest algorithm */
	enum pkey_id_type id_type : 8;	/* Key identifier type */
	u8 signer_len;		/* Length of signer's name */
	u8 key_id_len;		/* Length of key identifier */
	u8 __pad[3];
	__be32 sig_len;		/* Length of signature data */
};
38
/*
 * Digest the module contents.
 *
 * Allocates one buffer laid out as [public_key_signature | shash_desc
 * state | digest output], hashes [mod, mod+modlen) into pks->digest
 * with the requested algorithm, and returns the populated structure
 * (caller frees).  Returns ERR_PTR(-ENOPKG) if the hash algorithm is
 * not available, or another ERR_PTR on failure.
 */
static struct public_key_signature *mod_make_digest(enum pkey_hash_algo hash,
						    const void *mod,
						    unsigned long modlen)
{
	struct public_key_signature *pks;
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	size_t digest_size, desc_size;
	int ret;

	pr_devel("==>%s()\n", __func__);

	/* Allocate the hashing algorithm we're going to need and find out how
	 * big the hash operational data will be.
	 */
	tfm = crypto_alloc_shash(pkey_hash_algo[hash], 0, 0);
	if (IS_ERR(tfm))
		/* -ENOENT (algorithm unknown) is remapped to -ENOPKG */
		return (PTR_ERR(tfm) == -ENOENT) ? ERR_PTR(-ENOPKG) : ERR_CAST(tfm);

	desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
	digest_size = crypto_shash_digestsize(tfm);

	/* We allocate the hash operational data storage on the end of our
	 * context data and the digest output buffer on the end of that.
	 */
	ret = -ENOMEM;
	pks = kzalloc(digest_size + sizeof(*pks) + desc_size, GFP_KERNEL);
	if (!pks)
		goto error_no_pks;

	pks->pkey_hash_algo = hash;
	pks->digest = (u8 *)pks + sizeof(*pks) + desc_size;
	pks->digest_size = digest_size;

	desc = (void *)pks + sizeof(*pks);
	desc->tfm = tfm;
	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;

	ret = crypto_shash_init(desc);
	if (ret < 0)
		goto error;

	/* Single update+final pass over the whole module image. */
	ret = crypto_shash_finup(desc, mod, modlen, pks->digest);
	if (ret < 0)
		goto error;

	crypto_free_shash(tfm);
	pr_devel("<==%s() = ok\n", __func__);
	return pks;

error:
	kfree(pks);
error_no_pks:
	/* tfm is freed on both paths; pks only if it was allocated. */
	crypto_free_shash(tfm);
	pr_devel("<==%s() = %d\n", __func__, ret);
	return ERR_PTR(ret);
}
99
100/*
101 * Extract an MPI array from the signature data. This represents the actual
102 * signature. Each raw MPI is prefaced by a BE 2-byte value indicating the
103 * size of the MPI in bytes.
104 *
105 * RSA signatures only have one MPI, so currently we only read one.
106 */
107static int mod_extract_mpi_array(struct public_key_signature *pks,
108 const void *data, size_t len)
109{
110 size_t nbytes;
111 MPI mpi;
112
113 if (len < 3)
114 return -EBADMSG;
115 nbytes = ((const u8 *)data)[0] << 8 | ((const u8 *)data)[1];
116 data += 2;
117 len -= 2;
118 if (len != nbytes)
119 return -EBADMSG;
120
121 mpi = mpi_read_raw_data(data, nbytes);
122 if (!mpi)
123 return -ENOMEM;
124 pks->mpi[0] = mpi;
125 pks->nr_mpi = 1;
126 return 0;
127}
128
/*
 * Request an asymmetric key.
 *
 * Builds a search description of the form "<signer>: <hex key id>" and
 * looks it up in the module signing keyring.  On success returns the
 * key (caller must key_put() it); on failure returns an ERR_PTR, with
 * several internal search errors collapsed to -ENOKEY.
 */
static struct key *request_asymmetric_key(const char *signer, size_t signer_len,
					  const u8 *key_id, size_t key_id_len)
{
	key_ref_t key;
	size_t i;
	char *id, *q;

	pr_devel("==>%s(,%zu,,%zu)\n", __func__, signer_len, key_id_len);

	/* Construct an identifier: signer + ": " + 2 hex chars per key-id
	 * byte + NUL. */
	id = kmalloc(signer_len + 2 + key_id_len * 2 + 1, GFP_KERNEL);
	if (!id)
		/* NOTE(review): allocation failure is reported as -ENOKEY
		 * rather than -ENOMEM, so callers see it as "no matching
		 * key" — confirm this is intentional. */
		return ERR_PTR(-ENOKEY);

	memcpy(id, signer, signer_len);

	q = id + signer_len;
	*q++ = ':';
	*q++ = ' ';
	for (i = 0; i < key_id_len; i++) {
		*q++ = hex_asc[*key_id >> 4];
		*q++ = hex_asc[*key_id++ & 0x0f];
	}

	*q = 0;

	pr_debug("Look up: \"%s\"\n", id);

	key = keyring_search(make_key_ref(modsign_keyring, 1),
			     &key_type_asymmetric, id);
	if (IS_ERR(key))
		pr_warn("Request for unknown module key '%s' err %ld\n",
			id, PTR_ERR(key));
	kfree(id);

	if (IS_ERR(key)) {
		switch (PTR_ERR(key)) {
		/* Hide some search errors */
		case -EACCES:
		case -ENOTDIR:
		case -EAGAIN:
			return ERR_PTR(-ENOKEY);
		default:
			return ERR_CAST(key);
		}
	}

	pr_devel("<==%s() = 0 [%x]\n", __func__, key_serial(key_ref_to_ptr(key)));
	return key_ref_to_ptr(key);
}
182
/*
 * Verify the signature on a module.
 *
 * The signature blob is laid out as: signer's name, key identifier,
 * signature data, then a fixed struct module_signature trailer that
 * describes the lengths of the preceding fields.
 *
 * Returns 0 on successful verification; -EBADMSG for a malformed blob,
 * -ENOPKG for an unsupported algorithm/identifier, -ENOKEY when no
 * matching key is found, or another negative error.
 */
int mod_verify_sig(const void *mod, unsigned long modlen,
		   const void *sig, unsigned long siglen)
{
	struct public_key_signature *pks;
	struct module_signature ms;
	struct key *key;
	size_t sig_len;
	int ret;

	pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen);

	if (siglen <= sizeof(ms))
		return -EBADMSG;

	/* The info block sits at the very end of the signature section. */
	memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms));
	siglen -= sizeof(ms);

	/* The remaining bytes must be exactly signer + key id + sig data. */
	sig_len = be32_to_cpu(ms.sig_len);
	if (sig_len >= siglen ||
	    siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len)
		return -EBADMSG;

	/* For the moment, only support RSA and X.509 identifiers */
	if (ms.algo != PKEY_ALGO_RSA ||
	    ms.id_type != PKEY_ID_X509)
		return -ENOPKG;

	if (ms.hash >= PKEY_HASH__LAST ||
	    !pkey_hash_algo[ms.hash])
		return -ENOPKG;

	/* Signer name starts at sig[0], key id immediately follows it. */
	key = request_asymmetric_key(sig, ms.signer_len,
				     sig + ms.signer_len, ms.key_id_len);
	if (IS_ERR(key))
		return PTR_ERR(key);

	pks = mod_make_digest(ms.hash, mod, modlen);
	if (IS_ERR(pks)) {
		ret = PTR_ERR(pks);
		goto error_put_key;
	}

	/* Signature data follows the signer name and key id. */
	ret = mod_extract_mpi_array(pks, sig + ms.signer_len + ms.key_id_len,
				    sig_len);
	if (ret < 0)
		goto error_free_pks;

	ret = verify_signature(key, pks);
	pr_devel("verify_signature() = %d\n", ret);

error_free_pks:
	mpi_free(pks->rsa.s);
	kfree(pks);
error_put_key:
	key_put(key);
	pr_devel("<==%s() = %d\n", __func__, ret);
	return ret;
}
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4fb2376ddf06..74df86bd9204 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
74 .orphan_nxttail = &sname##_state.orphan_nxtlist, \ 74 .orphan_nxttail = &sname##_state.orphan_nxtlist, \
75 .orphan_donetail = &sname##_state.orphan_donelist, \ 75 .orphan_donetail = &sname##_state.orphan_donelist, \
76 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 76 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
77 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
77 .name = #sname, \ 78 .name = #sname, \
78} 79}
79 80
@@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1197 raw_spin_unlock_irq(&rnp->lock); 1198 raw_spin_unlock_irq(&rnp->lock);
1198 1199
1199 /* Exclude any concurrent CPU-hotplug operations. */ 1200 /* Exclude any concurrent CPU-hotplug operations. */
1200 get_online_cpus(); 1201 mutex_lock(&rsp->onoff_mutex);
1201 1202
1202 /* 1203 /*
1203 * Set the quiescent-state-needed bits in all the rcu_node 1204 * Set the quiescent-state-needed bits in all the rcu_node
@@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1234 cond_resched(); 1235 cond_resched();
1235 } 1236 }
1236 1237
1237 put_online_cpus(); 1238 mutex_unlock(&rsp->onoff_mutex);
1238 return 1; 1239 return 1;
1239} 1240}
1240 1241
@@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1700 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ 1701 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
1701 1702
1702 /* Exclude any attempts to start a new grace period. */ 1703 /* Exclude any attempts to start a new grace period. */
1704 mutex_lock(&rsp->onoff_mutex);
1703 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1705 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1704 1706
1705 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ 1707 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
@@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1744 init_callback_list(rdp); 1746 init_callback_list(rdp);
1745 /* Disallow further callbacks on this CPU. */ 1747 /* Disallow further callbacks on this CPU. */
1746 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 1748 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
1749 mutex_unlock(&rsp->onoff_mutex);
1747} 1750}
1748 1751
1749#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1752#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2648 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2651 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2649 struct rcu_node *rnp = rcu_get_root(rsp); 2652 struct rcu_node *rnp = rcu_get_root(rsp);
2650 2653
2654 /* Exclude new grace periods. */
2655 mutex_lock(&rsp->onoff_mutex);
2656
2651 /* Set up local state, ensuring consistent view of global state. */ 2657 /* Set up local state, ensuring consistent view of global state. */
2652 raw_spin_lock_irqsave(&rnp->lock, flags); 2658 raw_spin_lock_irqsave(&rnp->lock, flags);
2653 rdp->beenonline = 1; /* We have now been online. */ 2659 rdp->beenonline = 1; /* We have now been online. */
@@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2662 rcu_prepare_for_idle_init(cpu); 2668 rcu_prepare_for_idle_init(cpu);
2663 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2669 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2664 2670
2665 /*
2666 * A new grace period might start here. If so, we won't be part
2667 * of it, but that is OK, as we are currently in a quiescent state.
2668 */
2669
2670 /* Exclude any attempts to start a new GP on large systems. */
2671 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
2672
2673 /* Add CPU to rcu_node bitmasks. */ 2671 /* Add CPU to rcu_node bitmasks. */
2674 rnp = rdp->mynode; 2672 rnp = rdp->mynode;
2675 mask = rdp->grpmask; 2673 mask = rdp->grpmask;
@@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2693 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 2691 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
2694 rnp = rnp->parent; 2692 rnp = rnp->parent;
2695 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 2693 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
2694 local_irq_restore(flags);
2696 2695
2697 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2696 mutex_unlock(&rsp->onoff_mutex);
2698} 2697}
2699 2698
2700static void __cpuinit rcu_prepare_cpu(int cpu) 2699static void __cpuinit rcu_prepare_cpu(int cpu)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 5faf05d68326..a240f032848e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -394,11 +394,17 @@ struct rcu_state {
394 struct rcu_head **orphan_donetail; /* Tail of above. */ 394 struct rcu_head **orphan_donetail; /* Tail of above. */
395 long qlen_lazy; /* Number of lazy callbacks. */ 395 long qlen_lazy; /* Number of lazy callbacks. */
396 long qlen; /* Total number of callbacks. */ 396 long qlen; /* Total number of callbacks. */
397 /* End of fields guarded by onofflock. */
398
399 struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
400
397 struct mutex barrier_mutex; /* Guards barrier fields. */ 401 struct mutex barrier_mutex; /* Guards barrier fields. */
398 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 402 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
399 struct completion barrier_completion; /* Wake at barrier end. */ 403 struct completion barrier_completion; /* Wake at barrier end. */
400 unsigned long n_barrier_done; /* ++ at start and end of */ 404 unsigned long n_barrier_done; /* ++ at start and end of */
401 /* _rcu_barrier(). */ 405 /* _rcu_barrier(). */
406 /* End of fields guarded by barrier_mutex. */
407
402 unsigned long jiffies_force_qs; /* Time at which to invoke */ 408 unsigned long jiffies_force_qs; /* Time at which to invoke */
403 /* force_quiescent_state(). */ 409 /* force_quiescent_state(). */
404 unsigned long n_force_qs; /* Number of calls to */ 410 unsigned long n_force_qs; /* Number of calls to */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c17747236438..2d8927fda712 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -505,7 +505,7 @@ static inline void init_hrtick(void)
505#ifdef CONFIG_SMP 505#ifdef CONFIG_SMP
506 506
507#ifndef tsk_is_polling 507#ifndef tsk_is_polling
508#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 508#define tsk_is_polling(t) 0
509#endif 509#endif
510 510
511void resched_task(struct task_struct *p) 511void resched_task(struct task_struct *p)
@@ -6122,6 +6122,17 @@ static void sched_init_numa(void)
6122 * numbers. 6122 * numbers.
6123 */ 6123 */
6124 6124
6125 /*
6126 * Here, we should temporarily reset sched_domains_numa_levels to 0.
6127 * If it fails to allocate memory for array sched_domains_numa_masks[][],
6128 * the array will contain less then 'level' members. This could be
6129 * dangerous when we use it to iterate array sched_domains_numa_masks[][]
6130 * in other functions.
6131 *
6132 * We reset it to 'level' at the end of this function.
6133 */
6134 sched_domains_numa_levels = 0;
6135
6125 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); 6136 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
6126 if (!sched_domains_numa_masks) 6137 if (!sched_domains_numa_masks)
6127 return; 6138 return;
@@ -6176,11 +6187,68 @@ static void sched_init_numa(void)
6176 } 6187 }
6177 6188
6178 sched_domain_topology = tl; 6189 sched_domain_topology = tl;
6190
6191 sched_domains_numa_levels = level;
6192}
6193
/* Set @cpu's bit in sched_domains_numa_masks[][] when it comes online. */
6194static void sched_domains_numa_masks_set(int cpu)
6195{
6196 int i, j;
6197 int node = cpu_to_node(cpu);
6198
	/* For every NUMA level, mark @cpu in the mask of each node that lies
	 * within that level's distance of @cpu's own node. */
6199 for (i = 0; i < sched_domains_numa_levels; i++) {
6200 for (j = 0; j < nr_node_ids; j++) {
6201 if (node_distance(j, node) <= sched_domains_numa_distance[i])
6202 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
6203 }
6204 }
6205}
6206
/* Clear @cpu's bit from every per-level, per-node mask when it goes dead. */
6207static void sched_domains_numa_masks_clear(int cpu)
6208{
6209 int i, j;
	/* No distance check needed on teardown: clearing a bit that was never
	 * set is harmless, so just sweep all levels and nodes. */
6210 for (i = 0; i < sched_domains_numa_levels; i++) {
6211 for (j = 0; j < nr_node_ids; j++)
6212 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
6213 }
6214}
6215
6216/*
6217 * Update sched_domains_numa_masks[level][node] array when new cpus
6218 * are onlined.
6219 */
6220static int sched_domains_numa_masks_update(struct notifier_block *nfb,
6221 unsigned long action,
6222 void *hcpu)
6223{
6224 int cpu = (long)hcpu;
6225
	/* Mask off CPU_TASKS_FROZEN so suspend/resume transitions are handled
	 * the same way as ordinary hotplug events. */
6226 switch (action & ~CPU_TASKS_FROZEN) {
6227 case CPU_ONLINE:
6228 sched_domains_numa_masks_set(cpu);
6229 break;
6230
6231 case CPU_DEAD:
6232 sched_domains_numa_masks_clear(cpu);
6233 break;
6234
	/* Other transitions are not our concern. */
6235 default:
6236 return NOTIFY_DONE;
6237 }
6238
6239 return NOTIFY_OK;
6179} 6240}
6180#else 6241#else
6181static inline void sched_init_numa(void) 6242static inline void sched_init_numa(void)
6182{ 6243{
6183} 6244}
6245
6246static int sched_domains_numa_masks_update(struct notifier_block *nfb,
6247 unsigned long action,
6248 void *hcpu)
6249{
6250 return 0;
6251}
6184#endif /* CONFIG_NUMA */ 6252#endif /* CONFIG_NUMA */
6185 6253
6186static int __sdt_alloc(const struct cpumask *cpu_map) 6254static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -6629,6 +6697,7 @@ void __init sched_init_smp(void)
6629 mutex_unlock(&sched_domains_mutex); 6697 mutex_unlock(&sched_domains_mutex);
6630 put_online_cpus(); 6698 put_online_cpus();
6631 6699
6700 hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
6632 hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); 6701 hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
6633 hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); 6702 hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
6634 6703
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf80696..d226c6a3fd28 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
30#include <linux/export.h> 30#include <linux/export.h>
31#include <linux/timex.h> 31#include <linux/timex.h>
32#include <linux/capability.h> 32#include <linux/capability.h>
33#include <linux/clocksource.h> 33#include <linux/timekeeper_internal.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/security.h> 36#include <linux/security.h>
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd452b75..8601f0db1261 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA
16config GENERIC_TIME_VSYSCALL 16config GENERIC_TIME_VSYSCALL
17 bool 17 bool
18 18
19# Timekeeping vsyscall support
20config GENERIC_TIME_VSYSCALL_OLD
21 bool
22
19# ktime_t scalar 64bit nsec representation 23# ktime_t scalar 64bit nsec representation
20config KTIME_SCALAR 24config KTIME_SCALAR
21 bool 25 bool
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index aa27d391bfc8..f11d83b12949 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -37,7 +37,6 @@
37static struct alarm_base { 37static struct alarm_base {
38 spinlock_t lock; 38 spinlock_t lock;
39 struct timerqueue_head timerqueue; 39 struct timerqueue_head timerqueue;
40 struct hrtimer timer;
41 ktime_t (*gettime)(void); 40 ktime_t (*gettime)(void);
42 clockid_t base_clockid; 41 clockid_t base_clockid;
43} alarm_bases[ALARM_NUMTYPE]; 42} alarm_bases[ALARM_NUMTYPE];
@@ -46,6 +45,8 @@ static struct alarm_base {
46static ktime_t freezer_delta; 45static ktime_t freezer_delta;
47static DEFINE_SPINLOCK(freezer_delta_lock); 46static DEFINE_SPINLOCK(freezer_delta_lock);
48 47
48static struct wakeup_source *ws;
49
49#ifdef CONFIG_RTC_CLASS 50#ifdef CONFIG_RTC_CLASS
50/* rtc timer and device for setting alarm wakeups at suspend */ 51/* rtc timer and device for setting alarm wakeups at suspend */
51static struct rtc_timer rtctimer; 52static struct rtc_timer rtctimer;
@@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { }
130 * @base: pointer to the base where the timer is being run 131 * @base: pointer to the base where the timer is being run
131 * @alarm: pointer to alarm being enqueued. 132 * @alarm: pointer to alarm being enqueued.
132 * 133 *
133 * Adds alarm to a alarm_base timerqueue and if necessary sets 134 * Adds alarm to a alarm_base timerqueue
134 * an hrtimer to run.
135 * 135 *
136 * Must hold base->lock when calling. 136 * Must hold base->lock when calling.
137 */ 137 */
138static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) 138static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
139{ 139{
140 if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
141 timerqueue_del(&base->timerqueue, &alarm->node);
142
140 timerqueue_add(&base->timerqueue, &alarm->node); 143 timerqueue_add(&base->timerqueue, &alarm->node);
141 alarm->state |= ALARMTIMER_STATE_ENQUEUED; 144 alarm->state |= ALARMTIMER_STATE_ENQUEUED;
142
143 if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
144 hrtimer_try_to_cancel(&base->timer);
145 hrtimer_start(&base->timer, alarm->node.expires,
146 HRTIMER_MODE_ABS);
147 }
148} 145}
149 146
150/** 147/**
151 * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue 148 * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue
152 * @base: pointer to the base where the timer is running 149 * @base: pointer to the base where the timer is running
153 * @alarm: pointer to alarm being removed 150 * @alarm: pointer to alarm being removed
154 * 151 *
155 * Removes alarm to a alarm_base timerqueue and if necessary sets 152 * Removes alarm to a alarm_base timerqueue
156 * a new timer to run.
157 * 153 *
158 * Must hold base->lock when calling. 154 * Must hold base->lock when calling.
159 */ 155 */
160static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) 156static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
161{ 157{
162 struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
163
164 if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) 158 if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
165 return; 159 return;
166 160
167 timerqueue_del(&base->timerqueue, &alarm->node); 161 timerqueue_del(&base->timerqueue, &alarm->node);
168 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; 162 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
169
170 if (next == &alarm->node) {
171 hrtimer_try_to_cancel(&base->timer);
172 next = timerqueue_getnext(&base->timerqueue);
173 if (!next)
174 return;
175 hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
176 }
177} 163}
178 164
179 165
@@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
188 */ 174 */
189static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) 175static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
190{ 176{
191 struct alarm_base *base = container_of(timer, struct alarm_base, timer); 177 struct alarm *alarm = container_of(timer, struct alarm, timer);
192 struct timerqueue_node *next; 178 struct alarm_base *base = &alarm_bases[alarm->type];
193 unsigned long flags; 179 unsigned long flags;
194 ktime_t now;
195 int ret = HRTIMER_NORESTART; 180 int ret = HRTIMER_NORESTART;
196 int restart = ALARMTIMER_NORESTART; 181 int restart = ALARMTIMER_NORESTART;
197 182
198 spin_lock_irqsave(&base->lock, flags); 183 spin_lock_irqsave(&base->lock, flags);
199 now = base->gettime(); 184 alarmtimer_dequeue(base, alarm);
200 while ((next = timerqueue_getnext(&base->timerqueue))) { 185 spin_unlock_irqrestore(&base->lock, flags);
201 struct alarm *alarm;
202 ktime_t expired = next->expires;
203
204 if (expired.tv64 > now.tv64)
205 break;
206
207 alarm = container_of(next, struct alarm, node);
208
209 timerqueue_del(&base->timerqueue, &alarm->node);
210 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
211
212 alarm->state |= ALARMTIMER_STATE_CALLBACK;
213 spin_unlock_irqrestore(&base->lock, flags);
214 if (alarm->function)
215 restart = alarm->function(alarm, now);
216 spin_lock_irqsave(&base->lock, flags);
217 alarm->state &= ~ALARMTIMER_STATE_CALLBACK;
218 186
219 if (restart != ALARMTIMER_NORESTART) { 187 if (alarm->function)
220 timerqueue_add(&base->timerqueue, &alarm->node); 188 restart = alarm->function(alarm, base->gettime());
221 alarm->state |= ALARMTIMER_STATE_ENQUEUED;
222 }
223 }
224 189
225 if (next) { 190 spin_lock_irqsave(&base->lock, flags);
226 hrtimer_set_expires(&base->timer, next->expires); 191 if (restart != ALARMTIMER_NORESTART) {
192 hrtimer_set_expires(&alarm->timer, alarm->node.expires);
193 alarmtimer_enqueue(base, alarm);
227 ret = HRTIMER_RESTART; 194 ret = HRTIMER_RESTART;
228 } 195 }
229 spin_unlock_irqrestore(&base->lock, flags); 196 spin_unlock_irqrestore(&base->lock, flags);
@@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev)
250 unsigned long flags; 217 unsigned long flags;
251 struct rtc_device *rtc; 218 struct rtc_device *rtc;
252 int i; 219 int i;
220 int ret;
253 221
254 spin_lock_irqsave(&freezer_delta_lock, flags); 222 spin_lock_irqsave(&freezer_delta_lock, flags);
255 min = freezer_delta; 223 min = freezer_delta;
@@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev)
279 if (min.tv64 == 0) 247 if (min.tv64 == 0)
280 return 0; 248 return 0;
281 249
282 /* XXX - Should we enforce a minimum sleep time? */ 250 if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
283 WARN_ON(min.tv64 < NSEC_PER_SEC); 251 __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
252 return -EBUSY;
253 }
284 254
285 /* Setup an rtc timer to fire that far in the future */ 255 /* Setup an rtc timer to fire that far in the future */
286 rtc_timer_cancel(rtc, &rtctimer); 256 rtc_timer_cancel(rtc, &rtctimer);
@@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev)
288 now = rtc_tm_to_ktime(tm); 258 now = rtc_tm_to_ktime(tm);
289 now = ktime_add(now, min); 259 now = ktime_add(now, min);
290 260
291 rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); 261 /* Set alarm, if in the past reject suspend briefly to handle */
292 262 ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
293 return 0; 263 if (ret < 0)
264 __pm_wakeup_event(ws, MSEC_PER_SEC);
265 return ret;
294} 266}
295#else 267#else
296static int alarmtimer_suspend(struct device *dev) 268static int alarmtimer_suspend(struct device *dev)
@@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
324 enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) 296 enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
325{ 297{
326 timerqueue_init(&alarm->node); 298 timerqueue_init(&alarm->node);
299 hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
300 HRTIMER_MODE_ABS);
301 alarm->timer.function = alarmtimer_fired;
327 alarm->function = function; 302 alarm->function = function;
328 alarm->type = type; 303 alarm->type = type;
329 alarm->state = ALARMTIMER_STATE_INACTIVE; 304 alarm->state = ALARMTIMER_STATE_INACTIVE;
@@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
334 * @alarm: ptr to alarm to set 309 * @alarm: ptr to alarm to set
335 * @start: time to run the alarm 310 * @start: time to run the alarm
336 */ 311 */
337void alarm_start(struct alarm *alarm, ktime_t start) 312int alarm_start(struct alarm *alarm, ktime_t start)
338{ 313{
339 struct alarm_base *base = &alarm_bases[alarm->type]; 314 struct alarm_base *base = &alarm_bases[alarm->type];
340 unsigned long flags; 315 unsigned long flags;
316 int ret;
341 317
342 spin_lock_irqsave(&base->lock, flags); 318 spin_lock_irqsave(&base->lock, flags);
343 if (alarmtimer_active(alarm))
344 alarmtimer_remove(base, alarm);
345 alarm->node.expires = start; 319 alarm->node.expires = start;
346 alarmtimer_enqueue(base, alarm); 320 alarmtimer_enqueue(base, alarm);
321 ret = hrtimer_start(&alarm->timer, alarm->node.expires,
322 HRTIMER_MODE_ABS);
347 spin_unlock_irqrestore(&base->lock, flags); 323 spin_unlock_irqrestore(&base->lock, flags);
324 return ret;
348} 325}
349 326
350/** 327/**
@@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm)
358{ 335{
359 struct alarm_base *base = &alarm_bases[alarm->type]; 336 struct alarm_base *base = &alarm_bases[alarm->type];
360 unsigned long flags; 337 unsigned long flags;
361 int ret = -1; 338 int ret;
362 spin_lock_irqsave(&base->lock, flags);
363
364 if (alarmtimer_callback_running(alarm))
365 goto out;
366 339
367 if (alarmtimer_is_queued(alarm)) { 340 spin_lock_irqsave(&base->lock, flags);
368 alarmtimer_remove(base, alarm); 341 ret = hrtimer_try_to_cancel(&alarm->timer);
369 ret = 1; 342 if (ret >= 0)
370 } else 343 alarmtimer_dequeue(base, alarm);
371 ret = 0;
372out:
373 spin_unlock_irqrestore(&base->lock, flags); 344 spin_unlock_irqrestore(&base->lock, flags);
374 return ret; 345 return ret;
375} 346}
@@ -802,10 +773,6 @@ static int __init alarmtimer_init(void)
802 for (i = 0; i < ALARM_NUMTYPE; i++) { 773 for (i = 0; i < ALARM_NUMTYPE; i++) {
803 timerqueue_init_head(&alarm_bases[i].timerqueue); 774 timerqueue_init_head(&alarm_bases[i].timerqueue);
804 spin_lock_init(&alarm_bases[i].lock); 775 spin_lock_init(&alarm_bases[i].lock);
805 hrtimer_init(&alarm_bases[i].timer,
806 alarm_bases[i].base_clockid,
807 HRTIMER_MODE_ABS);
808 alarm_bases[i].timer.function = alarmtimer_fired;
809 } 776 }
810 777
811 error = alarmtimer_rtc_interface_setup(); 778 error = alarmtimer_rtc_interface_setup();
@@ -821,6 +788,7 @@ static int __init alarmtimer_init(void)
821 error = PTR_ERR(pdev); 788 error = PTR_ERR(pdev);
822 goto out_drv; 789 goto out_drv;
823 } 790 }
791 ws = wakeup_source_register("alarmtimer");
824 return 0; 792 return 0;
825 793
826out_drv: 794out_drv:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da0537c10b..6629bf7b5285 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
37 * requested HZ value. It is also not recommended 37 * requested HZ value. It is also not recommended
38 * for "tick-less" systems. 38 * for "tick-less" systems.
39 */ 39 */
40#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ)) 40#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
41 41
42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier 42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
43 * conversion, the .shift value could be zero. However 43 * conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
95{ 95{
96 return &clocksource_jiffies; 96 return &clocksource_jiffies;
97} 97}
98
99struct clocksource refined_jiffies;
100
/*
 * Register a "refined-jiffies" clocksource whose mult value is derived from
 * the measured @cycles_per_second rather than the nominal HZ, making it
 * slightly more accurate (hence rating++) than plain jiffies.
 * Always returns 0.
 */
101int register_refined_jiffies(long cycles_per_second)
102{
103 u64 nsec_per_tick, shift_hz;
104 long cycles_per_tick;
105
106
107
	/* Start from the stock jiffies clocksource and rename/re-rate it. */
108 refined_jiffies = clocksource_jiffies;
109 refined_jiffies.name = "refined-jiffies";
110 refined_jiffies.rating++;
111
112 /* Calc cycles per tick */
113 cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
114 /* shift_hz stores hz<<8 for extra accuracy */
115 shift_hz = (u64)cycles_per_second << 8;
	/* Add half the divisor before do_div() so the result is rounded to
	 * nearest rather than truncated. */
116 shift_hz += cycles_per_tick/2;
117 do_div(shift_hz, cycles_per_tick);
118 /* Calculate nsec_per_tick using shift_hz */
119 nsec_per_tick = (u64)NSEC_PER_SEC << 8;
120 nsec_per_tick += (u32)shift_hz/2;
121 do_div(nsec_per_tick, (u32)shift_hz);
122
	/* Scale ns-per-tick up by the clocksource's fixed JIFFIES_SHIFT. */
123 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
124
125 clocksource_register(&refined_jiffies);
126 return 0;
127}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f423bdd035c2..a40260885265 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -835,7 +835,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
835 */ 835 */
836 if (ts->tick_stopped) { 836 if (ts->tick_stopped) {
837 touch_softlockup_watchdog(); 837 touch_softlockup_watchdog();
838 if (idle_cpu(cpu)) 838 if (is_idle_task(current))
839 ts->idle_jiffies++; 839 ts->idle_jiffies++;
840 } 840 }
841 update_process_times(user_mode(regs)); 841 update_process_times(user_mode(regs));
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5ce06a3fa91e..e424970bb562 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,6 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/timekeeper_internal.h>
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/interrupt.h> 13#include <linux/interrupt.h>
13#include <linux/percpu.h> 14#include <linux/percpu.h>
@@ -21,61 +22,6 @@
21#include <linux/tick.h> 22#include <linux/tick.h>
22#include <linux/stop_machine.h> 23#include <linux/stop_machine.h>
23 24
24/* Structure holding internal timekeeping values. */
25struct timekeeper {
26 /* Current clocksource used for timekeeping. */
27 struct clocksource *clock;
28 /* NTP adjusted clock multiplier */
29 u32 mult;
30 /* The shift value of the current clocksource. */
31 u32 shift;
32 /* Number of clock cycles in one NTP interval. */
33 cycle_t cycle_interval;
34 /* Number of clock shifted nano seconds in one NTP interval. */
35 u64 xtime_interval;
36 /* shifted nano seconds left over when rounding cycle_interval */
37 s64 xtime_remainder;
38 /* Raw nano seconds accumulated per NTP interval. */
39 u32 raw_interval;
40
41 /* Current CLOCK_REALTIME time in seconds */
42 u64 xtime_sec;
43 /* Clock shifted nano seconds */
44 u64 xtime_nsec;
45
46 /* Difference between accumulated time and NTP time in ntp
47 * shifted nano seconds. */
48 s64 ntp_error;
49 /* Shift conversion between clock shifted nano seconds and
50 * ntp shifted nano seconds. */
51 u32 ntp_error_shift;
52
53 /*
54 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
55 * for sub jiffie times) to get to monotonic time. Monotonic is pegged
56 * at zero at system boot time, so wall_to_monotonic will be negative,
57 * however, we will ALWAYS keep the tv_nsec part positive so we can use
58 * the usual normalization.
59 *
60 * wall_to_monotonic is moved after resume from suspend for the
61 * monotonic time not to jump. We need to add total_sleep_time to
62 * wall_to_monotonic to get the real boot based time offset.
63 *
64 * - wall_to_monotonic is no longer the boot time, getboottime must be
65 * used instead.
66 */
67 struct timespec wall_to_monotonic;
68 /* Offset clock monotonic -> clock realtime */
69 ktime_t offs_real;
70 /* time spent in suspend */
71 struct timespec total_sleep_time;
72 /* Offset clock monotonic -> clock boottime */
73 ktime_t offs_boot;
74 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
75 struct timespec raw_time;
76 /* Seqlock for all timekeeper values */
77 seqlock_t lock;
78};
79 25
80static struct timekeeper timekeeper; 26static struct timekeeper timekeeper;
81 27
@@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
96 } 42 }
97} 43}
98 44
99static struct timespec tk_xtime(struct timekeeper *tk)
100{
101 struct timespec ts;
102
103 ts.tv_sec = tk->xtime_sec;
104 ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
105 return ts;
106}
107
108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) 45static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
109{ 46{
110 tk->xtime_sec = ts->tv_sec; 47 tk->xtime_sec = ts->tv_sec;
@@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
246/* must hold write on timekeeper.lock */ 183/* must hold write on timekeeper.lock */
247static void timekeeping_update(struct timekeeper *tk, bool clearntp) 184static void timekeeping_update(struct timekeeper *tk, bool clearntp)
248{ 185{
249 struct timespec xt;
250
251 if (clearntp) { 186 if (clearntp) {
252 tk->ntp_error = 0; 187 tk->ntp_error = 0;
253 ntp_clear(); 188 ntp_clear();
254 } 189 }
255 xt = tk_xtime(tk); 190 update_vsyscall(tk);
256 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
257} 191}
258 192
259/** 193/**
@@ -1113,7 +1047,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1113 accumulate_nsecs_to_secs(tk); 1047 accumulate_nsecs_to_secs(tk);
1114 1048
1115 /* Accumulate raw time */ 1049 /* Accumulate raw time */
1116 raw_nsecs = tk->raw_interval << shift; 1050 raw_nsecs = (u64)tk->raw_interval << shift;
1117 raw_nsecs += tk->raw_time.tv_nsec; 1051 raw_nsecs += tk->raw_time.tv_nsec;
1118 if (raw_nsecs >= NSEC_PER_SEC) { 1052 if (raw_nsecs >= NSEC_PER_SEC) {
1119 u64 raw_secs = raw_nsecs; 1053 u64 raw_secs = raw_nsecs;
@@ -1130,6 +1064,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1130 return offset; 1064 return offset;
1131} 1065}
1132 1066
1067#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
1068static inline void old_vsyscall_fixup(struct timekeeper *tk)
1069{
1070 s64 remainder;
1071
1072 /*
1073 * Store only full nanoseconds into xtime_nsec after rounding
1074 * it up and add the remainder to the error difference.
1075 * XXX - This is necessary to avoid small 1ns inconsistencies caused
1076 * by truncating the remainder in vsyscalls. However, it causes
1077 * additional work to be done in timekeeping_adjust(). Once
1078 * the vsyscall implementations are converted to use xtime_nsec
1079 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
1080 * users are removed, this can be killed.
1081 */
	/* Drop the sub-nanosecond bits, round xtime_nsec up to the next whole
	 * (shifted) nanosecond, and feed the dropped bits back as NTP error. */
1082 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1083 tk->xtime_nsec -= remainder;
1084 tk->xtime_nsec += 1ULL << tk->shift;
1085 tk->ntp_error += remainder << tk->ntp_error_shift;
1086
1087}
1088#else
1089#define old_vsyscall_fixup(tk)
1090#endif
1091
1092
1093
1133/** 1094/**
1134 * update_wall_time - Uses the current clocksource to increment the wall time 1095 * update_wall_time - Uses the current clocksource to increment the wall time
1135 * 1096 *
@@ -1141,7 +1102,6 @@ static void update_wall_time(void)
1141 cycle_t offset; 1102 cycle_t offset;
1142 int shift = 0, maxshift; 1103 int shift = 0, maxshift;
1143 unsigned long flags; 1104 unsigned long flags;
1144 s64 remainder;
1145 1105
1146 write_seqlock_irqsave(&tk->lock, flags); 1106 write_seqlock_irqsave(&tk->lock, flags);
1147 1107
@@ -1183,20 +1143,11 @@ static void update_wall_time(void)
1183 /* correct the clock when NTP error is too big */ 1143 /* correct the clock when NTP error is too big */
1184 timekeeping_adjust(tk, offset); 1144 timekeeping_adjust(tk, offset);
1185 1145
1186
1187 /* 1146 /*
1188 * Store only full nanoseconds into xtime_nsec after rounding 1147 * XXX This can be killed once everyone converts
1189 * it up and add the remainder to the error difference. 1148 * to the new update_vsyscall.
1190 * XXX - This is necessary to avoid small 1ns inconsistnecies caused 1149 */
1191 * by truncating the remainder in vsyscalls. However, it causes 1150 old_vsyscall_fixup(tk);
1192 * additional work to be done in timekeeping_adjust(). Once
1193 * the vsyscall implementations are converted to use xtime_nsec
1194 * (shifted nanoseconds), this can be killed.
1195 */
1196 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1197 tk->xtime_nsec -= remainder;
1198 tk->xtime_nsec += 1ULL << tk->shift;
1199 tk->ntp_error += remainder << tk->ntp_error_shift;
1200 1151
1201 /* 1152 /*
1202 * Finally, make sure that after the rounding 1153 * Finally, make sure that after the rounding
diff --git a/kernel/timer.c b/kernel/timer.c
index d5de1b2292aa..367d00858482 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64);
63#define TVR_SIZE (1 << TVR_BITS) 63#define TVR_SIZE (1 << TVR_BITS)
64#define TVN_MASK (TVN_SIZE - 1) 64#define TVN_MASK (TVN_SIZE - 1)
65#define TVR_MASK (TVR_SIZE - 1) 65#define TVR_MASK (TVR_SIZE - 1)
66#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
66 67
67struct tvec { 68struct tvec {
68 struct list_head vec[TVN_SIZE]; 69 struct list_head vec[TVN_SIZE];
@@ -359,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
359 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); 360 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
360 } else { 361 } else {
361 int i; 362 int i;
362 /* If the timeout is larger than 0xffffffff on 64-bit 363 /* If the timeout is larger than MAX_TVAL (on 64-bit
363 * architectures then we use the maximum timeout: 364 * architectures or with CONFIG_BASE_SMALL=1) then we
365 * use the maximum timeout.
364 */ 366 */
365 if (idx > 0xffffffffUL) { 367 if (idx > MAX_TVAL) {
366 idx = 0xffffffffUL; 368 idx = MAX_TVAL;
367 expires = idx + base->timer_jiffies; 369 expires = idx + base->timer_jiffies;
368 } 370 }
369 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 371 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;