aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/audit.h7
-rw-r--r--kernel/audit_watch.c3
-rw-r--r--kernel/auditfilter.c65
-rw-r--r--kernel/auditsc.c217
-rw-r--r--kernel/debug/debug_core.c18
-rw-r--r--kernel/debug/kdb/kdb_bt.c2
-rw-r--r--kernel/debug/kdb/kdb_io.c33
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/irq/irqdomain.c33
-rw-r--r--kernel/kmod.c7
-rw-r--r--kernel/kthread.c1
-rw-r--r--kernel/rcutree.c21
-rw-r--r--kernel/rcutree.h6
-rw-r--r--kernel/sched/core.c71
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/time/Kconfig4
-rw-r--r--kernel/time/alarmtimer.c118
-rw-r--r--kernel/time/jiffies.c32
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/timekeeping.c117
-rw-r--r--kernel/timer.c10
25 files changed, 515 insertions, 298 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 6cd7529c9e6a..051e071a06e7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -193,7 +193,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
193 } 193 }
194} 194}
195 195
196static int acct_on(char *name) 196static int acct_on(struct filename *pathname)
197{ 197{
198 struct file *file; 198 struct file *file;
199 struct vfsmount *mnt; 199 struct vfsmount *mnt;
@@ -201,7 +201,7 @@ static int acct_on(char *name)
201 struct bsd_acct_struct *acct = NULL; 201 struct bsd_acct_struct *acct = NULL;
202 202
203 /* Difference from BSD - they don't do O_APPEND */ 203 /* Difference from BSD - they don't do O_APPEND */
204 file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); 204 file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
205 if (IS_ERR(file)) 205 if (IS_ERR(file))
206 return PTR_ERR(file); 206 return PTR_ERR(file);
207 207
@@ -260,7 +260,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
260 return -EPERM; 260 return -EPERM;
261 261
262 if (name) { 262 if (name) {
263 char *tmp = getname(name); 263 struct filename *tmp = getname(name);
264 if (IS_ERR(tmp)) 264 if (IS_ERR(tmp))
265 return (PTR_ERR(tmp)); 265 return (PTR_ERR(tmp));
266 error = acct_on(tmp); 266 error = acct_on(tmp);
diff --git a/kernel/audit.c b/kernel/audit.c
index 4d0ceede3319..40414e9143db 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1440,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct path *link)
1440 1440
1441 ab = audit_log_start(current->audit_context, GFP_KERNEL, 1441 ab = audit_log_start(current->audit_context, GFP_KERNEL,
1442 AUDIT_ANOM_LINK); 1442 AUDIT_ANOM_LINK);
1443 if (!ab)
1444 return;
1443 audit_log_format(ab, "op=%s action=denied", operation); 1445 audit_log_format(ab, "op=%s action=denied", operation);
1444 audit_log_format(ab, " pid=%d comm=", current->pid); 1446 audit_log_format(ab, " pid=%d comm=", current->pid);
1445 audit_log_untrustedstring(ab, current->comm); 1447 audit_log_untrustedstring(ab, current->comm);
diff --git a/kernel/audit.h b/kernel/audit.h
index 9eb3d79482b6..d51cba868e1b 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -74,12 +74,15 @@ static inline int audit_hash_ino(u32 ino)
74 return (ino & (AUDIT_INODE_BUCKETS-1)); 74 return (ino & (AUDIT_INODE_BUCKETS-1));
75} 75}
76 76
77/* Indicates that audit should log the full pathname. */
78#define AUDIT_NAME_FULL -1
79
77extern int audit_match_class(int class, unsigned syscall); 80extern int audit_match_class(int class, unsigned syscall);
78extern int audit_comparator(const u32 left, const u32 op, const u32 right); 81extern int audit_comparator(const u32 left, const u32 op, const u32 right);
79extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right); 82extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
80extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right); 83extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
81extern int audit_compare_dname_path(const char *dname, const char *path, 84extern int parent_len(const char *path);
82 int *dirlen); 85extern int audit_compare_dname_path(const char *dname, const char *path, int plen);
83extern struct sk_buff * audit_make_reply(int pid, int seq, int type, 86extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
84 int done, int multi, 87 int done, int multi,
85 const void *payload, int size); 88 const void *payload, int size);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1c22ec3d87bc..9a9ae6e3d290 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -265,7 +265,8 @@ static void audit_update_watch(struct audit_parent *parent,
265 /* Run all of the watches on this parent looking for the one that 265 /* Run all of the watches on this parent looking for the one that
266 * matches the given dname */ 266 * matches the given dname */
267 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) { 267 list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
268 if (audit_compare_dname_path(dname, owatch->path, NULL)) 268 if (audit_compare_dname_path(dname, owatch->path,
269 AUDIT_NAME_FULL))
269 continue; 270 continue;
270 271
271 /* If the update involves invalidating rules, do the inode-based 272 /* If the update involves invalidating rules, do the inode-based
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4bcdbaf4d4d..7f19f23d38a3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1298,41 +1298,60 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
1298 } 1298 }
1299} 1299}
1300 1300
1301/* Compare given dentry name with last component in given path, 1301/**
1302 * return of 0 indicates a match. */ 1302 * parent_len - find the length of the parent portion of a pathname
1303int audit_compare_dname_path(const char *dname, const char *path, 1303 * @path: pathname of which to determine length
1304 int *dirlen) 1304 */
1305int parent_len(const char *path)
1305{ 1306{
1306 int dlen, plen; 1307 int plen;
1307 const char *p; 1308 const char *p;
1308 1309
1309 if (!dname || !path)
1310 return 1;
1311
1312 dlen = strlen(dname);
1313 plen = strlen(path); 1310 plen = strlen(path);
1314 if (plen < dlen) 1311
1315 return 1; 1312 if (plen == 0)
1313 return plen;
1316 1314
1317 /* disregard trailing slashes */ 1315 /* disregard trailing slashes */
1318 p = path + plen - 1; 1316 p = path + plen - 1;
1319 while ((*p == '/') && (p > path)) 1317 while ((*p == '/') && (p > path))
1320 p--; 1318 p--;
1321 1319
1322 /* find last path component */ 1320 /* walk backward until we find the next slash or hit beginning */
1323 p = p - dlen + 1; 1321 while ((*p != '/') && (p > path))
1324 if (p < path) 1322 p--;
1323
1324 /* did we find a slash? Then increment to include it in path */
1325 if (*p == '/')
1326 p++;
1327
1328 return p - path;
1329}
1330
1331/**
1332 * audit_compare_dname_path - compare given dentry name with last component in
1333 * given path. Return of 0 indicates a match.
1334 * @dname: dentry name that we're comparing
1335 * @path: full pathname that we're comparing
1336 * @parentlen: length of the parent if known. Passing in AUDIT_NAME_FULL
1337 * here indicates that we must compute this value.
1338 */
1339int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
1340{
1341 int dlen, pathlen;
1342 const char *p;
1343
1344 dlen = strlen(dname);
1345 pathlen = strlen(path);
1346 if (pathlen < dlen)
1325 return 1; 1347 return 1;
1326 else if (p > path) {
1327 if (*--p != '/')
1328 return 1;
1329 else
1330 p++;
1331 }
1332 1348
1333 /* return length of path's directory component */ 1349 parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen;
1334 if (dirlen) 1350 if (pathlen - parentlen != dlen)
1335 *dirlen = p - path; 1351 return 1;
1352
1353 p = path + parentlen;
1354
1336 return strncmp(p, dname, dlen); 1355 return strncmp(p, dname, dlen);
1337} 1356}
1338 1357
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f4a7756f999c..2f186ed80c40 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -81,9 +81,6 @@
81 * a name dynamically and also add those to the list anchored by names_list. */ 81 * a name dynamically and also add those to the list anchored by names_list. */
82#define AUDIT_NAMES 5 82#define AUDIT_NAMES 5
83 83
84/* Indicates that audit should log the full pathname. */
85#define AUDIT_NAME_FULL -1
86
87/* no execve audit message should be longer than this (userspace limits) */ 84/* no execve audit message should be longer than this (userspace limits) */
88#define MAX_EXECVE_AUDIT_LEN 7500 85#define MAX_EXECVE_AUDIT_LEN 7500
89 86
@@ -106,27 +103,29 @@ struct audit_cap_data {
106 * we don't let putname() free it (instead we free all of the saved 103 * we don't let putname() free it (instead we free all of the saved
107 * pointers at syscall exit time). 104 * pointers at syscall exit time).
108 * 105 *
109 * Further, in fs/namei.c:path_lookup() we store the inode and device. */ 106 * Further, in fs/namei.c:path_lookup() we store the inode and device.
107 */
110struct audit_names { 108struct audit_names {
111 struct list_head list; /* audit_context->names_list */ 109 struct list_head list; /* audit_context->names_list */
112 const char *name; 110 struct filename *name;
113 unsigned long ino; 111 unsigned long ino;
114 dev_t dev; 112 dev_t dev;
115 umode_t mode; 113 umode_t mode;
116 kuid_t uid; 114 kuid_t uid;
117 kgid_t gid; 115 kgid_t gid;
118 dev_t rdev; 116 dev_t rdev;
119 u32 osid; 117 u32 osid;
120 struct audit_cap_data fcap; 118 struct audit_cap_data fcap;
121 unsigned int fcap_ver; 119 unsigned int fcap_ver;
122 int name_len; /* number of name's characters to log */ 120 int name_len; /* number of name's characters to log */
123 bool name_put; /* call __putname() for this name */ 121 unsigned char type; /* record type */
122 bool name_put; /* call __putname() for this name */
124 /* 123 /*
125 * This was an allocated audit_names and not from the array of 124 * This was an allocated audit_names and not from the array of
126 * names allocated in the task audit context. Thus this name 125 * names allocated in the task audit context. Thus this name
127 * should be freed on syscall exit 126 * should be freed on syscall exit
128 */ 127 */
129 bool should_free; 128 bool should_free;
130}; 129};
131 130
132struct audit_aux_data { 131struct audit_aux_data {
@@ -998,7 +997,7 @@ static inline void audit_free_names(struct audit_context *context)
998 context->ino_count); 997 context->ino_count);
999 list_for_each_entry(n, &context->names_list, list) { 998 list_for_each_entry(n, &context->names_list, list) {
1000 printk(KERN_ERR "names[%d] = %p = %s\n", i, 999 printk(KERN_ERR "names[%d] = %p = %s\n", i,
1001 n->name, n->name ?: "(null)"); 1000 n->name, n->name->name ?: "(null)");
1002 } 1001 }
1003 dump_stack(); 1002 dump_stack();
1004 return; 1003 return;
@@ -1555,7 +1554,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
1555 case AUDIT_NAME_FULL: 1554 case AUDIT_NAME_FULL:
1556 /* log the full path */ 1555 /* log the full path */
1557 audit_log_format(ab, " name="); 1556 audit_log_format(ab, " name=");
1558 audit_log_untrustedstring(ab, n->name); 1557 audit_log_untrustedstring(ab, n->name->name);
1559 break; 1558 break;
1560 case 0: 1559 case 0:
1561 /* name was specified as a relative path and the 1560 /* name was specified as a relative path and the
@@ -1565,7 +1564,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
1565 default: 1564 default:
1566 /* log the name's directory component */ 1565 /* log the name's directory component */
1567 audit_log_format(ab, " name="); 1566 audit_log_format(ab, " name=");
1568 audit_log_n_untrustedstring(ab, n->name, 1567 audit_log_n_untrustedstring(ab, n->name->name,
1569 n->name_len); 1568 n->name_len);
1570 } 1569 }
1571 } else 1570 } else
@@ -1995,7 +1994,8 @@ retry:
1995#endif 1994#endif
1996} 1995}
1997 1996
1998static struct audit_names *audit_alloc_name(struct audit_context *context) 1997static struct audit_names *audit_alloc_name(struct audit_context *context,
1998 unsigned char type)
1999{ 1999{
2000 struct audit_names *aname; 2000 struct audit_names *aname;
2001 2001
@@ -2010,6 +2010,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
2010 } 2010 }
2011 2011
2012 aname->ino = (unsigned long)-1; 2012 aname->ino = (unsigned long)-1;
2013 aname->type = type;
2013 list_add_tail(&aname->list, &context->names_list); 2014 list_add_tail(&aname->list, &context->names_list);
2014 2015
2015 context->name_count++; 2016 context->name_count++;
@@ -2020,13 +2021,36 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
2020} 2021}
2021 2022
2022/** 2023/**
2024 * audit_reusename - fill out filename with info from existing entry
2025 * @uptr: userland ptr to pathname
2026 *
2027 * Search the audit_names list for the current audit context. If there is an
2028 * existing entry with a matching "uptr" then return the filename
2029 * associated with that audit_name. If not, return NULL.
2030 */
2031struct filename *
2032__audit_reusename(const __user char *uptr)
2033{
2034 struct audit_context *context = current->audit_context;
2035 struct audit_names *n;
2036
2037 list_for_each_entry(n, &context->names_list, list) {
2038 if (!n->name)
2039 continue;
2040 if (n->name->uptr == uptr)
2041 return n->name;
2042 }
2043 return NULL;
2044}
2045
2046/**
2023 * audit_getname - add a name to the list 2047 * audit_getname - add a name to the list
2024 * @name: name to add 2048 * @name: name to add
2025 * 2049 *
2026 * Add a name to the list of audit names for this context. 2050 * Add a name to the list of audit names for this context.
2027 * Called from fs/namei.c:getname(). 2051 * Called from fs/namei.c:getname().
2028 */ 2052 */
2029void __audit_getname(const char *name) 2053void __audit_getname(struct filename *name)
2030{ 2054{
2031 struct audit_context *context = current->audit_context; 2055 struct audit_context *context = current->audit_context;
2032 struct audit_names *n; 2056 struct audit_names *n;
@@ -2040,13 +2064,19 @@ void __audit_getname(const char *name)
2040 return; 2064 return;
2041 } 2065 }
2042 2066
2043 n = audit_alloc_name(context); 2067#if AUDIT_DEBUG
2068 /* The filename _must_ have a populated ->name */
2069 BUG_ON(!name->name);
2070#endif
2071
2072 n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
2044 if (!n) 2073 if (!n)
2045 return; 2074 return;
2046 2075
2047 n->name = name; 2076 n->name = name;
2048 n->name_len = AUDIT_NAME_FULL; 2077 n->name_len = AUDIT_NAME_FULL;
2049 n->name_put = true; 2078 n->name_put = true;
2079 name->aname = n;
2050 2080
2051 if (!context->pwd.dentry) 2081 if (!context->pwd.dentry)
2052 get_fs_pwd(current->fs, &context->pwd); 2082 get_fs_pwd(current->fs, &context->pwd);
@@ -2059,7 +2089,7 @@ void __audit_getname(const char *name)
2059 * then we delay the putname until syscall exit. 2089 * then we delay the putname until syscall exit.
2060 * Called from include/linux/fs.h:putname(). 2090 * Called from include/linux/fs.h:putname().
2061 */ 2091 */
2062void audit_putname(const char *name) 2092void audit_putname(struct filename *name)
2063{ 2093{
2064 struct audit_context *context = current->audit_context; 2094 struct audit_context *context = current->audit_context;
2065 2095
@@ -2074,7 +2104,7 @@ void audit_putname(const char *name)
2074 2104
2075 list_for_each_entry(n, &context->names_list, list) 2105 list_for_each_entry(n, &context->names_list, list)
2076 printk(KERN_ERR "name[%d] = %p = %s\n", i, 2106 printk(KERN_ERR "name[%d] = %p = %s\n", i,
2077 n->name, n->name ?: "(null)"); 2107 n->name, n->name->name ?: "(null)");
2078 } 2108 }
2079#endif 2109#endif
2080 __putname(name); 2110 __putname(name);
@@ -2088,8 +2118,8 @@ void audit_putname(const char *name)
2088 " put_count=%d\n", 2118 " put_count=%d\n",
2089 __FILE__, __LINE__, 2119 __FILE__, __LINE__,
2090 context->serial, context->major, 2120 context->serial, context->major,
2091 context->in_syscall, name, context->name_count, 2121 context->in_syscall, name->name,
2092 context->put_count); 2122 context->name_count, context->put_count);
2093 dump_stack(); 2123 dump_stack();
2094 } 2124 }
2095 } 2125 }
@@ -2132,13 +2162,13 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent
2132} 2162}
2133 2163
2134/** 2164/**
2135 * audit_inode - store the inode and device from a lookup 2165 * __audit_inode - store the inode and device from a lookup
2136 * @name: name being audited 2166 * @name: name being audited
2137 * @dentry: dentry being audited 2167 * @dentry: dentry being audited
2138 * 2168 * @parent: does this dentry represent the parent?
2139 * Called from fs/namei.c:path_lookup().
2140 */ 2169 */
2141void __audit_inode(const char *name, const struct dentry *dentry) 2170void __audit_inode(struct filename *name, const struct dentry *dentry,
2171 unsigned int parent)
2142{ 2172{
2143 struct audit_context *context = current->audit_context; 2173 struct audit_context *context = current->audit_context;
2144 const struct inode *inode = dentry->d_inode; 2174 const struct inode *inode = dentry->d_inode;
@@ -2147,24 +2177,69 @@ void __audit_inode(const char *name, const struct dentry *dentry)
2147 if (!context->in_syscall) 2177 if (!context->in_syscall)
2148 return; 2178 return;
2149 2179
2180 if (!name)
2181 goto out_alloc;
2182
2183#if AUDIT_DEBUG
2184 /* The struct filename _must_ have a populated ->name */
2185 BUG_ON(!name->name);
2186#endif
2187 /*
2188 * If we have a pointer to an audit_names entry already, then we can
2189 * just use it directly if the type is correct.
2190 */
2191 n = name->aname;
2192 if (n) {
2193 if (parent) {
2194 if (n->type == AUDIT_TYPE_PARENT ||
2195 n->type == AUDIT_TYPE_UNKNOWN)
2196 goto out;
2197 } else {
2198 if (n->type != AUDIT_TYPE_PARENT)
2199 goto out;
2200 }
2201 }
2202
2150 list_for_each_entry_reverse(n, &context->names_list, list) { 2203 list_for_each_entry_reverse(n, &context->names_list, list) {
2151 if (n->name && (n->name == name)) 2204 /* does the name pointer match? */
2152 goto out; 2205 if (!n->name || n->name->name != name->name)
2206 continue;
2207
2208 /* match the correct record type */
2209 if (parent) {
2210 if (n->type == AUDIT_TYPE_PARENT ||
2211 n->type == AUDIT_TYPE_UNKNOWN)
2212 goto out;
2213 } else {
2214 if (n->type != AUDIT_TYPE_PARENT)
2215 goto out;
2216 }
2153 } 2217 }
2154 2218
2155 /* unable to find the name from a previous getname() */ 2219out_alloc:
2156 n = audit_alloc_name(context); 2220 /* unable to find the name from a previous getname(). Allocate a new
2221 * anonymous entry.
2222 */
2223 n = audit_alloc_name(context, AUDIT_TYPE_NORMAL);
2157 if (!n) 2224 if (!n)
2158 return; 2225 return;
2159out: 2226out:
2227 if (parent) {
2228 n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
2229 n->type = AUDIT_TYPE_PARENT;
2230 } else {
2231 n->name_len = AUDIT_NAME_FULL;
2232 n->type = AUDIT_TYPE_NORMAL;
2233 }
2160 handle_path(dentry); 2234 handle_path(dentry);
2161 audit_copy_inode(n, dentry, inode); 2235 audit_copy_inode(n, dentry, inode);
2162} 2236}
2163 2237
2164/** 2238/**
2165 * audit_inode_child - collect inode info for created/removed objects 2239 * __audit_inode_child - collect inode info for created/removed objects
2166 * @dentry: dentry being audited
2167 * @parent: inode of dentry parent 2240 * @parent: inode of dentry parent
2241 * @dentry: dentry being audited
2242 * @type: AUDIT_TYPE_* value that we're looking for
2168 * 2243 *
2169 * For syscalls that create or remove filesystem objects, audit_inode 2244 * For syscalls that create or remove filesystem objects, audit_inode
2170 * can only collect information for the filesystem object's parent. 2245 * can only collect information for the filesystem object's parent.
@@ -2174,15 +2249,14 @@ out:
2174 * must be hooked prior, in order to capture the target inode during 2249 * must be hooked prior, in order to capture the target inode during
2175 * unsuccessful attempts. 2250 * unsuccessful attempts.
2176 */ 2251 */
2177void __audit_inode_child(const struct dentry *dentry, 2252void __audit_inode_child(const struct inode *parent,
2178 const struct inode *parent) 2253 const struct dentry *dentry,
2254 const unsigned char type)
2179{ 2255{
2180 struct audit_context *context = current->audit_context; 2256 struct audit_context *context = current->audit_context;
2181 const char *found_parent = NULL, *found_child = NULL;
2182 const struct inode *inode = dentry->d_inode; 2257 const struct inode *inode = dentry->d_inode;
2183 const char *dname = dentry->d_name.name; 2258 const char *dname = dentry->d_name.name;
2184 struct audit_names *n; 2259 struct audit_names *n, *found_parent = NULL, *found_child = NULL;
2185 int dirlen = 0;
2186 2260
2187 if (!context->in_syscall) 2261 if (!context->in_syscall)
2188 return; 2262 return;
@@ -2190,62 +2264,65 @@ void __audit_inode_child(const struct dentry *dentry,
2190 if (inode) 2264 if (inode)
2191 handle_one(inode); 2265 handle_one(inode);
2192 2266
2193 /* parent is more likely, look for it first */ 2267 /* look for a parent entry first */
2194 list_for_each_entry(n, &context->names_list, list) { 2268 list_for_each_entry(n, &context->names_list, list) {
2195 if (!n->name) 2269 if (!n->name || n->type != AUDIT_TYPE_PARENT)
2196 continue; 2270 continue;
2197 2271
2198 if (n->ino == parent->i_ino && 2272 if (n->ino == parent->i_ino &&
2199 !audit_compare_dname_path(dname, n->name, &dirlen)) { 2273 !audit_compare_dname_path(dname, n->name->name, n->name_len)) {
2200 n->name_len = dirlen; /* update parent data in place */ 2274 found_parent = n;
2201 found_parent = n->name; 2275 break;
2202 goto add_names;
2203 } 2276 }
2204 } 2277 }
2205 2278
2206 /* no matching parent, look for matching child */ 2279 /* is there a matching child entry? */
2207 list_for_each_entry(n, &context->names_list, list) { 2280 list_for_each_entry(n, &context->names_list, list) {
2208 if (!n->name) 2281 /* can only match entries that have a name */
2282 if (!n->name || n->type != type)
2209 continue; 2283 continue;
2210 2284
2211 /* strcmp() is the more likely scenario */ 2285 /* if we found a parent, make sure this one is a child of it */
2212 if (!strcmp(dname, n->name) || 2286 if (found_parent && (n->name != found_parent->name))
2213 !audit_compare_dname_path(dname, n->name, &dirlen)) { 2287 continue;
2214 if (inode) 2288
2215 audit_copy_inode(n, NULL, inode); 2289 if (!strcmp(dname, n->name->name) ||
2216 else 2290 !audit_compare_dname_path(dname, n->name->name,
2217 n->ino = (unsigned long)-1; 2291 found_parent ?
2218 found_child = n->name; 2292 found_parent->name_len :
2219 goto add_names; 2293 AUDIT_NAME_FULL)) {
2294 found_child = n;
2295 break;
2220 } 2296 }
2221 } 2297 }
2222 2298
2223add_names:
2224 if (!found_parent) { 2299 if (!found_parent) {
2225 n = audit_alloc_name(context); 2300 /* create a new, "anonymous" parent record */
2301 n = audit_alloc_name(context, AUDIT_TYPE_PARENT);
2226 if (!n) 2302 if (!n)
2227 return; 2303 return;
2228 audit_copy_inode(n, NULL, parent); 2304 audit_copy_inode(n, NULL, parent);
2229 } 2305 }
2230 2306
2231 if (!found_child) { 2307 if (!found_child) {
2232 n = audit_alloc_name(context); 2308 found_child = audit_alloc_name(context, type);
2233 if (!n) 2309 if (!found_child)
2234 return; 2310 return;
2235 2311
2236 /* Re-use the name belonging to the slot for a matching parent 2312 /* Re-use the name belonging to the slot for a matching parent
2237 * directory. All names for this context are relinquished in 2313 * directory. All names for this context are relinquished in
2238 * audit_free_names() */ 2314 * audit_free_names() */
2239 if (found_parent) { 2315 if (found_parent) {
2240 n->name = found_parent; 2316 found_child->name = found_parent->name;
2241 n->name_len = AUDIT_NAME_FULL; 2317 found_child->name_len = AUDIT_NAME_FULL;
2242 /* don't call __putname() */ 2318 /* don't call __putname() */
2243 n->name_put = false; 2319 found_child->name_put = false;
2244 } 2320 }
2245
2246 if (inode)
2247 audit_copy_inode(n, NULL, inode);
2248 } 2321 }
2322 if (inode)
2323 audit_copy_inode(found_child, dentry, inode);
2324 else
2325 found_child->ino = (unsigned long)-1;
2249} 2326}
2250EXPORT_SYMBOL_GPL(__audit_inode_child); 2327EXPORT_SYMBOL_GPL(__audit_inode_child);
2251 2328
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 17e073c309e6..9a61738cefc8 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -696,6 +696,22 @@ out:
696 return ret; 696 return ret;
697} 697}
698 698
699/*
700 * GDB places a breakpoint at this function to know dynamically
701 * loaded objects. It's not defined static so that only one instance with this
702 * name exists in the kernel.
703 */
704
705static int module_event(struct notifier_block *self, unsigned long val,
706 void *data)
707{
708 return 0;
709}
710
711static struct notifier_block dbg_module_load_nb = {
712 .notifier_call = module_event,
713};
714
699int kgdb_nmicallback(int cpu, void *regs) 715int kgdb_nmicallback(int cpu, void *regs)
700{ 716{
701#ifdef CONFIG_SMP 717#ifdef CONFIG_SMP
@@ -824,6 +840,7 @@ static void kgdb_register_callbacks(void)
824 kgdb_arch_init(); 840 kgdb_arch_init();
825 if (!dbg_is_early) 841 if (!dbg_is_early)
826 kgdb_arch_late(); 842 kgdb_arch_late();
843 register_module_notifier(&dbg_module_load_nb);
827 register_reboot_notifier(&dbg_reboot_notifier); 844 register_reboot_notifier(&dbg_reboot_notifier);
828 atomic_notifier_chain_register(&panic_notifier_list, 845 atomic_notifier_chain_register(&panic_notifier_list,
829 &kgdb_panic_event_nb); 846 &kgdb_panic_event_nb);
@@ -847,6 +864,7 @@ static void kgdb_unregister_callbacks(void)
847 if (kgdb_io_module_registered) { 864 if (kgdb_io_module_registered) {
848 kgdb_io_module_registered = 0; 865 kgdb_io_module_registered = 0;
849 unregister_reboot_notifier(&dbg_reboot_notifier); 866 unregister_reboot_notifier(&dbg_reboot_notifier);
867 unregister_module_notifier(&dbg_module_load_nb);
850 atomic_notifier_chain_unregister(&panic_notifier_list, 868 atomic_notifier_chain_unregister(&panic_notifier_list,
851 &kgdb_panic_event_nb); 869 &kgdb_panic_event_nb);
852 kgdb_arch_exit(); 870 kgdb_arch_exit();
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 07c9bbb94a0b..b03e0e814e43 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -129,6 +129,8 @@ kdb_bt(int argc, const char **argv)
129 } 129 }
130 /* Now the inactive tasks */ 130 /* Now the inactive tasks */
131 kdb_do_each_thread(g, p) { 131 kdb_do_each_thread(g, p) {
132 if (KDB_FLAG(CMD_INTERRUPT))
133 return 0;
132 if (task_curr(p)) 134 if (task_curr(p))
133 continue; 135 continue;
134 if (kdb_bt1(p, mask, argcount, btaprompt)) 136 if (kdb_bt1(p, mask, argcount, btaprompt))
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 0a69d2adc4f3..14ff4849262c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -552,6 +552,7 @@ int vkdb_printf(const char *fmt, va_list ap)
552{ 552{
553 int diag; 553 int diag;
554 int linecount; 554 int linecount;
555 int colcount;
555 int logging, saved_loglevel = 0; 556 int logging, saved_loglevel = 0;
556 int saved_trap_printk; 557 int saved_trap_printk;
557 int got_printf_lock = 0; 558 int got_printf_lock = 0;
@@ -584,6 +585,10 @@ int vkdb_printf(const char *fmt, va_list ap)
584 if (diag || linecount <= 1) 585 if (diag || linecount <= 1)
585 linecount = 24; 586 linecount = 24;
586 587
588 diag = kdbgetintenv("COLUMNS", &colcount);
589 if (diag || colcount <= 1)
590 colcount = 80;
591
587 diag = kdbgetintenv("LOGGING", &logging); 592 diag = kdbgetintenv("LOGGING", &logging);
588 if (diag) 593 if (diag)
589 logging = 0; 594 logging = 0;
@@ -690,7 +695,7 @@ kdb_printit:
690 gdbstub_msg_write(kdb_buffer, retlen); 695 gdbstub_msg_write(kdb_buffer, retlen);
691 } else { 696 } else {
692 if (dbg_io_ops && !dbg_io_ops->is_console) { 697 if (dbg_io_ops && !dbg_io_ops->is_console) {
693 len = strlen(kdb_buffer); 698 len = retlen;
694 cp = kdb_buffer; 699 cp = kdb_buffer;
695 while (len--) { 700 while (len--) {
696 dbg_io_ops->write_char(*cp); 701 dbg_io_ops->write_char(*cp);
@@ -709,11 +714,29 @@ kdb_printit:
709 printk(KERN_INFO "%s", kdb_buffer); 714 printk(KERN_INFO "%s", kdb_buffer);
710 } 715 }
711 716
712 if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n')) 717 if (KDB_STATE(PAGER)) {
713 kdb_nextline++; 718 /*
719 * Check printed string to decide how to bump the
720 * kdb_nextline to control when the more prompt should
721 * show up.
722 */
723 int got = 0;
724 len = retlen;
725 while (len--) {
726 if (kdb_buffer[len] == '\n') {
727 kdb_nextline++;
728 got = 0;
729 } else if (kdb_buffer[len] == '\r') {
730 got = 0;
731 } else {
732 got++;
733 }
734 }
735 kdb_nextline += got / (colcount + 1);
736 }
714 737
715 /* check for having reached the LINES number of printed lines */ 738 /* check for having reached the LINES number of printed lines */
716 if (kdb_nextline == linecount) { 739 if (kdb_nextline >= linecount) {
717 char buf1[16] = ""; 740 char buf1[16] = "";
718 741
719 /* Watch out for recursion here. Any routine that calls 742 /* Watch out for recursion here. Any routine that calls
@@ -765,7 +788,7 @@ kdb_printit:
765 kdb_grepping_flag = 0; 788 kdb_grepping_flag = 0;
766 kdb_printf("\n"); 789 kdb_printf("\n");
767 } else if (buf1[0] == ' ') { 790 } else if (buf1[0] == ' ') {
768 kdb_printf("\n"); 791 kdb_printf("\r");
769 suspend_grep = 1; /* for this recursion */ 792 suspend_grep = 1; /* for this recursion */
770 } else if (buf1[0] == '\n') { 793 } else if (buf1[0] == '\n') {
771 kdb_nextline = linecount - 1; 794 kdb_nextline = linecount - 1;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1261dc7eaeb9..4d5f8d5612f3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2101,6 +2101,8 @@ static int kdb_dmesg(int argc, const char **argv)
2101 } 2101 }
2102 if (!lines--) 2102 if (!lines--)
2103 break; 2103 break;
2104 if (KDB_FLAG(CMD_INTERRUPT))
2105 return 0;
2104 2106
2105 kdb_printf("%.*s\n", (int)len - 1, buf); 2107 kdb_printf("%.*s\n", (int)len - 1, buf);
2106 } 2108 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cda3ebd49e86..dbccf83c134d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -372,6 +372,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
372 372
373 list_for_each_entry_rcu(pmu, &pmus, entry) { 373 list_for_each_entry_rcu(pmu, &pmus, entry) {
374 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 374 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
375 if (cpuctx->unique_pmu != pmu)
376 continue; /* ensure we process each cpuctx once */
375 377
376 /* 378 /*
377 * perf_cgroup_events says at least one 379 * perf_cgroup_events says at least one
@@ -395,9 +397,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
395 397
396 if (mode & PERF_CGROUP_SWIN) { 398 if (mode & PERF_CGROUP_SWIN) {
397 WARN_ON_ONCE(cpuctx->cgrp); 399 WARN_ON_ONCE(cpuctx->cgrp);
398 /* set cgrp before ctxsw in to 400 /*
399 * allow event_filter_match() to not 401 * set cgrp before ctxsw in to allow
400 * have to pass task around 402 * event_filter_match() to not have to pass
403 * task around
401 */ 404 */
402 cpuctx->cgrp = perf_cgroup_from_task(task); 405 cpuctx->cgrp = perf_cgroup_from_task(task);
403 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); 406 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -4412,7 +4415,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
4412 rcu_read_lock(); 4415 rcu_read_lock();
4413 list_for_each_entry_rcu(pmu, &pmus, entry) { 4416 list_for_each_entry_rcu(pmu, &pmus, entry) {
4414 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4417 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4415 if (cpuctx->active_pmu != pmu) 4418 if (cpuctx->unique_pmu != pmu)
4416 goto next; 4419 goto next;
4417 perf_event_task_ctx(&cpuctx->ctx, task_event); 4420 perf_event_task_ctx(&cpuctx->ctx, task_event);
4418 4421
@@ -4558,7 +4561,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
4558 rcu_read_lock(); 4561 rcu_read_lock();
4559 list_for_each_entry_rcu(pmu, &pmus, entry) { 4562 list_for_each_entry_rcu(pmu, &pmus, entry) {
4560 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4563 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4561 if (cpuctx->active_pmu != pmu) 4564 if (cpuctx->unique_pmu != pmu)
4562 goto next; 4565 goto next;
4563 perf_event_comm_ctx(&cpuctx->ctx, comm_event); 4566 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
4564 4567
@@ -4754,7 +4757,7 @@ got_name:
4754 rcu_read_lock(); 4757 rcu_read_lock();
4755 list_for_each_entry_rcu(pmu, &pmus, entry) { 4758 list_for_each_entry_rcu(pmu, &pmus, entry) {
4756 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); 4759 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
4757 if (cpuctx->active_pmu != pmu) 4760 if (cpuctx->unique_pmu != pmu)
4758 goto next; 4761 goto next;
4759 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, 4762 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4760 vma->vm_flags & VM_EXEC); 4763 vma->vm_flags & VM_EXEC);
@@ -5855,8 +5858,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
5855 5858
5856 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 5859 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5857 5860
5858 if (cpuctx->active_pmu == old_pmu) 5861 if (cpuctx->unique_pmu == old_pmu)
5859 cpuctx->active_pmu = pmu; 5862 cpuctx->unique_pmu = pmu;
5860 } 5863 }
5861} 5864}
5862 5865
@@ -5991,7 +5994,7 @@ skip_type:
5991 cpuctx->ctx.pmu = pmu; 5994 cpuctx->ctx.pmu = pmu;
5992 cpuctx->jiffies_interval = 1; 5995 cpuctx->jiffies_interval = 1;
5993 INIT_LIST_HEAD(&cpuctx->rotation_list); 5996 INIT_LIST_HEAD(&cpuctx->rotation_list);
5994 cpuctx->active_pmu = pmu; 5997 cpuctx->unique_pmu = pmu;
5995 } 5998 }
5996 5999
5997got_cpu_context: 6000got_cpu_context:
diff --git a/kernel/fork.c b/kernel/fork.c
index 1cd7d581b3b2..8b20ab7d3aa2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1584,7 +1584,7 @@ long do_fork(unsigned long clone_flags,
1584 * requested, no event is reported; otherwise, report if the event 1584 * requested, no event is reported; otherwise, report if the event
1585 * for the type of forking is enabled. 1585 * for the type of forking is enabled.
1586 */ 1586 */
1587 if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) { 1587 if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
1588 if (clone_flags & CLONE_VFORK) 1588 if (clone_flags & CLONE_VFORK)
1589 trace = PTRACE_EVENT_VFORK; 1589 trace = PTRACE_EVENT_VFORK;
1590 else if ((clone_flags & CSIGNAL) != SIGCHLD) 1590 else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1634,6 +1634,17 @@ long do_fork(unsigned long clone_flags,
1634 return nr; 1634 return nr;
1635} 1635}
1636 1636
1637#ifdef CONFIG_GENERIC_KERNEL_THREAD
1638/*
1639 * Create a kernel thread.
1640 */
1641pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
1642{
1643 return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
1644 (unsigned long)arg, NULL, NULL);
1645}
1646#endif
1647
1637#ifndef ARCH_MIN_MMSTRUCT_ALIGN 1648#ifndef ARCH_MIN_MMSTRUCT_ALIGN
1638#define ARCH_MIN_MMSTRUCT_ALIGN 0 1649#define ARCH_MIN_MMSTRUCT_ALIGN 0
1639#endif 1650#endif
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 49a77727db42..4e69e24d3d7d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -148,7 +148,8 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
148 * @host_data: Controller private data pointer 148 * @host_data: Controller private data pointer
149 * 149 *
150 * Allocates a legacy irq_domain if irq_base is positive or a linear 150 * Allocates a legacy irq_domain if irq_base is positive or a linear
151 * domain otherwise. 151 * domain otherwise. For the legacy domain, IRQ descriptors will also
152 * be allocated.
152 * 153 *
153 * This is intended to implement the expected behaviour for most 154 * This is intended to implement the expected behaviour for most
154 * interrupt controllers which is that a linear mapping should 155 * interrupt controllers which is that a linear mapping should
@@ -162,11 +163,33 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
162 const struct irq_domain_ops *ops, 163 const struct irq_domain_ops *ops,
163 void *host_data) 164 void *host_data)
164{ 165{
165 if (first_irq > 0) 166 if (first_irq > 0) {
166 return irq_domain_add_legacy(of_node, size, first_irq, 0, 167 int irq_base;
168
169 if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
170 /*
171 * Set the descriptor allocator to search for a
172 * 1-to-1 mapping, such as irq_alloc_desc_at().
173 * Use of_node_to_nid() which is defined to
174 * numa_node_id() on platforms that have no custom
175 * implementation.
176 */
177 irq_base = irq_alloc_descs(first_irq, first_irq, size,
178 of_node_to_nid(of_node));
179 if (irq_base < 0) {
180 WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
181 first_irq);
182 irq_base = first_irq;
183 }
184 } else
185 irq_base = first_irq;
186
187 return irq_domain_add_legacy(of_node, size, irq_base, 0,
167 ops, host_data); 188 ops, host_data);
168 else 189 }
169 return irq_domain_add_linear(of_node, size, ops, host_data); 190
191 /* A linear domain is the default */
192 return irq_domain_add_linear(of_node, size, ops, host_data);
170} 193}
171 194
172/** 195/**
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6f99aead66c6..1c317e386831 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -37,6 +37,7 @@
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/suspend.h> 38#include <linux/suspend.h>
39#include <linux/rwsem.h> 39#include <linux/rwsem.h>
40#include <linux/ptrace.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42#include <trace/events/module.h> 43#include <trace/events/module.h>
@@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data)
221 retval = kernel_execve(sub_info->path, 222 retval = kernel_execve(sub_info->path,
222 (const char *const *)sub_info->argv, 223 (const char *const *)sub_info->argv,
223 (const char *const *)sub_info->envp); 224 (const char *const *)sub_info->envp);
225 if (!retval)
226 return 0;
224 227
225 /* Exec failed? */ 228 /* Exec failed? */
226fail: 229fail:
227 sub_info->retval = retval; 230 sub_info->retval = retval;
228 return 0; 231 do_exit(0);
229} 232}
230 233
231static int call_helper(void *data) 234static int call_helper(void *data)
@@ -292,7 +295,7 @@ static int wait_for_helper(void *data)
292 } 295 }
293 296
294 umh_complete(sub_info); 297 umh_complete(sub_info);
295 return 0; 298 do_exit(0);
296} 299}
297 300
298/* This is run by khelper thread */ 301/* This is run by khelper thread */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 146a6fa96825..29fb60caecb5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -16,6 +16,7 @@
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/ptrace.h>
19#include <trace/events/sched.h> 20#include <trace/events/sched.h>
20 21
21static DEFINE_SPINLOCK(kthread_create_lock); 22static DEFINE_SPINLOCK(kthread_create_lock);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4fb2376ddf06..74df86bd9204 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
74 .orphan_nxttail = &sname##_state.orphan_nxtlist, \ 74 .orphan_nxttail = &sname##_state.orphan_nxtlist, \
75 .orphan_donetail = &sname##_state.orphan_donelist, \ 75 .orphan_donetail = &sname##_state.orphan_donelist, \
76 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 76 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
77 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
77 .name = #sname, \ 78 .name = #sname, \
78} 79}
79 80
@@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1197 raw_spin_unlock_irq(&rnp->lock); 1198 raw_spin_unlock_irq(&rnp->lock);
1198 1199
1199 /* Exclude any concurrent CPU-hotplug operations. */ 1200 /* Exclude any concurrent CPU-hotplug operations. */
1200 get_online_cpus(); 1201 mutex_lock(&rsp->onoff_mutex);
1201 1202
1202 /* 1203 /*
1203 * Set the quiescent-state-needed bits in all the rcu_node 1204 * Set the quiescent-state-needed bits in all the rcu_node
@@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1234 cond_resched(); 1235 cond_resched();
1235 } 1236 }
1236 1237
1237 put_online_cpus(); 1238 mutex_unlock(&rsp->onoff_mutex);
1238 return 1; 1239 return 1;
1239} 1240}
1240 1241
@@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1700 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */ 1701 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
1701 1702
1702 /* Exclude any attempts to start a new grace period. */ 1703 /* Exclude any attempts to start a new grace period. */
1704 mutex_lock(&rsp->onoff_mutex);
1703 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1705 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1704 1706
1705 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ 1707 /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
@@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1744 init_callback_list(rdp); 1746 init_callback_list(rdp);
1745 /* Disallow further callbacks on this CPU. */ 1747 /* Disallow further callbacks on this CPU. */
1746 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 1748 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
1749 mutex_unlock(&rsp->onoff_mutex);
1747} 1750}
1748 1751
1749#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1752#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2648 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2651 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2649 struct rcu_node *rnp = rcu_get_root(rsp); 2652 struct rcu_node *rnp = rcu_get_root(rsp);
2650 2653
2654 /* Exclude new grace periods. */
2655 mutex_lock(&rsp->onoff_mutex);
2656
2651 /* Set up local state, ensuring consistent view of global state. */ 2657 /* Set up local state, ensuring consistent view of global state. */
2652 raw_spin_lock_irqsave(&rnp->lock, flags); 2658 raw_spin_lock_irqsave(&rnp->lock, flags);
2653 rdp->beenonline = 1; /* We have now been online. */ 2659 rdp->beenonline = 1; /* We have now been online. */
@@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2662 rcu_prepare_for_idle_init(cpu); 2668 rcu_prepare_for_idle_init(cpu);
2663 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2669 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
2664 2670
2665 /*
2666 * A new grace period might start here. If so, we won't be part
2667 * of it, but that is OK, as we are currently in a quiescent state.
2668 */
2669
2670 /* Exclude any attempts to start a new GP on large systems. */
2671 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
2672
2673 /* Add CPU to rcu_node bitmasks. */ 2671 /* Add CPU to rcu_node bitmasks. */
2674 rnp = rdp->mynode; 2672 rnp = rdp->mynode;
2675 mask = rdp->grpmask; 2673 mask = rdp->grpmask;
@@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2693 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 2691 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
2694 rnp = rnp->parent; 2692 rnp = rnp->parent;
2695 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 2693 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
2694 local_irq_restore(flags);
2696 2695
2697 raw_spin_unlock_irqrestore(&rsp->onofflock, flags); 2696 mutex_unlock(&rsp->onoff_mutex);
2698} 2697}
2699 2698
2700static void __cpuinit rcu_prepare_cpu(int cpu) 2699static void __cpuinit rcu_prepare_cpu(int cpu)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 5faf05d68326..a240f032848e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -394,11 +394,17 @@ struct rcu_state {
394 struct rcu_head **orphan_donetail; /* Tail of above. */ 394 struct rcu_head **orphan_donetail; /* Tail of above. */
395 long qlen_lazy; /* Number of lazy callbacks. */ 395 long qlen_lazy; /* Number of lazy callbacks. */
396 long qlen; /* Total number of callbacks. */ 396 long qlen; /* Total number of callbacks. */
397 /* End of fields guarded by onofflock. */
398
399 struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
400
397 struct mutex barrier_mutex; /* Guards barrier fields. */ 401 struct mutex barrier_mutex; /* Guards barrier fields. */
398 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 402 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
399 struct completion barrier_completion; /* Wake at barrier end. */ 403 struct completion barrier_completion; /* Wake at barrier end. */
400 unsigned long n_barrier_done; /* ++ at start and end of */ 404 unsigned long n_barrier_done; /* ++ at start and end of */
401 /* _rcu_barrier(). */ 405 /* _rcu_barrier(). */
406 /* End of fields guarded by barrier_mutex. */
407
402 unsigned long jiffies_force_qs; /* Time at which to invoke */ 408 unsigned long jiffies_force_qs; /* Time at which to invoke */
403 /* force_quiescent_state(). */ 409 /* force_quiescent_state(). */
404 unsigned long n_force_qs; /* Number of calls to */ 410 unsigned long n_force_qs; /* Number of calls to */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c17747236438..2d8927fda712 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -505,7 +505,7 @@ static inline void init_hrtick(void)
505#ifdef CONFIG_SMP 505#ifdef CONFIG_SMP
506 506
507#ifndef tsk_is_polling 507#ifndef tsk_is_polling
508#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 508#define tsk_is_polling(t) 0
509#endif 509#endif
510 510
511void resched_task(struct task_struct *p) 511void resched_task(struct task_struct *p)
@@ -6122,6 +6122,17 @@ static void sched_init_numa(void)
6122 * numbers. 6122 * numbers.
6123 */ 6123 */
6124 6124
6125 /*
6126 * Here, we should temporarily reset sched_domains_numa_levels to 0.
6127 * If it fails to allocate memory for array sched_domains_numa_masks[][],
6128 * the array will contain fewer than 'level' members. This could be
6129 * dangerous when we use it to iterate array sched_domains_numa_masks[][]
6130 * in other functions.
6131 *
6132 * We reset it to 'level' at the end of this function.
6133 */
6134 sched_domains_numa_levels = 0;
6135
6125 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); 6136 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
6126 if (!sched_domains_numa_masks) 6137 if (!sched_domains_numa_masks)
6127 return; 6138 return;
@@ -6176,11 +6187,68 @@ static void sched_init_numa(void)
6176 } 6187 }
6177 6188
6178 sched_domain_topology = tl; 6189 sched_domain_topology = tl;
6190
6191 sched_domains_numa_levels = level;
6192}
6193
6194static void sched_domains_numa_masks_set(int cpu)
6195{
6196 int i, j;
6197 int node = cpu_to_node(cpu);
6198
6199 for (i = 0; i < sched_domains_numa_levels; i++) {
6200 for (j = 0; j < nr_node_ids; j++) {
6201 if (node_distance(j, node) <= sched_domains_numa_distance[i])
6202 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
6203 }
6204 }
6205}
6206
6207static void sched_domains_numa_masks_clear(int cpu)
6208{
6209 int i, j;
6210 for (i = 0; i < sched_domains_numa_levels; i++) {
6211 for (j = 0; j < nr_node_ids; j++)
6212 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
6213 }
6214}
6215
6216/*
6217 * Update sched_domains_numa_masks[level][node] array when cpus are
6218 * onlined (CPU_ONLINE) or have gone away (CPU_DEAD).
6219 */
6220static int sched_domains_numa_masks_update(struct notifier_block *nfb,
6221 unsigned long action,
6222 void *hcpu)
6223{
6224 int cpu = (long)hcpu;
6225
6226 switch (action & ~CPU_TASKS_FROZEN) {
6227 case CPU_ONLINE:
6228 sched_domains_numa_masks_set(cpu);
6229 break;
6230
6231 case CPU_DEAD:
6232 sched_domains_numa_masks_clear(cpu);
6233 break;
6234
6235 default:
6236 return NOTIFY_DONE;
6237 }
6238
6239 return NOTIFY_OK;
6179} 6240}
6180#else 6241#else
6181static inline void sched_init_numa(void) 6242static inline void sched_init_numa(void)
6182{ 6243{
6183} 6244}
6245
6246static int sched_domains_numa_masks_update(struct notifier_block *nfb,
6247 unsigned long action,
6248 void *hcpu)
6249{
6250 return 0;
6251}
6184#endif /* CONFIG_NUMA */ 6252#endif /* CONFIG_NUMA */
6185 6253
6186static int __sdt_alloc(const struct cpumask *cpu_map) 6254static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -6629,6 +6697,7 @@ void __init sched_init_smp(void)
6629 mutex_unlock(&sched_domains_mutex); 6697 mutex_unlock(&sched_domains_mutex);
6630 put_online_cpus(); 6698 put_online_cpus();
6631 6699
6700 hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
6632 hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); 6701 hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
6633 hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); 6702 hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
6634 6703
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf80696..d226c6a3fd28 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
30#include <linux/export.h> 30#include <linux/export.h>
31#include <linux/timex.h> 31#include <linux/timex.h>
32#include <linux/capability.h> 32#include <linux/capability.h>
33#include <linux/clocksource.h> 33#include <linux/timekeeper_internal.h>
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/security.h> 36#include <linux/security.h>
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd452b75..8601f0db1261 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA
16config GENERIC_TIME_VSYSCALL 16config GENERIC_TIME_VSYSCALL
17 bool 17 bool
18 18
19# Timekeeping vsyscall support
20config GENERIC_TIME_VSYSCALL_OLD
21 bool
22
19# ktime_t scalar 64bit nsec representation 23# ktime_t scalar 64bit nsec representation
20config KTIME_SCALAR 24config KTIME_SCALAR
21 bool 25 bool
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index aa27d391bfc8..f11d83b12949 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -37,7 +37,6 @@
37static struct alarm_base { 37static struct alarm_base {
38 spinlock_t lock; 38 spinlock_t lock;
39 struct timerqueue_head timerqueue; 39 struct timerqueue_head timerqueue;
40 struct hrtimer timer;
41 ktime_t (*gettime)(void); 40 ktime_t (*gettime)(void);
42 clockid_t base_clockid; 41 clockid_t base_clockid;
43} alarm_bases[ALARM_NUMTYPE]; 42} alarm_bases[ALARM_NUMTYPE];
@@ -46,6 +45,8 @@ static struct alarm_base {
46static ktime_t freezer_delta; 45static ktime_t freezer_delta;
47static DEFINE_SPINLOCK(freezer_delta_lock); 46static DEFINE_SPINLOCK(freezer_delta_lock);
48 47
48static struct wakeup_source *ws;
49
49#ifdef CONFIG_RTC_CLASS 50#ifdef CONFIG_RTC_CLASS
50/* rtc timer and device for setting alarm wakeups at suspend */ 51/* rtc timer and device for setting alarm wakeups at suspend */
51static struct rtc_timer rtctimer; 52static struct rtc_timer rtctimer;
@@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { }
130 * @base: pointer to the base where the timer is being run 131 * @base: pointer to the base where the timer is being run
131 * @alarm: pointer to alarm being enqueued. 132 * @alarm: pointer to alarm being enqueued.
132 * 133 *
133 * Adds alarm to a alarm_base timerqueue and if necessary sets 134 * Adds alarm to a alarm_base timerqueue
134 * an hrtimer to run.
135 * 135 *
136 * Must hold base->lock when calling. 136 * Must hold base->lock when calling.
137 */ 137 */
138static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) 138static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
139{ 139{
140 if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
141 timerqueue_del(&base->timerqueue, &alarm->node);
142
140 timerqueue_add(&base->timerqueue, &alarm->node); 143 timerqueue_add(&base->timerqueue, &alarm->node);
141 alarm->state |= ALARMTIMER_STATE_ENQUEUED; 144 alarm->state |= ALARMTIMER_STATE_ENQUEUED;
142
143 if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
144 hrtimer_try_to_cancel(&base->timer);
145 hrtimer_start(&base->timer, alarm->node.expires,
146 HRTIMER_MODE_ABS);
147 }
148} 145}
149 146
150/** 147/**
151 * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue 148 * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue
152 * @base: pointer to the base where the timer is running 149 * @base: pointer to the base where the timer is running
153 * @alarm: pointer to alarm being removed 150 * @alarm: pointer to alarm being removed
154 * 151 *
155 * Removes alarm to a alarm_base timerqueue and if necessary sets 152 * Removes alarm to a alarm_base timerqueue
156 * a new timer to run.
157 * 153 *
158 * Must hold base->lock when calling. 154 * Must hold base->lock when calling.
159 */ 155 */
160static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) 156static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
161{ 157{
162 struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
163
164 if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) 158 if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
165 return; 159 return;
166 160
167 timerqueue_del(&base->timerqueue, &alarm->node); 161 timerqueue_del(&base->timerqueue, &alarm->node);
168 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; 162 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
169
170 if (next == &alarm->node) {
171 hrtimer_try_to_cancel(&base->timer);
172 next = timerqueue_getnext(&base->timerqueue);
173 if (!next)
174 return;
175 hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
176 }
177} 163}
178 164
179 165
@@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
188 */ 174 */
189static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) 175static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
190{ 176{
191 struct alarm_base *base = container_of(timer, struct alarm_base, timer); 177 struct alarm *alarm = container_of(timer, struct alarm, timer);
192 struct timerqueue_node *next; 178 struct alarm_base *base = &alarm_bases[alarm->type];
193 unsigned long flags; 179 unsigned long flags;
194 ktime_t now;
195 int ret = HRTIMER_NORESTART; 180 int ret = HRTIMER_NORESTART;
196 int restart = ALARMTIMER_NORESTART; 181 int restart = ALARMTIMER_NORESTART;
197 182
198 spin_lock_irqsave(&base->lock, flags); 183 spin_lock_irqsave(&base->lock, flags);
199 now = base->gettime(); 184 alarmtimer_dequeue(base, alarm);
200 while ((next = timerqueue_getnext(&base->timerqueue))) { 185 spin_unlock_irqrestore(&base->lock, flags);
201 struct alarm *alarm;
202 ktime_t expired = next->expires;
203
204 if (expired.tv64 > now.tv64)
205 break;
206
207 alarm = container_of(next, struct alarm, node);
208
209 timerqueue_del(&base->timerqueue, &alarm->node);
210 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
211
212 alarm->state |= ALARMTIMER_STATE_CALLBACK;
213 spin_unlock_irqrestore(&base->lock, flags);
214 if (alarm->function)
215 restart = alarm->function(alarm, now);
216 spin_lock_irqsave(&base->lock, flags);
217 alarm->state &= ~ALARMTIMER_STATE_CALLBACK;
218 186
219 if (restart != ALARMTIMER_NORESTART) { 187 if (alarm->function)
220 timerqueue_add(&base->timerqueue, &alarm->node); 188 restart = alarm->function(alarm, base->gettime());
221 alarm->state |= ALARMTIMER_STATE_ENQUEUED;
222 }
223 }
224 189
225 if (next) { 190 spin_lock_irqsave(&base->lock, flags);
226 hrtimer_set_expires(&base->timer, next->expires); 191 if (restart != ALARMTIMER_NORESTART) {
192 hrtimer_set_expires(&alarm->timer, alarm->node.expires);
193 alarmtimer_enqueue(base, alarm);
227 ret = HRTIMER_RESTART; 194 ret = HRTIMER_RESTART;
228 } 195 }
229 spin_unlock_irqrestore(&base->lock, flags); 196 spin_unlock_irqrestore(&base->lock, flags);
@@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev)
250 unsigned long flags; 217 unsigned long flags;
251 struct rtc_device *rtc; 218 struct rtc_device *rtc;
252 int i; 219 int i;
220 int ret;
253 221
254 spin_lock_irqsave(&freezer_delta_lock, flags); 222 spin_lock_irqsave(&freezer_delta_lock, flags);
255 min = freezer_delta; 223 min = freezer_delta;
@@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev)
279 if (min.tv64 == 0) 247 if (min.tv64 == 0)
280 return 0; 248 return 0;
281 249
282 /* XXX - Should we enforce a minimum sleep time? */ 250 if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
283 WARN_ON(min.tv64 < NSEC_PER_SEC); 251 __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
252 return -EBUSY;
253 }
284 254
285 /* Setup an rtc timer to fire that far in the future */ 255 /* Setup an rtc timer to fire that far in the future */
286 rtc_timer_cancel(rtc, &rtctimer); 256 rtc_timer_cancel(rtc, &rtctimer);
@@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev)
288 now = rtc_tm_to_ktime(tm); 258 now = rtc_tm_to_ktime(tm);
289 now = ktime_add(now, min); 259 now = ktime_add(now, min);
290 260
291 rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); 261 /* Set alarm; if it is in the past, reject suspend briefly to handle it */
292 262 ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
293 return 0; 263 if (ret < 0)
264 __pm_wakeup_event(ws, MSEC_PER_SEC);
265 return ret;
294} 266}
295#else 267#else
296static int alarmtimer_suspend(struct device *dev) 268static int alarmtimer_suspend(struct device *dev)
@@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
324 enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) 296 enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
325{ 297{
326 timerqueue_init(&alarm->node); 298 timerqueue_init(&alarm->node);
299 hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
300 HRTIMER_MODE_ABS);
301 alarm->timer.function = alarmtimer_fired;
327 alarm->function = function; 302 alarm->function = function;
328 alarm->type = type; 303 alarm->type = type;
329 alarm->state = ALARMTIMER_STATE_INACTIVE; 304 alarm->state = ALARMTIMER_STATE_INACTIVE;
@@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
334 * @alarm: ptr to alarm to set 309 * @alarm: ptr to alarm to set
335 * @start: time to run the alarm 310 * @start: time to run the alarm
336 */ 311 */
337void alarm_start(struct alarm *alarm, ktime_t start) 312int alarm_start(struct alarm *alarm, ktime_t start)
338{ 313{
339 struct alarm_base *base = &alarm_bases[alarm->type]; 314 struct alarm_base *base = &alarm_bases[alarm->type];
340 unsigned long flags; 315 unsigned long flags;
316 int ret;
341 317
342 spin_lock_irqsave(&base->lock, flags); 318 spin_lock_irqsave(&base->lock, flags);
343 if (alarmtimer_active(alarm))
344 alarmtimer_remove(base, alarm);
345 alarm->node.expires = start; 319 alarm->node.expires = start;
346 alarmtimer_enqueue(base, alarm); 320 alarmtimer_enqueue(base, alarm);
321 ret = hrtimer_start(&alarm->timer, alarm->node.expires,
322 HRTIMER_MODE_ABS);
347 spin_unlock_irqrestore(&base->lock, flags); 323 spin_unlock_irqrestore(&base->lock, flags);
324 return ret;
348} 325}
349 326
350/** 327/**
@@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm)
358{ 335{
359 struct alarm_base *base = &alarm_bases[alarm->type]; 336 struct alarm_base *base = &alarm_bases[alarm->type];
360 unsigned long flags; 337 unsigned long flags;
361 int ret = -1; 338 int ret;
362 spin_lock_irqsave(&base->lock, flags);
363
364 if (alarmtimer_callback_running(alarm))
365 goto out;
366 339
367 if (alarmtimer_is_queued(alarm)) { 340 spin_lock_irqsave(&base->lock, flags);
368 alarmtimer_remove(base, alarm); 341 ret = hrtimer_try_to_cancel(&alarm->timer);
369 ret = 1; 342 if (ret >= 0)
370 } else 343 alarmtimer_dequeue(base, alarm);
371 ret = 0;
372out:
373 spin_unlock_irqrestore(&base->lock, flags); 344 spin_unlock_irqrestore(&base->lock, flags);
374 return ret; 345 return ret;
375} 346}
@@ -802,10 +773,6 @@ static int __init alarmtimer_init(void)
802 for (i = 0; i < ALARM_NUMTYPE; i++) { 773 for (i = 0; i < ALARM_NUMTYPE; i++) {
803 timerqueue_init_head(&alarm_bases[i].timerqueue); 774 timerqueue_init_head(&alarm_bases[i].timerqueue);
804 spin_lock_init(&alarm_bases[i].lock); 775 spin_lock_init(&alarm_bases[i].lock);
805 hrtimer_init(&alarm_bases[i].timer,
806 alarm_bases[i].base_clockid,
807 HRTIMER_MODE_ABS);
808 alarm_bases[i].timer.function = alarmtimer_fired;
809 } 776 }
810 777
811 error = alarmtimer_rtc_interface_setup(); 778 error = alarmtimer_rtc_interface_setup();
@@ -821,6 +788,7 @@ static int __init alarmtimer_init(void)
821 error = PTR_ERR(pdev); 788 error = PTR_ERR(pdev);
822 goto out_drv; 789 goto out_drv;
823 } 790 }
791 ws = wakeup_source_register("alarmtimer");
824 return 0; 792 return 0;
825 793
826out_drv: 794out_drv:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da0537c10b..6629bf7b5285 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
37 * requested HZ value. It is also not recommended 37 * requested HZ value. It is also not recommended
38 * for "tick-less" systems. 38 * for "tick-less" systems.
39 */ 39 */
40#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ)) 40#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
41 41
42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier 42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
43 * conversion, the .shift value could be zero. However 43 * conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
95{ 95{
96 return &clocksource_jiffies; 96 return &clocksource_jiffies;
97} 97}
98
99struct clocksource refined_jiffies;
100
101int register_refined_jiffies(long cycles_per_second)
102{
103 u64 nsec_per_tick, shift_hz;
104 long cycles_per_tick;
105
106
107
108 refined_jiffies = clocksource_jiffies;
109 refined_jiffies.name = "refined-jiffies";
110 refined_jiffies.rating++;
111
112 /* Calc cycles per tick */
113 cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
114 /* shift_hz stores hz<<8 for extra accuracy */
115 shift_hz = (u64)cycles_per_second << 8;
116 shift_hz += cycles_per_tick/2;
117 do_div(shift_hz, cycles_per_tick);
118 /* Calculate nsec_per_tick using shift_hz */
119 nsec_per_tick = (u64)NSEC_PER_SEC << 8;
120 nsec_per_tick += (u32)shift_hz/2;
121 do_div(nsec_per_tick, (u32)shift_hz);
122
123 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
124
125 clocksource_register(&refined_jiffies);
126 return 0;
127}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f423bdd035c2..a40260885265 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -835,7 +835,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
835 */ 835 */
836 if (ts->tick_stopped) { 836 if (ts->tick_stopped) {
837 touch_softlockup_watchdog(); 837 touch_softlockup_watchdog();
838 if (idle_cpu(cpu)) 838 if (is_idle_task(current))
839 ts->idle_jiffies++; 839 ts->idle_jiffies++;
840 } 840 }
841 update_process_times(user_mode(regs)); 841 update_process_times(user_mode(regs));
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5ce06a3fa91e..e424970bb562 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,6 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/timekeeper_internal.h>
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/interrupt.h> 13#include <linux/interrupt.h>
13#include <linux/percpu.h> 14#include <linux/percpu.h>
@@ -21,61 +22,6 @@
21#include <linux/tick.h> 22#include <linux/tick.h>
22#include <linux/stop_machine.h> 23#include <linux/stop_machine.h>
23 24
24/* Structure holding internal timekeeping values. */
25struct timekeeper {
26 /* Current clocksource used for timekeeping. */
27 struct clocksource *clock;
28 /* NTP adjusted clock multiplier */
29 u32 mult;
30 /* The shift value of the current clocksource. */
31 u32 shift;
32 /* Number of clock cycles in one NTP interval. */
33 cycle_t cycle_interval;
34 /* Number of clock shifted nano seconds in one NTP interval. */
35 u64 xtime_interval;
36 /* shifted nano seconds left over when rounding cycle_interval */
37 s64 xtime_remainder;
38 /* Raw nano seconds accumulated per NTP interval. */
39 u32 raw_interval;
40
41 /* Current CLOCK_REALTIME time in seconds */
42 u64 xtime_sec;
43 /* Clock shifted nano seconds */
44 u64 xtime_nsec;
45
46 /* Difference between accumulated time and NTP time in ntp
47 * shifted nano seconds. */
48 s64 ntp_error;
49 /* Shift conversion between clock shifted nano seconds and
50 * ntp shifted nano seconds. */
51 u32 ntp_error_shift;
52
53 /*
54 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
55 * for sub jiffie times) to get to monotonic time. Monotonic is pegged
56 * at zero at system boot time, so wall_to_monotonic will be negative,
57 * however, we will ALWAYS keep the tv_nsec part positive so we can use
58 * the usual normalization.
59 *
60 * wall_to_monotonic is moved after resume from suspend for the
61 * monotonic time not to jump. We need to add total_sleep_time to
62 * wall_to_monotonic to get the real boot based time offset.
63 *
64 * - wall_to_monotonic is no longer the boot time, getboottime must be
65 * used instead.
66 */
67 struct timespec wall_to_monotonic;
68 /* Offset clock monotonic -> clock realtime */
69 ktime_t offs_real;
70 /* time spent in suspend */
71 struct timespec total_sleep_time;
72 /* Offset clock monotonic -> clock boottime */
73 ktime_t offs_boot;
74 /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
75 struct timespec raw_time;
76 /* Seqlock for all timekeeper values */
77 seqlock_t lock;
78};
79 25
80static struct timekeeper timekeeper; 26static struct timekeeper timekeeper;
81 27
@@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
96 } 42 }
97} 43}
98 44
99static struct timespec tk_xtime(struct timekeeper *tk)
100{
101 struct timespec ts;
102
103 ts.tv_sec = tk->xtime_sec;
104 ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
105 return ts;
106}
107
108static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) 45static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
109{ 46{
110 tk->xtime_sec = ts->tv_sec; 47 tk->xtime_sec = ts->tv_sec;
@@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
246/* must hold write on timekeeper.lock */ 183/* must hold write on timekeeper.lock */
247static void timekeeping_update(struct timekeeper *tk, bool clearntp) 184static void timekeeping_update(struct timekeeper *tk, bool clearntp)
248{ 185{
249 struct timespec xt;
250
251 if (clearntp) { 186 if (clearntp) {
252 tk->ntp_error = 0; 187 tk->ntp_error = 0;
253 ntp_clear(); 188 ntp_clear();
254 } 189 }
255 xt = tk_xtime(tk); 190 update_vsyscall(tk);
256 update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
257} 191}
258 192
259/** 193/**
@@ -1113,7 +1047,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1113 accumulate_nsecs_to_secs(tk); 1047 accumulate_nsecs_to_secs(tk);
1114 1048
1115 /* Accumulate raw time */ 1049 /* Accumulate raw time */
1116 raw_nsecs = tk->raw_interval << shift; 1050 raw_nsecs = (u64)tk->raw_interval << shift;
1117 raw_nsecs += tk->raw_time.tv_nsec; 1051 raw_nsecs += tk->raw_time.tv_nsec;
1118 if (raw_nsecs >= NSEC_PER_SEC) { 1052 if (raw_nsecs >= NSEC_PER_SEC) {
1119 u64 raw_secs = raw_nsecs; 1053 u64 raw_secs = raw_nsecs;
@@ -1130,6 +1064,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1130 return offset; 1064 return offset;
1131} 1065}
1132 1066
1067#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
1068static inline void old_vsyscall_fixup(struct timekeeper *tk)
1069{
1070 s64 remainder;
1071
1072 /*
1073 * Store only full nanoseconds into xtime_nsec after rounding
1074 * it up and add the remainder to the error difference.
1075 * XXX - This is necessary to avoid small 1ns inconsistencies caused
1076 * by truncating the remainder in vsyscalls. However, it causes
1077 * additional work to be done in timekeeping_adjust(). Once
1078 * the vsyscall implementations are converted to use xtime_nsec
1079 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
1080 * users are removed, this can be killed.
1081 */
1082 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1083 tk->xtime_nsec -= remainder;
1084 tk->xtime_nsec += 1ULL << tk->shift;
1085 tk->ntp_error += remainder << tk->ntp_error_shift;
1086
1087}
1088#else
1089#define old_vsyscall_fixup(tk)
1090#endif
1091
1092
1093
1133/** 1094/**
1134 * update_wall_time - Uses the current clocksource to increment the wall time 1095 * update_wall_time - Uses the current clocksource to increment the wall time
1135 * 1096 *
@@ -1141,7 +1102,6 @@ static void update_wall_time(void)
1141 cycle_t offset; 1102 cycle_t offset;
1142 int shift = 0, maxshift; 1103 int shift = 0, maxshift;
1143 unsigned long flags; 1104 unsigned long flags;
1144 s64 remainder;
1145 1105
1146 write_seqlock_irqsave(&tk->lock, flags); 1106 write_seqlock_irqsave(&tk->lock, flags);
1147 1107
@@ -1183,20 +1143,11 @@ static void update_wall_time(void)
1183 /* correct the clock when NTP error is too big */ 1143 /* correct the clock when NTP error is too big */
1184 timekeeping_adjust(tk, offset); 1144 timekeeping_adjust(tk, offset);
1185 1145
1186
1187 /* 1146 /*
1188 * Store only full nanoseconds into xtime_nsec after rounding 1147 * XXX This can be killed once everyone converts
1189 * it up and add the remainder to the error difference. 1148 * to the new update_vsyscall.
1190 * XXX - This is necessary to avoid small 1ns inconsistencies caused 1149 */
1191 * by truncating the remainder in vsyscalls. However, it causes 1150 old_vsyscall_fixup(tk);
1192 * additional work to be done in timekeeping_adjust(). Once
1193 * the vsyscall implementations are converted to use xtime_nsec
1194 * (shifted nanoseconds), this can be killed.
1195 */
1196 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1197 tk->xtime_nsec -= remainder;
1198 tk->xtime_nsec += 1ULL << tk->shift;
1199 tk->ntp_error += remainder << tk->ntp_error_shift;
1200 1151
1201 /* 1152 /*
1202 * Finally, make sure that after the rounding 1153 * Finally, make sure that after the rounding
diff --git a/kernel/timer.c b/kernel/timer.c
index d5de1b2292aa..367d00858482 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64);
63#define TVR_SIZE (1 << TVR_BITS) 63#define TVR_SIZE (1 << TVR_BITS)
64#define TVN_MASK (TVN_SIZE - 1) 64#define TVN_MASK (TVN_SIZE - 1)
65#define TVR_MASK (TVR_SIZE - 1) 65#define TVR_MASK (TVR_SIZE - 1)
66#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
66 67
67struct tvec { 68struct tvec {
68 struct list_head vec[TVN_SIZE]; 69 struct list_head vec[TVN_SIZE];
@@ -359,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
359 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); 360 vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
360 } else { 361 } else {
361 int i; 362 int i;
362 /* If the timeout is larger than 0xffffffff on 64-bit 363 /* If the timeout is larger than MAX_TVAL (on 64-bit
363 * architectures then we use the maximum timeout: 364 * architectures or with CONFIG_BASE_SMALL=1) then we
365 * use the maximum timeout.
364 */ 366 */
365 if (idx > 0xffffffffUL) { 367 if (idx > MAX_TVAL) {
366 idx = 0xffffffffUL; 368 idx = MAX_TVAL;
367 expires = idx + base->timer_jiffies; 369 expires = idx + base->timer_jiffies;
368 } 370 }
369 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; 371 i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;