Diffstat (limited to 'kernel')
39 files changed, 4658 insertions, 513 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d9b690ac684b..76c9a11b72d6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2,7 +2,7 @@
  * Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
  * System-call specific features have moved to auditsc.c
  *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * Copyright 2003-2007 Red Hat Inc., Durham, North Carolina.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -65,7 +65,9 @@
  * (Initialization happens after skb_init is called.) */
 static int audit_initialized;
 
-/* No syscall auditing will take place unless audit_enabled != 0. */
+/* 0 - no auditing
+ * 1 - auditing enabled
+ * 2 - auditing enabled and configuration is locked/unchangeable. */
 int audit_enabled;
 
 /* Default state when kernel boots without any parameters. */
@@ -239,102 +241,150 @@ void audit_log_lost(const char *message)
 
 static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old = audit_rate_limit;
+	int res, rc = 0, old = audit_rate_limit;
+
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
 
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_rate_limit=%d old=%d by auid=%u subj=%s",
-				limit, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_rate_limit=%d old=%d by auid=%u",
-			limit, old, loginuid);
-	audit_rate_limit = limit;
-	return 0;
+				"audit_rate_limit=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				limit, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_rate_limit=%d old=%d by auid=%u res=%d",
+		limit, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_rate_limit = limit;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old = audit_backlog_limit;
+	int res, rc = 0, old = audit_backlog_limit;
+
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
 
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_backlog_limit=%d old=%d by auid=%u subj=%s",
-				limit, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_backlog_limit=%d old=%d by auid=%u",
-			limit, old, loginuid);
-	audit_backlog_limit = limit;
-	return 0;
+				"audit_backlog_limit=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				limit, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_backlog_limit=%d old=%d by auid=%u res=%d",
+		limit, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_backlog_limit = limit;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 {
-	int old = audit_enabled;
+	int res, rc = 0, old = audit_enabled;
 
-	if (state != 0 && state != 1)
+	if (state < 0 || state > 2)
 		return -EINVAL;
 
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
+
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_enabled=%d old=%d by auid=%u subj=%s",
-				state, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_enabled=%d old=%d by auid=%u",
-			state, old, loginuid);
-	audit_enabled = state;
-	return 0;
+				"audit_enabled=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				state, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_enabled=%d old=%d by auid=%u res=%d",
+		state, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_enabled = state;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 {
-	int old = audit_failure;
+	int res, rc = 0, old = audit_failure;
 
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 
+	/* check if we are locked */
+	if (audit_enabled == 2)
+		res = 0;
+	else
+		res = 1;
+
 	if (sid) {
 		char *ctx = NULL;
 		u32 len;
-		int rc;
-		if ((rc = selinux_sid_to_string(sid, &ctx, &len)))
-			return rc;
-		else
+		if ((rc = selinux_sid_to_string(sid, &ctx, &len)) == 0) {
 			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_failure=%d old=%d by auid=%u subj=%s",
-				state, old, loginuid, ctx);
-		kfree(ctx);
-	} else
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"audit_failure=%d old=%d by auid=%u",
-			state, old, loginuid);
-	audit_failure = state;
-	return 0;
+				"audit_failure=%d old=%d by auid=%u"
+				" subj=%s res=%d",
+				state, old, loginuid, ctx, res);
+			kfree(ctx);
+		} else
+			res = 0; /* Something weird, deny request */
+	}
+	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+		"audit_failure=%d old=%d by auid=%u res=%d",
+		state, old, loginuid, res);
+
+	/* If we are allowed, make the change */
+	if (res == 1)
+		audit_failure = state;
+	/* Not allowed, update reason */
+	else if (rc == 0)
+		rc = -EPERM;
+	return rc;
 }
 
 static int kauditd_thread(void *dummy)
@@ -599,6 +649,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_DEL:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule))
 			return -EINVAL;
+		if (audit_enabled == 2) {
+			ab = audit_log_start(NULL, GFP_KERNEL,
+					AUDIT_CONFIG_CHANGE);
+			if (ab) {
+				audit_log_format(ab,
+						"pid=%d uid=%u auid=%u",
+						pid, uid, loginuid);
+				if (sid) {
+					if (selinux_sid_to_string(
+							sid, &ctx, &len)) {
+						audit_log_format(ab,
+							" ssid=%u", sid);
+						/* Maybe call audit_panic? */
+					} else
+						audit_log_format(ab,
+							" subj=%s", ctx);
+					kfree(ctx);
+				}
+				audit_log_format(ab, " audit_enabled=%d res=0",
+						audit_enabled);
+				audit_log_end(ab);
+			}
+			return -EPERM;
+		}
 		/* fallthrough */
 	case AUDIT_LIST:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
@@ -609,6 +683,30 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_DEL_RULE:
 		if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
 			return -EINVAL;
+		if (audit_enabled == 2) {
+			ab = audit_log_start(NULL, GFP_KERNEL,
+					AUDIT_CONFIG_CHANGE);
+			if (ab) {
+				audit_log_format(ab,
+						"pid=%d uid=%u auid=%u",
+						pid, uid, loginuid);
+				if (sid) {
+					if (selinux_sid_to_string(
+							sid, &ctx, &len)) {
+						audit_log_format(ab,
+							" ssid=%u", sid);
+						/* Maybe call audit_panic? */
+					} else
+						audit_log_format(ab,
+							" subj=%s", ctx);
+					kfree(ctx);
+				}
+				audit_log_format(ab, " audit_enabled=%d res=0",
+						audit_enabled);
+				audit_log_end(ab);
+			}
+			return -EPERM;
+		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
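The new audit_enabled == 2 state can only be cleared by a reboot: once it is set, rule changes and the audit_set_* tunables above are refused with -EPERM, and the refusal itself is logged with res=0. Below is a minimal, hypothetical user-space sketch of engaging the lock by sending AUDIT_SET with enabled = 2 over the audit netlink socket (this is what auditctl -e 2 amounts to); the caller needs CAP_AUDIT_CONTROL, and reading back the netlink ACK is elided:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/audit.h>

int main(void)
{
	struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct audit_status s;
	} req;
	int fd;

	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.s));
	req.nlh.nlmsg_type  = AUDIT_SET;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.s.mask    = AUDIT_STATUS_ENABLED;
	req.s.enabled = 2;	/* auditing on *and* config locked until reboot */

	/* nl_pid 0 in the destination address means the kernel */
	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("AUDIT_SET");

	close(fd);
	return 0;
}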
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 87865f8b4ce3..3749193aed8c 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -937,9 +937,10 @@ static void audit_update_watch(struct audit_parent *parent,
 	}
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-	audit_log_format(ab, "audit updated rules specifying path=");
+	audit_log_format(ab, "op=updated rules specifying path=");
 	audit_log_untrustedstring(ab, owatch->path);
 	audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
+	audit_log_format(ab, " list=%d res=1", r->listnr);
 	audit_log_end(ab);
 
 	audit_remove_watch(owatch);
@@ -969,14 +970,14 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 		e = container_of(r, struct audit_entry, rule);
 
 		ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-		audit_log_format(ab, "audit implicitly removed rule path=");
+		audit_log_format(ab, "op=remove rule path=");
 		audit_log_untrustedstring(ab, w->path);
 		if (r->filterkey) {
 			audit_log_format(ab, " key=");
 			audit_log_untrustedstring(ab, r->filterkey);
 		} else
 			audit_log_format(ab, " key=(null)");
-		audit_log_format(ab, " list=%d", r->listnr);
+		audit_log_format(ab, " list=%d res=1", r->listnr);
 		audit_log_end(ab);
 
 		list_del(&r->rlist);
@@ -1410,7 +1411,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
 		audit_log_format(ab, " subj=%s", ctx);
 		kfree(ctx);
 	}
-	audit_log_format(ab, " %s rule key=", action);
+	audit_log_format(ab, " op=%s rule key=", action);
 	if (rule->filterkey)
 		audit_log_untrustedstring(ab, rule->filterkey);
 	else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 298897559ca4..359955800dd2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -170,6 +170,11 @@ struct audit_aux_data_sockaddr {
 	char a[0];
 };
 
+struct audit_aux_data_fd_pair {
+	struct audit_aux_data d;
+	int fd[2];
+};
+
 struct audit_aux_data_path {
 	struct audit_aux_data d;
 	struct dentry *dentry;
@@ -961,6 +966,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		audit_log_d_path(ab, "path=", axi->dentry, axi->mnt);
 		break; }
 
+	case AUDIT_FD_PAIR: {
+		struct audit_aux_data_fd_pair *axs = (void *)aux;
+		audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]);
+		break; }
+
 	}
 	audit_log_end(ab);
 }
@@ -1815,6 +1825,36 @@ int audit_socketcall(int nargs, unsigned long *args)
 }
 
 /**
+ * __audit_fd_pair - record audit data for pipe and socketpair
+ * @fd1: the first file descriptor
+ * @fd2: the second file descriptor
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int __audit_fd_pair(int fd1, int fd2)
+{
+	struct audit_context *context = current->audit_context;
+	struct audit_aux_data_fd_pair *ax;
+
+	if (likely(!context)) {
+		return 0;
+	}
+
+	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
+	if (!ax) {
+		return -ENOMEM;
+	}
+
+	ax->fd[0] = fd1;
+	ax->fd[1] = fd2;
+
+	ax->d.type = AUDIT_FD_PAIR;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
  * audit_sockaddr - record audit data for sys_bind, sys_connect, sys_sendto
  * @len: data length in user space
  * @a: data address in kernel space
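For the fd0=/fd1= record to appear, the pipe() and socketpair() syscall paths have to call into __audit_fd_pair(). A hedged sketch of the kind of inline wrapper those call sites would go through; the real one belongs in include/linux/audit.h and its exact fast-path guard may differ:

/* Sketch only: mirrors the usual audit wrapper pattern, where the
 * common case (no audit context on the task) costs one branch. */
static inline int audit_fd_pair(int fd1, int fd2)
{
	if (unlikely(current->audit_context))
		return __audit_fd_pair(fd1, fd2);
	return 0;
}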
diff --git a/kernel/fork.c b/kernel/fork.c
index 0b6293d94d96..d154cc786489 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -858,7 +858,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
 
-	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
 	sig->tsk = tsk;
diff --git a/kernel/futex.c b/kernel/futex.c
index 5a737de857d3..e749e7df14b1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1134,7 +1134,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
 
 	if (sec != MAX_SCHEDULE_TIMEOUT) {
 		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
 		to->timer.expires = ktime_set(sec, nsec);
 	}
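Both one-line fixes here track the rename of the hrtimer mode constants (HRTIMER_ABS and HRTIMER_REL become HRTIMER_MODE_ABS and HRTIMER_MODE_REL) that comes with the hrtimer rework below. As a sketch of what any hrtimer user looks like after the rename, here is a minimal, hypothetical module that arms a one-shot relative timer; the demo_* names are invented for illustration:

#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

/* Runs on expiry; returning HRTIMER_NORESTART keeps the timer one-shot. */
static enum hrtimer_restart demo_fn(struct hrtimer *t)
{
	printk(KERN_INFO "demo hrtimer fired\n");
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_fn;
	/* Fire once, one second from now, relative to the current time. */
	hrtimer_start(&demo_timer, ktime_set(1, 0), HRTIMER_MODE_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Waits for a running callback, if any, before unloading. */
	hrtimer_cancel(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");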
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f44e499e8fca..476cb0c0b4a4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1,8 +1,9 @@
 /*
  *  linux/kernel/hrtimer.c
  *
- *  Copyright(C) 2005, Thomas Gleixner <tglx@linutronix.de>
- *  Copyright(C) 2005, Red Hat, Inc., Ingo Molnar
+ *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
+ *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
  *
  *  High-resolution kernel timers
  *
@@ -31,12 +32,17 @@
  */
 
 #include <linux/cpu.h>
+#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/hrtimer.h>
 #include <linux/notifier.h>
 #include <linux/syscalls.h>
+#include <linux/kallsyms.h>
 #include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/seq_file.h>
+#include <linux/err.h>
 
 #include <asm/uaccess.h>
 
@@ -45,7 +51,7 @@
  *
  * returns the time in ktime_t format
  */
-static ktime_t ktime_get(void)
+ktime_t ktime_get(void)
 {
 	struct timespec now;
 
@@ -59,7 +65,7 @@ static ktime_t ktime_get(void)
  *
  * returns the time in ktime_t format
  */
-static ktime_t ktime_get_real(void)
+ktime_t ktime_get_real(void)
 {
 	struct timespec now;
 
@@ -79,21 +85,22 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
  * This ensures that we capture erroneous accesses to these clock ids
  * rather than moving them into the range of valid clock id's.
  */
-
-#define MAX_HRTIMER_BASES 2
-
-static DEFINE_PER_CPU(struct hrtimer_base, hrtimer_bases[MAX_HRTIMER_BASES]) =
+DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
+
+	.clock_base =
 	{
-		.index = CLOCK_REALTIME,
-		.get_time = &ktime_get_real,
-		.resolution = KTIME_REALTIME_RES,
-	},
-	{
-		.index = CLOCK_MONOTONIC,
-		.get_time = &ktime_get,
-		.resolution = KTIME_MONOTONIC_RES,
-	},
+		{
+			.index = CLOCK_REALTIME,
+			.get_time = &ktime_get_real,
+			.resolution = KTIME_LOW_RES,
+		},
+		{
+			.index = CLOCK_MONOTONIC,
+			.get_time = &ktime_get,
+			.resolution = KTIME_LOW_RES,
+		},
+	}
 };
 
 /**
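The initializer above only makes sense against the reorganized types: the old flat per-CPU array of struct hrtimer_base becomes one per-CPU container holding both clock bases under a single lock. An approximate sketch of the relationship, reconstructed from how the fields are used in this patch (the authoritative definitions live in include/linux/hrtimer.h and may carry additional members):

/* Per-clock state: one rbtree of pending timers per clock id. */
struct hrtimer_clock_base {
	struct hrtimer_cpu_base	*cpu_base;	/* back pointer: timer->base->cpu_base->lock */
	clockid_t		index;		/* CLOCK_REALTIME or CLOCK_MONOTONIC */
	struct rb_root		active;		/* red-black tree of enqueued timers */
	struct rb_node		*first;		/* leftmost node == earliest expiry */
	ktime_t			resolution;
	ktime_t			(*get_time)(void);
	ktime_t			softirq_time;
	ktime_t			offset;		/* realtime vs. monotonic offset */
};

/* Per-CPU container: one lock now covers both clock bases. */
struct hrtimer_cpu_base {
	spinlock_t			lock;
	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
#ifdef CONFIG_HIGH_RES_TIMERS
	ktime_t				expires_next;	/* what the event device is armed for */
	int				hres_active;
	struct list_head		cb_pending;	/* callbacks deferred to the softirq */
	unsigned long			nr_events;
#endif
};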
@@ -125,20 +132,35 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
  * Get the coarse grained time at the softirq based on xtime and
  * wall_to_monotonic.
  */
-static void hrtimer_get_softirq_time(struct hrtimer_base *base)
+static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
 	ktime_t xtim, tomono;
+	struct timespec xts;
 	unsigned long seq;
 
 	do {
 		seq = read_seqbegin(&xtime_lock);
-		xtim = timespec_to_ktime(xtime);
-		tomono = timespec_to_ktime(wall_to_monotonic);
-
+#ifdef CONFIG_NO_HZ
+		getnstimeofday(&xts);
+#else
+		xts = xtime;
+#endif
 	} while (read_seqretry(&xtime_lock, seq));
 
-	base[CLOCK_REALTIME].softirq_time = xtim;
-	base[CLOCK_MONOTONIC].softirq_time = ktime_add(xtim, tomono);
+	xtim = timespec_to_ktime(xts);
+	tomono = timespec_to_ktime(wall_to_monotonic);
+	base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
+	base->clock_base[CLOCK_MONOTONIC].softirq_time =
+		ktime_add(xtim, tomono);
+}
+
+/*
+ * Helper function to check, whether the timer is running the callback
+ * function
+ */
+static inline int hrtimer_callback_running(struct hrtimer *timer)
+{
+	return timer->state & HRTIMER_STATE_CALLBACK;
 }
 
 /*
@@ -147,8 +169,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base)
  */
 #ifdef CONFIG_SMP
 
-#define set_curr_timer(b, t) do { (b)->curr_timer = (t); } while (0)
-
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
@@ -161,19 +181,20 @@ static void hrtimer_get_softirq_time(struct hrtimer_base *base)
  * possible to set timer->base = NULL and drop the lock: the timer remains
  * locked.
  */
-static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer,
-					      unsigned long *flags)
+static
+struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
+					     unsigned long *flags)
 {
-	struct hrtimer_base *base;
+	struct hrtimer_clock_base *base;
 
 	for (;;) {
 		base = timer->base;
 		if (likely(base != NULL)) {
-			spin_lock_irqsave(&base->lock, *flags);
+			spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
 			/* The timer has migrated to another CPU: */
-			spin_unlock_irqrestore(&base->lock, *flags);
+			spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
 		}
 		cpu_relax();
 	}
@@ -182,12 +203,14 @@ static struct hrtimer_base *lock_hrtimer_base(const struct hrtimer *timer,
 /*
  * Switch the timer base to the current CPU when possible.
  */
-static inline struct hrtimer_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base)
+static inline struct hrtimer_clock_base *
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 {
-	struct hrtimer_base *new_base;
+	struct hrtimer_clock_base *new_base;
+	struct hrtimer_cpu_base *new_cpu_base;
 
-	new_base = &__get_cpu_var(hrtimer_bases)[base->index];
+	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
 		/*
@@ -199,13 +222,13 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base)
 		 * completed. There is no conflict as we hold the lock until
 		 * the timer is enqueued.
 		 */
-		if (unlikely(base->curr_timer == timer))
+		if (unlikely(hrtimer_callback_running(timer)))
 			return base;
 
 		/* See the comment in lock_timer_base() */
 		timer->base = NULL;
-		spin_unlock(&base->lock);
-		spin_lock(&new_base->lock);
+		spin_unlock(&base->cpu_base->lock);
+		spin_lock(&new_base->cpu_base->lock);
 		timer->base = new_base;
 	}
 	return new_base;
@@ -213,19 +236,17 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base)
 
 #else /* CONFIG_SMP */
 
-#define set_curr_timer(b, t) do { } while (0)
-
-static inline struct hrtimer_base *
+static inline struct hrtimer_clock_base *
 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	struct hrtimer_base *base = timer->base;
+	struct hrtimer_clock_base *base = timer->base;
 
-	spin_lock_irqsave(&base->lock, *flags);
+	spin_lock_irqsave(&base->cpu_base->lock, *flags);
 
 	return base;
 }
 
-#define switch_hrtimer_base(t, b) (b)
+# define switch_hrtimer_base(t, b)	(b)
 
 #endif /* !CONFIG_SMP */
 
@@ -256,15 +277,12 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 
 	return ktime_add(kt, tmp);
 }
-
-#else /* CONFIG_KTIME_SCALAR */
-
 # endif /* !CONFIG_KTIME_SCALAR */
 
 /*
  * Divide a ktime value by a nanosecond value
  */
-static unsigned long ktime_divns(const ktime_t kt, s64 div)
+unsigned long ktime_divns(const ktime_t kt, s64 div)
 {
 	u64 dclc, inc, dns;
 	int sft = 0;
@@ -281,18 +299,311 @@ static unsigned long ktime_divns(const ktime_t kt, s64 div)
 
 	return (unsigned long) dclc;
 }
-
-#else /* BITS_PER_LONG < 64 */
-# define ktime_divns(kt, div)		(unsigned long)((kt).tv64 / (div))
 #endif /* BITS_PER_LONG >= 64 */
 
+/* High resolution timer related functions */
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer enabled ?
+ */
+static int hrtimer_hres_enabled __read_mostly  = 1;
+
+/*
+ * Enable / Disable high resolution mode
+ */
+static int __init setup_hrtimer_hres(char *str)
+{
+	if (!strcmp(str, "off"))
+		hrtimer_hres_enabled = 0;
+	else if (!strcmp(str, "on"))
+		hrtimer_hres_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("highres=", setup_hrtimer_hres);
+
+/*
+ * hrtimer_high_res_enabled - query, if the highres mode is enabled
+ */
+static inline int hrtimer_is_hres_enabled(void)
+{
+	return hrtimer_hres_enabled;
+}
+
+/*
+ * Is the high resolution mode active ?
+ */
+static inline int hrtimer_hres_active(void)
+{
+	return __get_cpu_var(hrtimer_bases).hres_active;
+}
+
+/*
+ * Reprogram the event source with checking both queues for the
+ * next event
+ * Called with interrupts disabled and base->lock held
+ */
+static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
+{
+	int i;
+	struct hrtimer_clock_base *base = cpu_base->clock_base;
+	ktime_t expires;
+
+	cpu_base->expires_next.tv64 = KTIME_MAX;
+
+	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+		struct hrtimer *timer;
+
+		if (!base->first)
+			continue;
+		timer = rb_entry(base->first, struct hrtimer, node);
+		expires = ktime_sub(timer->expires, base->offset);
+		if (expires.tv64 < cpu_base->expires_next.tv64)
+			cpu_base->expires_next = expires;
+	}
+
+	if (cpu_base->expires_next.tv64 != KTIME_MAX)
+		tick_program_event(cpu_base->expires_next, 1);
+}
+
+/*
+ * Shared reprogramming for clock_realtime and clock_monotonic
+ *
+ * When a timer is enqueued and expires earlier than the already enqueued
+ * timers, we have to check, whether it expires earlier than the timer for
+ * which the clock event device was armed.
+ *
+ * Called with interrupts disabled and base->cpu_base.lock held
+ */
+static int hrtimer_reprogram(struct hrtimer *timer,
+			     struct hrtimer_clock_base *base)
+{
+	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	ktime_t expires = ktime_sub(timer->expires, base->offset);
+	int res;
+
+	/*
+	 * When the callback is running, we do not reprogram the clock event
+	 * device. The timer callback is either running on a different CPU or
+	 * the callback is executed in the hrtimer_interupt context. The
+	 * reprogramming is handled either by the softirq, which called the
+	 * callback or at the end of the hrtimer_interrupt.
+	 */
+	if (hrtimer_callback_running(timer))
+		return 0;
+
+	if (expires.tv64 >= expires_next->tv64)
+		return 0;
+
+	/*
+	 * Clockevents returns -ETIME, when the event was in the past.
+	 */
+	res = tick_program_event(expires, 0);
+	if (!IS_ERR_VALUE(res))
+		*expires_next = expires;
+	return res;
+}
+
+
+/*
+ * Retrigger next event is called after clock was set
+ *
+ * Called with interrupts disabled via on_each_cpu()
+ */
+static void retrigger_next_event(void *arg)
+{
+	struct hrtimer_cpu_base *base;
+	struct timespec realtime_offset;
+	unsigned long seq;
+
+	if (!hrtimer_hres_active())
+		return;
+
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		set_normalized_timespec(&realtime_offset,
+					-wall_to_monotonic.tv_sec,
+					-wall_to_monotonic.tv_nsec);
+	} while (read_seqretry(&xtime_lock, seq));
+
+	base = &__get_cpu_var(hrtimer_bases);
+
+	/* Adjust CLOCK_REALTIME offset */
+	spin_lock(&base->lock);
+	base->clock_base[CLOCK_REALTIME].offset =
+		timespec_to_ktime(realtime_offset);
+
+	hrtimer_force_reprogram(base);
+	spin_unlock(&base->lock);
+}
+
+/*
+ * Clock realtime was set
+ *
+ * Change the offset of the realtime clock vs. the monotonic
+ * clock.
+ *
+ * We might have to reprogram the high resolution timer interrupt. On
+ * SMP we call the architecture specific code to retrigger _all_ high
+ * resolution timer interrupts. On UP we just disable interrupts and
+ * call the high resolution interrupt code.
+ */
+void clock_was_set(void)
+{
+	/* Retrigger the CPU local events everywhere */
+	on_each_cpu(retrigger_next_event, NULL, 0, 1);
+}
+
+/*
+ * Check, whether the timer is on the callback pending list
+ */
+static inline int hrtimer_cb_pending(const struct hrtimer *timer)
+{
+	return timer->state & HRTIMER_STATE_PENDING;
+}
+
+/*
+ * Remove a timer from the callback pending list
+ */
+static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
+{
+	list_del_init(&timer->cb_entry);
+}
+
+/*
+ * Initialize the high resolution related parts of cpu_base
+ */
+static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
+{
+	base->expires_next.tv64 = KTIME_MAX;
+	base->hres_active = 0;
+	INIT_LIST_HEAD(&base->cb_pending);
+}
+
+/*
+ * Initialize the high resolution related parts of a hrtimer
+ */
+static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
+{
+	INIT_LIST_HEAD(&timer->cb_entry);
+}
+
+/*
+ * When High resolution timers are active, try to reprogram. Note, that in case
+ * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
+ * check happens. The timer gets enqueued into the rbtree. The reprogramming
+ * and expiry check is done in the hrtimer_interrupt or in the softirq.
+ */
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base)
+{
+	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
+
+		/* Timer is expired, act upon the callback mode */
+		switch(timer->cb_mode) {
+		case HRTIMER_CB_IRQSAFE_NO_RESTART:
+			/*
+			 * We can call the callback from here. No restart
+			 * happens, so no danger of recursion
+			 */
+			BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
+			return 1;
+		case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+			/*
+			 * This is solely for the sched tick emulation with
+			 * dynamic tick support to ensure that we do not
+			 * restart the tick right on the edge and end up with
+			 * the tick timer in the softirq ! The calling site
+			 * takes care of this.
+			 */
+			return 1;
+		case HRTIMER_CB_IRQSAFE:
+		case HRTIMER_CB_SOFTIRQ:
+			/*
+			 * Move everything else into the softirq pending list !
+			 */
+			list_add_tail(&timer->cb_entry,
+				      &base->cpu_base->cb_pending);
+			timer->state = HRTIMER_STATE_PENDING;
+			raise_softirq(HRTIMER_SOFTIRQ);
+			return 1;
+		default:
+			BUG();
+		}
+	}
+	return 0;
+}
+
+/*
+ * Switch to high resolution mode
+ */
+static void hrtimer_switch_to_hres(void)
+{
+	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+	unsigned long flags;
+
+	if (base->hres_active)
+		return;
+
+	local_irq_save(flags);
+
+	if (tick_init_highres()) {
+		local_irq_restore(flags);
+		return;
+	}
+	base->hres_active = 1;
+	base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
+	base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
+
+	tick_setup_sched_timer();
+
+	/* "Retrigger" the interrupt to get things going */
+	retrigger_next_event(NULL);
+	local_irq_restore(flags);
+	printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
+	       smp_processor_id());
+}
+
+#else
+
+static inline int hrtimer_hres_active(void) { return 0; }
+static inline int hrtimer_is_hres_enabled(void) { return 0; }
+static inline void hrtimer_switch_to_hres(void) { }
+static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base)
+{
+	return 0;
+}
+static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
+static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
+static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+
+#endif /* CONFIG_HIGH_RES_TIMERS */
+
+#ifdef CONFIG_TIMER_STATS
+void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+{
+	if (timer->start_site)
+		return;
+
+	timer->start_site = addr;
+	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
+	timer->start_pid = current->pid;
+}
+#endif
+
 /*
  * Counterpart to lock_timer_base above:
  */
 static inline
 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
-	spin_unlock_irqrestore(&timer->base->lock, *flags);
+	spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
 }
 
 /**
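The enqueue path above dispatches on timer->cb_mode and treats timer->state as a small bit mask. A hedged sketch of the constants this code relies on, reconstructed from their use here (the definitive definitions, with exact values, are in include/linux/hrtimer.h):

/* Where, and in which context, a timer's callback may run. */
enum hrtimer_cb_mode {
	HRTIMER_CB_SOFTIRQ,		/* always runs from the HRTIMER_SOFTIRQ */
	HRTIMER_CB_IRQSAFE,		/* hard irq ok; deferred to softirq when it expires on enqueue */
	HRTIMER_CB_IRQSAFE_NO_RESTART,	/* hard irq, must return HRTIMER_NORESTART */
	HRTIMER_CB_IRQSAFE_NO_SOFTIRQ,	/* hard irq only; the sched-tick emulation case */
};

/* timer->state bits; they can be combined, e.g. a callback that
 * re-arms itself is CALLBACK | ENQUEUED for a moment. */
#define HRTIMER_STATE_INACTIVE	0x00
#define HRTIMER_STATE_ENQUEUED	0x01	/* in the rbtree */
#define HRTIMER_STATE_CALLBACK	0x02	/* callback currently running */
#define HRTIMER_STATE_PENDING	0x04	/* on the cb_pending list */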
@@ -342,7 +653,8 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
  * The timer is inserted in expiry order. Insertion into the
  * red black tree is O(log(n)). Must hold the base lock.
  */
-static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
+static void enqueue_hrtimer(struct hrtimer *timer,
+			    struct hrtimer_clock_base *base, int reprogram)
 {
 	struct rb_node **link = &base->active.rb_node;
 	struct rb_node *parent = NULL;
@@ -368,39 +680,85 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
 	 * Insert the timer to the rbtree and check whether it
 	 * replaces the first pending timer
 	 */
-	rb_link_node(&timer->node, parent, link);
-	rb_insert_color(&timer->node, &base->active);
-
 	if (!base->first || timer->expires.tv64 <
-	    rb_entry(base->first, struct hrtimer, node)->expires.tv64)
+	    rb_entry(base->first, struct hrtimer, node)->expires.tv64) {
+		/*
+		 * Reprogram the clock event device. When the timer is already
+		 * expired hrtimer_enqueue_reprogram has either called the
+		 * callback or added it to the pending list and raised the
+		 * softirq.
+		 *
+		 * This is a NOP for !HIGHRES
+		 */
+		if (reprogram && hrtimer_enqueue_reprogram(timer, base))
+			return;
+
 		base->first = &timer->node;
+	}
+
+	rb_link_node(&timer->node, parent, link);
+	rb_insert_color(&timer->node, &base->active);
+	/*
+	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
+	 * state of a possibly running callback.
+	 */
+	timer->state |= HRTIMER_STATE_ENQUEUED;
 }
 
 /*
  * __remove_hrtimer - internal function to remove a timer
  *
  * Caller must hold the base lock.
+ *
+ * High resolution timer mode reprograms the clock event device when the
+ * timer is the one which expires next. The caller can disable this by setting
+ * reprogram to zero. This is useful, when the context does a reprogramming
+ * anyway (e.g. timer interrupt)
  */
-static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
+static void __remove_hrtimer(struct hrtimer *timer,
+			     struct hrtimer_clock_base *base,
+			     unsigned long newstate, int reprogram)
 {
-	/*
-	 * Remove the timer from the rbtree and replace the
-	 * first entry pointer if necessary.
-	 */
-	if (base->first == &timer->node)
-		base->first = rb_next(&timer->node);
-	rb_erase(&timer->node, &base->active);
-	rb_set_parent(&timer->node, &timer->node);
+	/* High res. callback list. NOP for !HIGHRES */
+	if (hrtimer_cb_pending(timer))
+		hrtimer_remove_cb_pending(timer);
+	else {
+		/*
+		 * Remove the timer from the rbtree and replace the
+		 * first entry pointer if necessary.
+		 */
+		if (base->first == &timer->node) {
+			base->first = rb_next(&timer->node);
+			/* Reprogram the clock event device. if enabled */
+			if (reprogram && hrtimer_hres_active())
+				hrtimer_force_reprogram(base->cpu_base);
+		}
+		rb_erase(&timer->node, &base->active);
+	}
+	timer->state = newstate;
 }
 
 /*
  * remove hrtimer, called with base lock held
  */
 static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 {
-	if (hrtimer_active(timer)) {
-		__remove_hrtimer(timer, base);
+	if (hrtimer_is_queued(timer)) {
+		int reprogram;
+
+		/*
+		 * Remove the timer and force reprogramming when high
+		 * resolution mode is active and the timer is on the current
+		 * CPU. If we remove a timer on another CPU, reprogramming is
+		 * skipped. The interrupt event on this CPU is fired and
+		 * reprogramming happens in the interrupt handler. This is a
+		 * rare case and less expensive than a smp call.
+		 */
+		timer_stats_hrtimer_clear_start_info(timer);
+		reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
+		__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
+				 reprogram);
 		return 1;
 	}
 	return 0;
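remove_hrtimer now distinguishes "queued" (sitting in the rbtree or on the softirq pending list) from "active" (queued or currently running its callback). Hedged sketches of the two predicates, matching how this file uses them; the actual inline versions live in include/linux/hrtimer.h:

/* Queued: the timer still has an expiry to deliver. */
static inline int hrtimer_is_queued(struct hrtimer *timer)
{
	return timer->state &
		(HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
}

/* Active: queued or currently executing its callback; such a timer
 * must not be freed or reinitialized. */
static inline int hrtimer_active(const struct hrtimer *timer)
{
	return timer->state != HRTIMER_STATE_INACTIVE;
}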
@@ -419,7 +777,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_base *base)
 int
 hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 {
-	struct hrtimer_base *base, *new_base;
+	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
 	int ret;
 
@@ -431,7 +789,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base);
 
-	if (mode == HRTIMER_REL) {
+	if (mode == HRTIMER_MODE_REL) {
 		tim = ktime_add(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
@@ -446,7 +804,9 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	}
 	timer->expires = tim;
 
-	enqueue_hrtimer(timer, new_base);
+	timer_stats_hrtimer_set_start_info(timer);
+
+	enqueue_hrtimer(timer, new_base, base == new_base);
 
 	unlock_hrtimer_base(timer, &flags);
 
@@ -466,13 +826,13 @@ EXPORT_SYMBOL_GPL(hrtimer_start);
  */
 int hrtimer_try_to_cancel(struct hrtimer *timer)
 {
-	struct hrtimer_base *base;
+	struct hrtimer_clock_base *base;
 	unsigned long flags;
 	int ret = -1;
 
 	base = lock_hrtimer_base(timer, &flags);
 
-	if (base->curr_timer != timer)
+	if (!hrtimer_callback_running(timer))
 		ret = remove_hrtimer(timer, base);
 
 	unlock_hrtimer_base(timer, &flags);
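hrtimer_try_to_cancel returns -1 when it loses the race with a callback that is currently running, and the per-timer CALLBACK bit (rather than the old per-base curr_timer pointer) is what detects that. The synchronous variant simply spins on that result; roughly how hrtimer_cancel(), which this patch leaves unchanged, is built on top of it:

int hrtimer_cancel(struct hrtimer *timer)
{
	for (;;) {
		int ret = hrtimer_try_to_cancel(timer);

		if (ret >= 0)
			return ret;	/* 0: was not queued, 1: dequeued */
		cpu_relax();		/* callback running, retry */
	}
}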
@@ -508,19 +868,19 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
  */
 ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 {
-	struct hrtimer_base *base;
+	struct hrtimer_clock_base *base;
 	unsigned long flags;
 	ktime_t rem;
 
 	base = lock_hrtimer_base(timer, &flags);
-	rem = ktime_sub(timer->expires, timer->base->get_time());
+	rem = ktime_sub(timer->expires, base->get_time());
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
 /**
  * hrtimer_get_next_event - get the time until next expiry event
  *
@@ -529,26 +889,31 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
  */
 ktime_t hrtimer_get_next_event(void)
 {
-	struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+	struct hrtimer_clock_base *base = cpu_base->clock_base;
 	ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
 	unsigned long flags;
 	int i;
 
-	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
-		struct hrtimer *timer;
+	spin_lock_irqsave(&cpu_base->lock, flags);
 
-		spin_lock_irqsave(&base->lock, flags);
-		if (!base->first) {
-			spin_unlock_irqrestore(&base->lock, flags);
-			continue;
+	if (!hrtimer_hres_active()) {
+		for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) {
+			struct hrtimer *timer;
+
+			if (!base->first)
+				continue;
+
+			timer = rb_entry(base->first, struct hrtimer, node);
+			delta.tv64 = timer->expires.tv64;
+			delta = ktime_sub(delta, base->get_time());
+			if (delta.tv64 < mindelta.tv64)
+				mindelta.tv64 = delta.tv64;
 		}
-		timer = rb_entry(base->first, struct hrtimer, node);
-		delta.tv64 = timer->expires.tv64;
-		spin_unlock_irqrestore(&base->lock, flags);
-		delta = ktime_sub(delta, base->get_time());
-		if (delta.tv64 < mindelta.tv64)
-			mindelta.tv64 = delta.tv64;
 	}
+
+	spin_unlock_irqrestore(&cpu_base->lock, flags);
+
 	if (mindelta.tv64 < 0)
 		mindelta.tv64 = 0;
 	return mindelta;
@@ -564,17 +929,23 @@ ktime_t hrtimer_get_next_event(void)
 void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 		  enum hrtimer_mode mode)
 {
-	struct hrtimer_base *bases;
+	struct hrtimer_cpu_base *cpu_base;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
-	bases = __raw_get_cpu_var(hrtimer_bases);
+	cpu_base = &__raw_get_cpu_var(hrtimer_bases);
 
-	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_ABS)
+	if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
 		clock_id = CLOCK_MONOTONIC;
 
-	timer->base = &bases[clock_id];
-	rb_set_parent(&timer->node, &timer->node);
+	timer->base = &cpu_base->clock_base[clock_id];
+	hrtimer_init_timer_hres(timer);
+
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+	timer->start_pid = -1;
+	memset(timer->start_comm, 0, TASK_COMM_LEN);
+#endif
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
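timer_stats_hrtimer_set_start_info, used by hrtimer_start above, is a thin macro around the __timer_stats_hrtimer_set_start_info helper this patch adds, recording the call site that armed the timer. A hedged sketch of that plumbing (the real macros live in include/linux/hrtimer.h and compile away entirely without CONFIG_TIMER_STATS):

#ifdef CONFIG_TIMER_STATS
/* Record the arming call site for /proc/timer_stats accounting. */
# define timer_stats_hrtimer_set_start_info(timer)			\
	__timer_stats_hrtimer_set_start_info((timer),			\
			__builtin_return_address(0))
#else
# define timer_stats_hrtimer_set_start_info(timer) do { } while (0)
#endif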
@@ -588,21 +959,159 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
  */
 int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 {
-	struct hrtimer_base *bases;
+	struct hrtimer_cpu_base *cpu_base;
 
-	bases = __raw_get_cpu_var(hrtimer_bases);
-	*tp = ktime_to_timespec(bases[which_clock].resolution);
+	cpu_base = &__raw_get_cpu_var(hrtimer_bases);
+	*tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * High resolution timer interrupt
+ * Called with interrupts disabled
+ */
+void hrtimer_interrupt(struct clock_event_device *dev)
+{
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+	struct hrtimer_clock_base *base;
+	ktime_t expires_next, now;
+	int i, raise = 0;
+
+	BUG_ON(!cpu_base->hres_active);
+	cpu_base->nr_events++;
+	dev->next_event.tv64 = KTIME_MAX;
+
+ retry:
+	now = ktime_get();
+
+	expires_next.tv64 = KTIME_MAX;
+
+	base = cpu_base->clock_base;
+
+	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
+		ktime_t basenow;
+		struct rb_node *node;
+
+		spin_lock(&cpu_base->lock);
+
+		basenow = ktime_add(now, base->offset);
+
+		while ((node = base->first)) {
+			struct hrtimer *timer;
+
+			timer = rb_entry(node, struct hrtimer, node);
+
+			if (basenow.tv64 < timer->expires.tv64) {
+				ktime_t expires;
+
+				expires = ktime_sub(timer->expires,
+						    base->offset);
+				if (expires.tv64 < expires_next.tv64)
+					expires_next = expires;
+				break;
+			}
+
+			/* Move softirq callbacks to the pending list */
+			if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+				__remove_hrtimer(timer, base,
+						 HRTIMER_STATE_PENDING, 0);
+				list_add_tail(&timer->cb_entry,
+					      &base->cpu_base->cb_pending);
+				raise = 1;
+				continue;
+			}
+
+			__remove_hrtimer(timer, base,
+					 HRTIMER_STATE_CALLBACK, 0);
+			timer_stats_account_hrtimer(timer);
+
+			/*
+			 * Note: We clear the CALLBACK bit after
+			 * enqueue_hrtimer to avoid reprogramming of
+			 * the event hardware. This happens at the end
+			 * of this function anyway.
+			 */
+			if (timer->function(timer) != HRTIMER_NORESTART) {
+				BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+				enqueue_hrtimer(timer, base, 0);
+			}
+			timer->state &= ~HRTIMER_STATE_CALLBACK;
+		}
+		spin_unlock(&cpu_base->lock);
+		base++;
+	}
+
+	cpu_base->expires_next = expires_next;
+
+	/* Reprogramming necessary ? */
+	if (expires_next.tv64 != KTIME_MAX) {
+		if (tick_program_event(expires_next, 0))
+			goto retry;
+	}
+
+	/* Raise softirq ? */
+	if (raise)
+		raise_softirq(HRTIMER_SOFTIRQ);
+}
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+
+	spin_lock_irq(&cpu_base->lock);
+
+	while (!list_empty(&cpu_base->cb_pending)) {
+		enum hrtimer_restart (*fn)(struct hrtimer *);
+		struct hrtimer *timer;
+		int restart;
+
+		timer = list_entry(cpu_base->cb_pending.next,
+				   struct hrtimer, cb_entry);
+
+		timer_stats_account_hrtimer(timer);
+
+		fn = timer->function;
+		__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
+		spin_unlock_irq(&cpu_base->lock);
+
+		restart = fn(timer);
| 1083 | spin_lock_irq(&cpu_base->lock); | ||
| 1084 | |||
| 1085 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
| 1086 | if (restart == HRTIMER_RESTART) { | ||
| 1087 | BUG_ON(hrtimer_active(timer)); | ||
| 1088 | /* | ||
| 1089 | * Enqueue the timer, allow reprogramming of the event | ||
| 1090 | * device | ||
| 1091 | */ | ||
| 1092 | enqueue_hrtimer(timer, timer->base, 1); | ||
| 1093 | } else if (hrtimer_active(timer)) { | ||
| 1094 | /* | ||
| 1095 | * If the timer was rearmed on another CPU, reprogram | ||
| 1096 | * the event device. | ||
| 1097 | */ | ||
| 1098 | if (timer->base->first == &timer->node) | ||
| 1099 | hrtimer_reprogram(timer, timer->base); | ||
| 1100 | } | ||
| 1101 | } | ||
| 1102 | spin_unlock_irq(&cpu_base->lock); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
| 1106 | |||
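Where a callback runs is now a per-timer property: hrtimer_interrupt() executes HRTIMER_CB_IRQSAFE* callbacks directly in hard-irq context and defers HRTIMER_CB_SOFTIRQ ones to the cb_pending list drained by run_hrtimer_softirq(). A sketch of how a timer opts into the softirq path (demo_* names hypothetical, assuming the cb_mode values used in the hunks above):

    hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    #ifdef CONFIG_HIGH_RES_TIMERS
            /* not hard-irq safe: run from run_hrtimer_softirq() instead */
            demo_timer.cb_mode = HRTIMER_CB_SOFTIRQ;
    #endif
    demo_timer.function = demo_fn;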
| 600 | /* | 1107 | /* |
| 601 | * Expire the per base hrtimer-queue: | 1108 | * Expire the per base hrtimer-queue: |
| 602 | */ | 1109 | */ |
| 603 | static inline void run_hrtimer_queue(struct hrtimer_base *base) | 1110 | static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, |
| 1111 | int index) | ||
| 604 | { | 1112 | { |
| 605 | struct rb_node *node; | 1113 | struct rb_node *node; |
| 1114 | struct hrtimer_clock_base *base = &cpu_base->clock_base[index]; | ||
| 606 | 1115 | ||
| 607 | if (!base->first) | 1116 | if (!base->first) |
| 608 | return; | 1117 | return; |
| @@ -610,53 +1119,72 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) | |||
| 610 | if (base->get_softirq_time) | 1119 | if (base->get_softirq_time) |
| 611 | base->softirq_time = base->get_softirq_time(); | 1120 | base->softirq_time = base->get_softirq_time(); |
| 612 | 1121 | ||
| 613 | spin_lock_irq(&base->lock); | 1122 | spin_lock_irq(&cpu_base->lock); |
| 614 | 1123 | ||
| 615 | while ((node = base->first)) { | 1124 | while ((node = base->first)) { |
| 616 | struct hrtimer *timer; | 1125 | struct hrtimer *timer; |
| 617 | int (*fn)(struct hrtimer *); | 1126 | enum hrtimer_restart (*fn)(struct hrtimer *); |
| 618 | int restart; | 1127 | int restart; |
| 619 | 1128 | ||
| 620 | timer = rb_entry(node, struct hrtimer, node); | 1129 | timer = rb_entry(node, struct hrtimer, node); |
| 621 | if (base->softirq_time.tv64 <= timer->expires.tv64) | 1130 | if (base->softirq_time.tv64 <= timer->expires.tv64) |
| 622 | break; | 1131 | break; |
| 623 | 1132 | ||
| 1133 | timer_stats_account_hrtimer(timer); | ||
| 1134 | |||
| 624 | fn = timer->function; | 1135 | fn = timer->function; |
| 625 | set_curr_timer(base, timer); | 1136 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); |
| 626 | __remove_hrtimer(timer, base); | 1137 | spin_unlock_irq(&cpu_base->lock); |
| 627 | spin_unlock_irq(&base->lock); | ||
| 628 | 1138 | ||
| 629 | restart = fn(timer); | 1139 | restart = fn(timer); |
| 630 | 1140 | ||
| 631 | spin_lock_irq(&base->lock); | 1141 | spin_lock_irq(&cpu_base->lock); |
| 632 | 1142 | ||
| 1143 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
| 633 | if (restart != HRTIMER_NORESTART) { | 1144 | if (restart != HRTIMER_NORESTART) { |
| 634 | BUG_ON(hrtimer_active(timer)); | 1145 | BUG_ON(hrtimer_active(timer)); |
| 635 | enqueue_hrtimer(timer, base); | 1146 | enqueue_hrtimer(timer, base, 0); |
| 636 | } | 1147 | } |
| 637 | } | 1148 | } |
| 638 | set_curr_timer(base, NULL); | 1149 | spin_unlock_irq(&cpu_base->lock); |
| 639 | spin_unlock_irq(&base->lock); | ||
| 640 | } | 1150 | } |
| 641 | 1151 | ||
| 642 | /* | 1152 | /* |
| 643 | * Called from timer softirq every jiffy, expire hrtimers: | 1153 | * Called from timer softirq every jiffy, expire hrtimers: |
| 1154 | * | ||
| 1155 | * For HRT it's the fallback code to run the softirq in the timer | ||
| 1156 | * softirq context in case the hrtimer initialization failed or has | ||
| 1157 | * not been done yet. | ||
| 644 | */ | 1158 | */ |
| 645 | void hrtimer_run_queues(void) | 1159 | void hrtimer_run_queues(void) |
| 646 | { | 1160 | { |
| 647 | struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); | 1161 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
| 648 | int i; | 1162 | int i; |
| 649 | 1163 | ||
| 650 | hrtimer_get_softirq_time(base); | 1164 | if (hrtimer_hres_active()) |
| 1165 | return; | ||
| 1166 | |||
| 1167 | /* | ||
| 1168 | * This _is_ ugly: We have to check in the softirq context | ||
| 1169 | * whether we can switch to highres and/or nohz mode. The | ||
| 1170 | * clocksource switch happens in the timer interrupt with | ||
| 1171 | * xtime_lock held. Notification from there only sets the | ||
| 1172 | * check bit in the tick_oneshot code, otherwise we might | ||
| 1173 | * deadlock vs. xtime_lock. | ||
| 1174 | */ | ||
| 1175 | if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) | ||
| 1176 | hrtimer_switch_to_hres(); | ||
| 651 | 1177 | ||
| 652 | for (i = 0; i < MAX_HRTIMER_BASES; i++) | 1178 | hrtimer_get_softirq_time(cpu_base); |
| 653 | run_hrtimer_queue(&base[i]); | 1179 | |
| 1180 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | ||
| 1181 | run_hrtimer_queue(cpu_base, i); | ||
| 654 | } | 1182 | } |
| 655 | 1183 | ||
| 656 | /* | 1184 | /* |
| 657 | * Sleep related functions: | 1185 | * Sleep related functions: |
| 658 | */ | 1186 | */ |
| 659 | static int hrtimer_wakeup(struct hrtimer *timer) | 1187 | static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) |
| 660 | { | 1188 | { |
| 661 | struct hrtimer_sleeper *t = | 1189 | struct hrtimer_sleeper *t = |
| 662 | container_of(timer, struct hrtimer_sleeper, timer); | 1190 | container_of(timer, struct hrtimer_sleeper, timer); |
| @@ -673,6 +1201,9 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |||
| 673 | { | 1201 | { |
| 674 | sl->timer.function = hrtimer_wakeup; | 1202 | sl->timer.function = hrtimer_wakeup; |
| 675 | sl->task = task; | 1203 | sl->task = task; |
| 1204 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 1205 | sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_RESTART; | ||
| 1206 | #endif | ||
| 676 | } | 1207 | } |
| 677 | 1208 | ||
| 678 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | 1209 | static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) |
| @@ -683,10 +1214,11 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod | |||
| 683 | set_current_state(TASK_INTERRUPTIBLE); | 1214 | set_current_state(TASK_INTERRUPTIBLE); |
| 684 | hrtimer_start(&t->timer, t->timer.expires, mode); | 1215 | hrtimer_start(&t->timer, t->timer.expires, mode); |
| 685 | 1216 | ||
| 686 | schedule(); | 1217 | if (likely(t->task)) |
| 1218 | schedule(); | ||
| 687 | 1219 | ||
| 688 | hrtimer_cancel(&t->timer); | 1220 | hrtimer_cancel(&t->timer); |
| 689 | mode = HRTIMER_ABS; | 1221 | mode = HRTIMER_MODE_ABS; |
| 690 | 1222 | ||
| 691 | } while (t->task && !signal_pending(current)); | 1223 | } while (t->task && !signal_pending(current)); |
| 692 | 1224 | ||
| @@ -702,10 +1234,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |||
| 702 | 1234 | ||
| 703 | restart->fn = do_no_restart_syscall; | 1235 | restart->fn = do_no_restart_syscall; |
| 704 | 1236 | ||
| 705 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_ABS); | 1237 | hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); |
| 706 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; | 1238 | t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; |
| 707 | 1239 | ||
| 708 | if (do_nanosleep(&t, HRTIMER_ABS)) | 1240 | if (do_nanosleep(&t, HRTIMER_MODE_ABS)) |
| 709 | return 0; | 1241 | return 0; |
| 710 | 1242 | ||
| 711 | rmtp = (struct timespec __user *) restart->arg1; | 1243 | rmtp = (struct timespec __user *) restart->arg1; |
| @@ -738,7 +1270,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |||
| 738 | return 0; | 1270 | return 0; |
| 739 | 1271 | ||
| 740 | /* Absolute timers do not update the rmtp value and restart: */ | 1272 | /* Absolute timers do not update the rmtp value and restart: */ |
| 741 | if (mode == HRTIMER_ABS) | 1273 | if (mode == HRTIMER_MODE_ABS) |
| 742 | return -ERESTARTNOHAND; | 1274 | return -ERESTARTNOHAND; |
| 743 | 1275 | ||
| 744 | if (rmtp) { | 1276 | if (rmtp) { |
| @@ -771,7 +1303,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
| 771 | if (!timespec_valid(&tu)) | 1303 | if (!timespec_valid(&tu)) |
| 772 | return -EINVAL; | 1304 | return -EINVAL; |
| 773 | 1305 | ||
| 774 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_REL, CLOCK_MONOTONIC); | 1306 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); |
| 775 | } | 1307 | } |
| 776 | 1308 | ||
| 777 | /* | 1309 | /* |
| @@ -779,56 +1311,60 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) | |||
| 779 | */ | 1311 | */ |
| 780 | static void __devinit init_hrtimers_cpu(int cpu) | 1312 | static void __devinit init_hrtimers_cpu(int cpu) |
| 781 | { | 1313 | { |
| 782 | struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu); | 1314 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); |
| 783 | int i; | 1315 | int i; |
| 784 | 1316 | ||
| 785 | for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { | 1317 | spin_lock_init(&cpu_base->lock); |
| 786 | spin_lock_init(&base->lock); | 1318 | lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); |
| 787 | lockdep_set_class(&base->lock, &base->lock_key); | 1319 | |
| 788 | } | 1320 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
| 1321 | cpu_base->clock_base[i].cpu_base = cpu_base; | ||
| 1322 | |||
| 1323 | hrtimer_init_hres(cpu_base); | ||
| 789 | } | 1324 | } |
| 790 | 1325 | ||
| 791 | #ifdef CONFIG_HOTPLUG_CPU | 1326 | #ifdef CONFIG_HOTPLUG_CPU |
| 792 | 1327 | ||
| 793 | static void migrate_hrtimer_list(struct hrtimer_base *old_base, | 1328 | static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, |
| 794 | struct hrtimer_base *new_base) | 1329 | struct hrtimer_clock_base *new_base) |
| 795 | { | 1330 | { |
| 796 | struct hrtimer *timer; | 1331 | struct hrtimer *timer; |
| 797 | struct rb_node *node; | 1332 | struct rb_node *node; |
| 798 | 1333 | ||
| 799 | while ((node = rb_first(&old_base->active))) { | 1334 | while ((node = rb_first(&old_base->active))) { |
| 800 | timer = rb_entry(node, struct hrtimer, node); | 1335 | timer = rb_entry(node, struct hrtimer, node); |
| 801 | __remove_hrtimer(timer, old_base); | 1336 | BUG_ON(hrtimer_callback_running(timer)); |
| 1337 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); | ||
| 802 | timer->base = new_base; | 1338 | timer->base = new_base; |
| 803 | enqueue_hrtimer(timer, new_base); | 1339 | /* |
| 1340 | * Enqueue the timer. Allow reprogramming of the event device | ||
| 1341 | */ | ||
| 1342 | enqueue_hrtimer(timer, new_base, 1); | ||
| 804 | } | 1343 | } |
| 805 | } | 1344 | } |
| 806 | 1345 | ||
| 807 | static void migrate_hrtimers(int cpu) | 1346 | static void migrate_hrtimers(int cpu) |
| 808 | { | 1347 | { |
| 809 | struct hrtimer_base *old_base, *new_base; | 1348 | struct hrtimer_cpu_base *old_base, *new_base; |
| 810 | int i; | 1349 | int i; |
| 811 | 1350 | ||
| 812 | BUG_ON(cpu_online(cpu)); | 1351 | BUG_ON(cpu_online(cpu)); |
| 813 | old_base = per_cpu(hrtimer_bases, cpu); | 1352 | old_base = &per_cpu(hrtimer_bases, cpu); |
| 814 | new_base = get_cpu_var(hrtimer_bases); | 1353 | new_base = &get_cpu_var(hrtimer_bases); |
| 815 | |||
| 816 | local_irq_disable(); | ||
| 817 | 1354 | ||
| 818 | for (i = 0; i < MAX_HRTIMER_BASES; i++) { | 1355 | tick_cancel_sched_timer(cpu); |
| 819 | 1356 | ||
| 820 | spin_lock(&new_base->lock); | 1357 | local_irq_disable(); |
| 821 | spin_lock(&old_base->lock); | ||
| 822 | |||
| 823 | BUG_ON(old_base->curr_timer); | ||
| 824 | 1358 | ||
| 825 | migrate_hrtimer_list(old_base, new_base); | 1359 | spin_lock(&new_base->lock); |
| 1360 | spin_lock(&old_base->lock); | ||
| 826 | 1361 | ||
| 827 | spin_unlock(&old_base->lock); | 1362 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
| 828 | spin_unlock(&new_base->lock); | 1363 | migrate_hrtimer_list(&old_base->clock_base[i], |
| 829 | old_base++; | 1364 | &new_base->clock_base[i]); |
| 830 | new_base++; | ||
| 831 | } | 1365 | } |
| 1366 | spin_unlock(&old_base->lock); | ||
| 1367 | spin_unlock(&new_base->lock); | ||
| 832 | 1368 | ||
| 833 | local_irq_enable(); | 1369 | local_irq_enable(); |
| 834 | put_cpu_var(hrtimer_bases); | 1370 | put_cpu_var(hrtimer_bases); |
| @@ -848,6 +1384,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
| 848 | 1384 | ||
| 849 | #ifdef CONFIG_HOTPLUG_CPU | 1385 | #ifdef CONFIG_HOTPLUG_CPU |
| 850 | case CPU_DEAD: | 1386 | case CPU_DEAD: |
| 1387 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); | ||
| 851 | migrate_hrtimers(cpu); | 1388 | migrate_hrtimers(cpu); |
| 852 | break; | 1389 | break; |
| 853 | #endif | 1390 | #endif |
| @@ -868,5 +1405,8 @@ void __init hrtimers_init(void) | |||
| 868 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, | 1405 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, |
| 869 | (void *)(long)smp_processor_id()); | 1406 | (void *)(long)smp_processor_id()); |
| 870 | register_cpu_notifier(&hrtimers_nb); | 1407 | register_cpu_notifier(&hrtimers_nb); |
| 1408 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 1409 | open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); | ||
| 1410 | #endif | ||
| 871 | } | 1411 | } |
| 872 | 1412 | ||
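The new "if (likely(t->task)) schedule()" guard in do_nanosleep() matters because hrtimer_wakeup() clears t->task when the timer fires; if that happens before the sleeper reaches schedule(), skipping the call avoids blocking with nothing left to wake us. A condensed sketch of the sleeper pattern the nanosleep paths rely on (one-second relative sleep, error handling elided):

    struct hrtimer_sleeper t;

    hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    t.timer.expires = ktime_set(1, 0);
    hrtimer_init_sleeper(&t, current);

    set_current_state(TASK_INTERRUPTIBLE);
    hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_REL);
    if (likely(t.task))             /* timer may already have fired */
            schedule();
    hrtimer_cancel(&t.timer);
    __set_current_state(TASK_RUNNING);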
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 475e8a71bcdc..0133f4f9e9f0 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
| @@ -168,7 +168,7 @@ EXPORT_SYMBOL(set_irq_data); | |||
| 168 | /** | 168 | /** |
| 169 | * set_irq_data - set irq type data for an irq | 169 | * set_irq_data - set irq type data for an irq |
| 170 | * @irq: Interrupt number | 170 | * @irq: Interrupt number |
| 171 | * @data: Pointer to interrupt specific data | 171 | * @entry: Pointer to MSI descriptor data |
| 172 | * | 172 | * |
| 173 | * Set the hardware irq controller data for an irq | 173 | * Set the hardware irq controller data for an irq |
| 174 | */ | 174 | */ |
| @@ -230,10 +230,6 @@ static void default_enable(unsigned int irq) | |||
| 230 | */ | 230 | */ |
| 231 | static void default_disable(unsigned int irq) | 231 | static void default_disable(unsigned int irq) |
| 232 | { | 232 | { |
| 233 | struct irq_desc *desc = irq_desc + irq; | ||
| 234 | |||
| 235 | if (!(desc->status & IRQ_DELAYED_DISABLE)) | ||
| 236 | desc->chip->mask(irq); | ||
| 237 | } | 233 | } |
| 238 | 234 | ||
| 239 | /* | 235 | /* |
| @@ -298,13 +294,18 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) | |||
| 298 | 294 | ||
| 299 | if (unlikely(desc->status & IRQ_INPROGRESS)) | 295 | if (unlikely(desc->status & IRQ_INPROGRESS)) |
| 300 | goto out_unlock; | 296 | goto out_unlock; |
| 301 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | ||
| 302 | kstat_cpu(cpu).irqs[irq]++; | 297 | kstat_cpu(cpu).irqs[irq]++; |
| 303 | 298 | ||
| 304 | action = desc->action; | 299 | action = desc->action; |
| 305 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) | 300 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
| 301 | if (desc->chip->mask) | ||
| 302 | desc->chip->mask(irq); | ||
| 303 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); | ||
| 304 | desc->status |= IRQ_PENDING; | ||
| 306 | goto out_unlock; | 305 | goto out_unlock; |
| 306 | } | ||
| 307 | 307 | ||
| 308 | desc->status &= ~(IRQ_REPLAY | IRQ_WAITING | IRQ_PENDING); | ||
| 308 | desc->status |= IRQ_INPROGRESS; | 309 | desc->status |= IRQ_INPROGRESS; |
| 309 | spin_unlock(&desc->lock); | 310 | spin_unlock(&desc->lock); |
| 310 | 311 | ||
| @@ -396,11 +397,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
| 396 | 397 | ||
| 397 | /* | 398 | /* |
| 398 | * If it's disabled or no action available | 399 | * If it's disabled or no action available |
| 399 | * keep it masked and get out of here | 400 | * then mask it and get out of here: |
| 400 | */ | 401 | */ |
| 401 | action = desc->action; | 402 | action = desc->action; |
| 402 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { | 403 | if (unlikely(!action || (desc->status & IRQ_DISABLED))) { |
| 403 | desc->status |= IRQ_PENDING; | 404 | desc->status |= IRQ_PENDING; |
| 405 | if (desc->chip->mask) | ||
| 406 | desc->chip->mask(irq); | ||
| 404 | goto out; | 407 | goto out; |
| 405 | } | 408 | } |
| 406 | 409 | ||
| @@ -562,10 +565,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
| 562 | 565 | ||
| 563 | /* Uninstall? */ | 566 | /* Uninstall? */ |
| 564 | if (handle == handle_bad_irq) { | 567 | if (handle == handle_bad_irq) { |
| 565 | if (desc->chip != &no_irq_chip) { | 568 | if (desc->chip != &no_irq_chip) |
| 566 | desc->chip->mask(irq); | 569 | mask_ack_irq(desc, irq); |
| 567 | desc->chip->ack(irq); | ||
| 568 | } | ||
| 569 | desc->status |= IRQ_DISABLED; | 570 | desc->status |= IRQ_DISABLED; |
| 570 | desc->depth = 1; | 571 | desc->depth = 1; |
| 571 | } | 572 | } |
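With default_disable() reduced to a no-op, disabling an interrupt becomes lazy by default: the hardware stays unmasked until the next delivery, at which point the flow handlers above mask the chip and record IRQ_PENDING so the interrupt can be replayed on enable. A sketch of the driver-visible sequence (demo_irq is a hypothetical interrupt number):

    disable_irq(demo_irq);  /* marks IRQ_DISABLED; hardware not masked yet */
    /* next delivery: flow handler masks the chip and sets IRQ_PENDING */
    enable_irq(demo_irq);   /* unmasks; a pending interrupt gets replayed */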
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index acc5d9fe462b..5597c157442a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -38,6 +38,46 @@ void synchronize_irq(unsigned int irq) | |||
| 38 | } | 38 | } |
| 39 | EXPORT_SYMBOL(synchronize_irq); | 39 | EXPORT_SYMBOL(synchronize_irq); |
| 40 | 40 | ||
| 41 | /** | ||
| 42 | * irq_can_set_affinity - Check if the affinity of a given irq can be set | ||
| 43 | * @irq: Interrupt to check | ||
| 44 | * | ||
| 45 | */ | ||
| 46 | int irq_can_set_affinity(unsigned int irq) | ||
| 47 | { | ||
| 48 | struct irq_desc *desc = irq_desc + irq; | ||
| 49 | |||
| 50 | if (CHECK_IRQ_PER_CPU(desc->status) || !desc->chip || | ||
| 51 | !desc->chip->set_affinity) | ||
| 52 | return 0; | ||
| 53 | |||
| 54 | return 1; | ||
| 55 | } | ||
| 56 | |||
| 57 | /** | ||
| 58 | * irq_set_affinity - Set the irq affinity of a given irq | ||
| 59 | * @irq: Interrupt to set affinity | ||
| 60 | * @cpumask: cpumask to route the interrupt to | ||
| 61 | * | ||
| 62 | */ | ||
| 63 | int irq_set_affinity(unsigned int irq, cpumask_t cpumask) | ||
| 64 | { | ||
| 65 | struct irq_desc *desc = irq_desc + irq; | ||
| 66 | |||
| 67 | if (!desc->chip->set_affinity) | ||
| 68 | return -EINVAL; | ||
| 69 | |||
| 70 | set_balance_irq_affinity(irq, cpumask); | ||
| 71 | |||
| 72 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 73 | set_pending_irq(irq, cpumask); | ||
| 74 | #else | ||
| 75 | desc->affinity = cpumask; | ||
| 76 | desc->chip->set_affinity(irq, cpumask); | ||
| 77 | #endif | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | |||
| 41 | #endif | 81 | #endif |
| 42 | 82 | ||
| 43 | /** | 83 | /** |
| @@ -281,6 +321,10 @@ int setup_irq(unsigned int irq, struct irqaction *new) | |||
| 281 | if (new->flags & IRQF_PERCPU) | 321 | if (new->flags & IRQF_PERCPU) |
| 282 | desc->status |= IRQ_PER_CPU; | 322 | desc->status |= IRQ_PER_CPU; |
| 283 | #endif | 323 | #endif |
| 324 | /* Exclude IRQ from balancing */ | ||
| 325 | if (new->flags & IRQF_NOBALANCING) | ||
| 326 | desc->status |= IRQ_NO_BALANCING; | ||
| 327 | |||
| 284 | if (!shared) { | 328 | if (!shared) { |
| 285 | irq_chip_set_defaults(desc->chip); | 329 | irq_chip_set_defaults(desc->chip); |
| 286 | 330 | ||
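irq_can_set_affinity() and irq_set_affinity() give in-kernel users the same path the /proc handler now takes, while IRQF_NOBALANCING keeps the balancer off a line entirely. A sketch (demo_irq, demo_handler and demo_dev are hypothetical):

    if (request_irq(demo_irq, demo_handler, IRQF_NOBALANCING, "demo", demo_dev))
            return -EBUSY;

    if (irq_can_set_affinity(demo_irq))
            irq_set_affinity(demo_irq, cpumask_of_cpu(0));  /* pin to CPU 0 */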
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6d3be06e8ce6..2db91eb54ad8 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
| @@ -16,26 +16,6 @@ static struct proc_dir_entry *root_irq_dir; | |||
| 16 | 16 | ||
| 17 | #ifdef CONFIG_SMP | 17 | #ifdef CONFIG_SMP |
| 18 | 18 | ||
| 19 | #ifdef CONFIG_GENERIC_PENDING_IRQ | ||
| 20 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
| 21 | { | ||
| 22 | set_balance_irq_affinity(irq, mask_val); | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Save these away for later use. Re-program when the | ||
| 26 | * interrupt is pending | ||
| 27 | */ | ||
| 28 | set_pending_irq(irq, mask_val); | ||
| 29 | } | ||
| 30 | #else | ||
| 31 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
| 32 | { | ||
| 33 | set_balance_irq_affinity(irq, mask_val); | ||
| 34 | irq_desc[irq].affinity = mask_val; | ||
| 35 | irq_desc[irq].chip->set_affinity(irq, mask_val); | ||
| 36 | } | ||
| 37 | #endif | ||
| 38 | |||
| 39 | static int irq_affinity_read_proc(char *page, char **start, off_t off, | 19 | static int irq_affinity_read_proc(char *page, char **start, off_t off, |
| 40 | int count, int *eof, void *data) | 20 | int count, int *eof, void *data) |
| 41 | { | 21 | { |
| @@ -55,7 +35,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
| 55 | cpumask_t new_value, tmp; | 35 | cpumask_t new_value, tmp; |
| 56 | 36 | ||
| 57 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || | 37 | if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || |
| 58 | CHECK_IRQ_PER_CPU(irq_desc[irq].status)) | 38 | irq_balancing_disabled(irq)) |
| 59 | return -EIO; | 39 | return -EIO; |
| 60 | 40 | ||
| 61 | err = cpumask_parse_user(buffer, count, new_value); | 41 | err = cpumask_parse_user(buffer, count, new_value); |
| @@ -73,7 +53,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, | |||
| 73 | code to set default SMP affinity. */ | 53 | code to set default SMP affinity. */ |
| 74 | return select_smp_affinity(irq) ? -EINVAL : full_count; | 54 | return select_smp_affinity(irq) ? -EINVAL : full_count; |
| 75 | 55 | ||
| 76 | proc_set_irq_affinity(irq, new_value); | 56 | irq_set_affinity(irq, new_value); |
| 77 | 57 | ||
| 78 | return full_count; | 58 | return full_count; |
| 79 | } | 59 | } |
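The rewritten write handler rejects any line for which balancing is disabled and funnels the update through irq_set_affinity() instead of the removed proc_set_irq_affinity(). What it services, sketched from userspace (irq 16 is only an example):

    /* Sketch (userspace): set the CPU mask for an interrupt. */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/irq/16/smp_affinity", "w");
            if (!f)
                    return 1;
            fprintf(f, "2\n");      /* hex mask: route to CPU 1 */
            return fclose(f) ? 1 : 0;
    }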
diff --git a/kernel/itimer.c b/kernel/itimer.c index 204ed7939e75..307c6a632ef6 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c | |||
| @@ -128,18 +128,13 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) | |||
| 128 | /* | 128 | /* |
| 129 | * The timer is automagically restarted when interval != 0 | 129 | * The timer is automagically restarted when interval != 0 |
| 130 | */ | 130 | */ |
| 131 | int it_real_fn(struct hrtimer *timer) | 131 | enum hrtimer_restart it_real_fn(struct hrtimer *timer) |
| 132 | { | 132 | { |
| 133 | struct signal_struct *sig = | 133 | struct signal_struct *sig = |
| 134 | container_of(timer, struct signal_struct, real_timer); | 134 | container_of(timer, struct signal_struct, real_timer); |
| 135 | 135 | ||
| 136 | send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); | 136 | send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); |
| 137 | 137 | ||
| 138 | if (sig->it_real_incr.tv64 != 0) { | ||
| 139 | hrtimer_forward(timer, timer->base->softirq_time, | ||
| 140 | sig->it_real_incr); | ||
| 141 | return HRTIMER_RESTART; | ||
| 142 | } | ||
| 143 | return HRTIMER_NORESTART; | 138 | return HRTIMER_NORESTART; |
| 144 | } | 139 | } |
| 145 | 140 | ||
| @@ -231,11 +226,14 @@ again: | |||
| 231 | spin_unlock_irq(&tsk->sighand->siglock); | 226 | spin_unlock_irq(&tsk->sighand->siglock); |
| 232 | goto again; | 227 | goto again; |
| 233 | } | 228 | } |
| 234 | tsk->signal->it_real_incr = | ||
| 235 | timeval_to_ktime(value->it_interval); | ||
| 236 | expires = timeval_to_ktime(value->it_value); | 229 | expires = timeval_to_ktime(value->it_value); |
| 237 | if (expires.tv64 != 0) | 230 | if (expires.tv64 != 0) { |
| 238 | hrtimer_start(timer, expires, HRTIMER_REL); | 231 | tsk->signal->it_real_incr = |
| 232 | timeval_to_ktime(value->it_interval); | ||
| 233 | hrtimer_start(timer, expires, HRTIMER_MODE_REL); | ||
| 234 | } else | ||
| 235 | tsk->signal->it_real_incr.tv64 = 0; | ||
| 236 | |||
| 239 | spin_unlock_irq(&tsk->sighand->siglock); | 237 | spin_unlock_irq(&tsk->sighand->siglock); |
| 240 | break; | 238 | break; |
| 241 | case ITIMER_VIRTUAL: | 239 | case ITIMER_VIRTUAL: |
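it_real_fn() no longer forwards itself; the interval rearm moves to the signal delivery path, so the callback stays a plain one-shot. For contrast, a periodic callback that still self-rearms would follow the pattern just removed (sketch; the 1 ms interval is only an example):

    static enum hrtimer_restart demo_periodic(struct hrtimer *timer)
    {
            hrtimer_forward(timer, hrtimer_cb_get_time(timer),
                            ktime_set(0, 1000000));     /* 1 ms */
            return HRTIMER_RESTART;
    }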
diff --git a/kernel/kmod.c b/kernel/kmod.c index 3a7379aa31ca..9f923f8ce6a0 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
| @@ -36,6 +36,8 @@ | |||
| 36 | #include <linux/resource.h> | 36 | #include <linux/resource.h> |
| 37 | #include <asm/uaccess.h> | 37 | #include <asm/uaccess.h> |
| 38 | 38 | ||
| 39 | extern int delete_module(const char *name, unsigned int flags); | ||
| 40 | |||
| 39 | extern int max_threads; | 41 | extern int max_threads; |
| 40 | 42 | ||
| 41 | static struct workqueue_struct *khelper_wq; | 43 | static struct workqueue_struct *khelper_wq; |
| @@ -46,6 +48,7 @@ static struct workqueue_struct *khelper_wq; | |||
| 46 | modprobe_path is set via /proc/sys. | 48 | modprobe_path is set via /proc/sys. |
| 47 | */ | 49 | */ |
| 48 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; | 50 | char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; |
| 51 | struct module_kobject kmod_mk; | ||
| 49 | 52 | ||
| 50 | /** | 53 | /** |
| 51 | * request_module - try to load a kernel module | 54 | * request_module - try to load a kernel module |
| @@ -75,6 +78,11 @@ int request_module(const char *fmt, ...) | |||
| 75 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); | 78 | static atomic_t kmod_concurrent = ATOMIC_INIT(0); |
| 76 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ | 79 | #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ |
| 77 | static int kmod_loop_msg; | 80 | static int kmod_loop_msg; |
| 81 | char modalias[16 + MODULE_NAME_LEN] = "MODALIAS="; | ||
| 82 | char *uevent_envp[2] = { | ||
| 83 | modalias, | ||
| 84 | NULL | ||
| 85 | }; | ||
| 78 | 86 | ||
| 79 | va_start(args, fmt); | 87 | va_start(args, fmt); |
| 80 | ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); | 88 | ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); |
| @@ -82,6 +90,12 @@ int request_module(const char *fmt, ...) | |||
| 82 | if (ret >= MODULE_NAME_LEN) | 90 | if (ret >= MODULE_NAME_LEN) |
| 83 | return -ENAMETOOLONG; | 91 | return -ENAMETOOLONG; |
| 84 | 92 | ||
| 93 | strcpy(&modalias[strlen("MODALIAS=")], module_name); | ||
| 94 | kobject_uevent_env(&kmod_mk.kobj, KOBJ_CHANGE, uevent_envp); | ||
| 95 | |||
| 96 | if (modprobe_path[0] == '\0') | ||
| 97 | goto out; | ||
| 98 | |||
| 85 | /* If modprobe needs a service that is in a module, we get a recursive | 99 | /* If modprobe needs a service that is in a module, we get a recursive |
| 86 | * loop. Limit the number of running kmod threads to max_threads/2 or | 100 | * loop. Limit the number of running kmod threads to max_threads/2 or |
| 87 | * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method | 101 | * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method |
| @@ -108,9 +122,115 @@ int request_module(const char *fmt, ...) | |||
| 108 | 122 | ||
| 109 | ret = call_usermodehelper(modprobe_path, argv, envp, 1); | 123 | ret = call_usermodehelper(modprobe_path, argv, envp, 1); |
| 110 | atomic_dec(&kmod_concurrent); | 124 | atomic_dec(&kmod_concurrent); |
| 125 | out: | ||
| 111 | return ret; | 126 | return ret; |
| 112 | } | 127 | } |
| 113 | EXPORT_SYMBOL(request_module); | 128 | EXPORT_SYMBOL(request_module); |
| 129 | |||
| 130 | static ssize_t store_mod_request(struct module_attribute *mattr, | ||
| 131 | struct module *mod, | ||
| 132 | const char *buffer, size_t count) | ||
| 133 | { | ||
| 134 | char name[MODULE_NAME_LEN]; | ||
| 135 | int ret; | ||
| 136 | |||
| 137 | if (count < 1 || count+1 > MODULE_NAME_LEN) | ||
| 138 | return -EINVAL; | ||
| 139 | memcpy(name, buffer, count); | ||
| 140 | name[count] = '\0'; | ||
| 141 | if (name[count-1] == '\n') | ||
| 142 | name[count-1] = '\0'; | ||
| 143 | |||
| 144 | ret = request_module(name); | ||
| 145 | if (ret < 0) | ||
| 146 | return ret; | ||
| 147 | return count; | ||
| 148 | } | ||
| 149 | |||
| 150 | static struct module_attribute mod_request = { | ||
| 151 | .attr = { .name = "mod_request", .mode = S_IWUSR, .owner = THIS_MODULE }, | ||
| 152 | .store = store_mod_request, | ||
| 153 | }; | ||
| 154 | |||
| 155 | #ifdef CONFIG_MODULE_UNLOAD | ||
| 156 | static ssize_t store_mod_unload(struct module_attribute *mattr, | ||
| 157 | struct module *mod, | ||
| 158 | const char *buffer, size_t count) | ||
| 159 | { | ||
| 160 | char name[MODULE_NAME_LEN]; | ||
| 161 | int ret; | ||
| 162 | |||
| 163 | if (count < 1 || count+1 > MODULE_NAME_LEN) | ||
| 164 | return -EINVAL; | ||
| 165 | memcpy(name, buffer, count); | ||
| 166 | name[count] = '\0'; | ||
| 167 | if (name[count-1] == '\n') | ||
| 168 | name[count-1] = '\0'; | ||
| 169 | |||
| 170 | ret = delete_module(name, O_NONBLOCK); | ||
| 171 | if (ret < 0) | ||
| 172 | return ret; | ||
| 173 | return count; | ||
| 174 | } | ||
| 175 | |||
| 176 | static struct module_attribute mod_unload = { | ||
| 177 | .attr = { .name = "mod_unload", .mode = S_IWUSR, .owner = THIS_MODULE }, | ||
| 178 | .store = store_mod_unload, | ||
| 179 | }; | ||
| 180 | #endif | ||
| 181 | |||
| 182 | static ssize_t show_mod_request_helper(struct module_attribute *mattr, | ||
| 183 | struct module *mod, | ||
| 184 | char *buffer) | ||
| 185 | { | ||
| 186 | return sprintf(buffer, "%s\n", modprobe_path); | ||
| 187 | } | ||
| 188 | |||
| 189 | static ssize_t store_mod_request_helper(struct module_attribute *mattr, | ||
| 190 | struct module *mod, | ||
| 191 | const char *buffer, size_t count) | ||
| 192 | { | ||
| 193 | if (count < 1 || count+1 > KMOD_PATH_LEN) | ||
| 194 | return -EINVAL; | ||
| 195 | memcpy(modprobe_path, buffer, count); | ||
| 196 | modprobe_path[count] = '\0'; | ||
| 197 | if (modprobe_path[count-1] == '\n') | ||
| 198 | modprobe_path[count-1] = '\0'; | ||
| 199 | return count; | ||
| 200 | } | ||
| 201 | |||
| 202 | static struct module_attribute mod_request_helper = { | ||
| 203 | .attr = { | ||
| 204 | .name = "mod_request_helper", | ||
| 205 | .mode = S_IWUSR | S_IRUGO, | ||
| 206 | .owner = THIS_MODULE | ||
| 207 | }, | ||
| 208 | .show = show_mod_request_helper, | ||
| 209 | .store = store_mod_request_helper, | ||
| 210 | }; | ||
| 211 | |||
| 212 | void __init kmod_sysfs_init(void) | ||
| 213 | { | ||
| 214 | int ret; | ||
| 215 | |||
| 216 | kmod_mk.mod = THIS_MODULE; | ||
| 217 | kobj_set_kset_s(&kmod_mk, module_subsys); | ||
| 218 | kobject_set_name(&kmod_mk.kobj, "kmod"); | ||
| 219 | kobject_init(&kmod_mk.kobj); | ||
| 220 | ret = kobject_add(&kmod_mk.kobj); | ||
| 221 | if (ret < 0) | ||
| 222 | goto out; | ||
| 223 | |||
| 224 | ret = sysfs_create_file(&kmod_mk.kobj, &mod_request_helper.attr); | ||
| 225 | ret = sysfs_create_file(&kmod_mk.kobj, &mod_request.attr); | ||
| 226 | #ifdef CONFIG_MODULE_UNLOAD | ||
| 227 | ret = sysfs_create_file(&kmod_mk.kobj, &mod_unload.attr); | ||
| 228 | #endif | ||
| 229 | |||
| 230 | kobject_uevent(&kmod_mk.kobj, KOBJ_ADD); | ||
| 231 | out: | ||
| 232 | return; | ||
| 233 | } | ||
| 114 | #endif /* CONFIG_KMOD */ | 234 | #endif /* CONFIG_KMOD */ |
| 115 | 235 | ||
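kmod_sysfs_init() publishes the new knobs under /sys/module/kmod/: writing a name to mod_request calls request_module(), mod_unload calls delete_module(), and mod_request_helper reads or replaces modprobe_path (an empty path skips the usermode helper while the MODALIAS uevent still fires). A userspace sketch:

    /* Sketch (userspace): equivalent to request_module("loop"). */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/module/kmod/mod_request", "w");
            if (!f)
                    return 1;
            fputs("loop\n", f);
            return fclose(f) ? 1 : 0;
    }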
| 116 | struct subprocess_info { | 236 | struct subprocess_info { |
| @@ -217,7 +337,10 @@ static int wait_for_helper(void *data) | |||
| 217 | sub_info->retval = ret; | 337 | sub_info->retval = ret; |
| 218 | } | 338 | } |
| 219 | 339 | ||
| 220 | complete(sub_info->complete); | 340 | if (sub_info->wait < 0) |
| 341 | kfree(sub_info); | ||
| 342 | else | ||
| 343 | complete(sub_info->complete); | ||
| 221 | return 0; | 344 | return 0; |
| 222 | } | 345 | } |
| 223 | 346 | ||
| @@ -239,6 +362,9 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 239 | pid = kernel_thread(____call_usermodehelper, sub_info, | 362 | pid = kernel_thread(____call_usermodehelper, sub_info, |
| 240 | CLONE_VFORK | SIGCHLD); | 363 | CLONE_VFORK | SIGCHLD); |
| 241 | 364 | ||
| 365 | if (wait < 0) | ||
| 366 | return; | ||
| 367 | |||
| 242 | if (pid < 0) { | 368 | if (pid < 0) { |
| 243 | sub_info->retval = pid; | 369 | sub_info->retval = pid; |
| 244 | complete(sub_info->complete); | 370 | complete(sub_info->complete); |
| @@ -253,6 +379,9 @@ static void __call_usermodehelper(struct work_struct *work) | |||
| 253 | * @envp: null-terminated environment list | 379 | * @envp: null-terminated environment list |
| 254 | * @session_keyring: session keyring for process (NULL for an empty keyring) | 380 | * @session_keyring: session keyring for process (NULL for an empty keyring) |
| 255 | * @wait: wait for the application to finish and return status. | 381 | * @wait: wait for the application to finish and return status. |
| 382 | * When -1, don't wait at all, but no useful error is returned when | ||
| 383 | * the program couldn't be exec'ed. This makes it safe to call | ||
| 384 | * from interrupt context. | ||
| 256 | * | 385 | * |
| 257 | * Runs a user-space application. The application is started | 386 | * Runs a user-space application. The application is started |
| 258 | * asynchronously if wait is not set, and runs as a child of keventd. | 387 | * asynchronously if wait is not set, and runs as a child of keventd. |
| @@ -265,17 +394,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, | |||
| 265 | struct key *session_keyring, int wait) | 394 | struct key *session_keyring, int wait) |
| 266 | { | 395 | { |
| 267 | DECLARE_COMPLETION_ONSTACK(done); | 396 | DECLARE_COMPLETION_ONSTACK(done); |
| 268 | struct subprocess_info sub_info = { | 397 | struct subprocess_info *sub_info; |
| 269 | .work = __WORK_INITIALIZER(sub_info.work, | 398 | int retval; |
| 270 | __call_usermodehelper), | ||
| 271 | .complete = &done, | ||
| 272 | .path = path, | ||
| 273 | .argv = argv, | ||
| 274 | .envp = envp, | ||
| 275 | .ring = session_keyring, | ||
| 276 | .wait = wait, | ||
| 277 | .retval = 0, | ||
| 278 | }; | ||
| 279 | 399 | ||
| 280 | if (!khelper_wq) | 400 | if (!khelper_wq) |
| 281 | return -EBUSY; | 401 | return -EBUSY; |
| @@ -283,9 +403,25 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, | |||
| 283 | if (path[0] == '\0') | 403 | if (path[0] == '\0') |
| 284 | return 0; | 404 | return 0; |
| 285 | 405 | ||
| 286 | queue_work(khelper_wq, &sub_info.work); | 406 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); |
| 407 | if (!sub_info) | ||
| 408 | return -ENOMEM; | ||
| 409 | |||
| 410 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
| 411 | sub_info->complete = &done; | ||
| 412 | sub_info->path = path; | ||
| 413 | sub_info->argv = argv; | ||
| 414 | sub_info->envp = envp; | ||
| 415 | sub_info->ring = session_keyring; | ||
| 416 | sub_info->wait = wait; | ||
| 417 | |||
| 418 | queue_work(khelper_wq, &sub_info->work); | ||
| 419 | if (wait < 0) /* task has freed sub_info */ | ||
| 420 | return 0; | ||
| 287 | wait_for_completion(&done); | 421 | wait_for_completion(&done); |
| 288 | return sub_info.retval; | 422 | retval = sub_info->retval; |
| 423 | kfree(sub_info); | ||
| 424 | return retval; | ||
| 289 | } | 425 | } |
| 290 | EXPORT_SYMBOL(call_usermodehelper_keys); | 426 | EXPORT_SYMBOL(call_usermodehelper_keys); |
| 291 | 427 | ||
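Heap-allocating subprocess_info is what makes wait == -1 usable: the caller returns as soon as the work is queued and the helper frees sub_info itself, so no completion is slept on. A sketch of a fire-and-forget caller (the helper path is hypothetical):

    static int demo_notify(void)
    {
            char *argv[] = { "/sbin/demo-notify", NULL };
            char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

            /* wait == -1: no exit status comes back, only queueing errors */
            return call_usermodehelper("/sbin/demo-notify", argv, envp, -1);
    }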
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6fcf8dd148d0..d25a9ada3f8e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -39,6 +39,8 @@ | |||
| 39 | #include <linux/moduleloader.h> | 39 | #include <linux/moduleloader.h> |
| 40 | #include <linux/kallsyms.h> | 40 | #include <linux/kallsyms.h> |
| 41 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
| 42 | #include <linux/seq_file.h> | ||
| 43 | #include <linux/debugfs.h> | ||
| 42 | #include <asm-generic/sections.h> | 44 | #include <asm-generic/sections.h> |
| 43 | #include <asm/cacheflush.h> | 45 | #include <asm/cacheflush.h> |
| 44 | #include <asm/errno.h> | 46 | #include <asm/errno.h> |
| @@ -778,6 +780,12 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
| 778 | return -ENOSYS; | 780 | return -ENOSYS; |
| 779 | } | 781 | } |
| 780 | 782 | ||
| 783 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, | ||
| 784 | struct pt_regs *regs) | ||
| 785 | { | ||
| 786 | return 0; | ||
| 787 | } | ||
| 788 | |||
| 781 | #endif /* ARCH_SUPPORTS_KRETPROBES */ | 789 | #endif /* ARCH_SUPPORTS_KRETPROBES */ |
| 782 | 790 | ||
| 783 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 791 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
| @@ -815,7 +823,109 @@ static int __init init_kprobes(void) | |||
| 815 | return err; | 823 | return err; |
| 816 | } | 824 | } |
| 817 | 825 | ||
| 818 | __initcall(init_kprobes); | 826 | #ifdef CONFIG_DEBUG_FS |
| 827 | static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, | ||
| 828 | const char *sym, int offset, char *modname) | ||
| 829 | { | ||
| 830 | char *kprobe_type; | ||
| 831 | |||
| 832 | if (p->pre_handler == pre_handler_kretprobe) | ||
| 833 | kprobe_type = "r"; | ||
| 834 | else if (p->pre_handler == setjmp_pre_handler) | ||
| 835 | kprobe_type = "j"; | ||
| 836 | else | ||
| 837 | kprobe_type = "k"; | ||
| 838 | if (sym) | ||
| 839 | seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type, | ||
| 840 | sym, offset, (modname ? modname : " ")); | ||
| 841 | else | ||
| 842 | seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr); | ||
| 843 | } | ||
| 844 | |||
| 845 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) | ||
| 846 | { | ||
| 847 | return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL; | ||
| 848 | } | ||
| 849 | |||
| 850 | static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) | ||
| 851 | { | ||
| 852 | (*pos)++; | ||
| 853 | if (*pos >= KPROBE_TABLE_SIZE) | ||
| 854 | return NULL; | ||
| 855 | return pos; | ||
| 856 | } | ||
| 857 | |||
| 858 | static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) | ||
| 859 | { | ||
| 860 | /* Nothing to do */ | ||
| 861 | } | ||
| 862 | |||
| 863 | static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) | ||
| 864 | { | ||
| 865 | struct hlist_head *head; | ||
| 866 | struct hlist_node *node; | ||
| 867 | struct kprobe *p, *kp; | ||
| 868 | const char *sym = NULL; | ||
| 869 | unsigned int i = *(loff_t *) v; | ||
| 870 | unsigned long size, offset = 0; | ||
| 871 | char *modname, namebuf[128]; | ||
| 872 | |||
| 873 | head = &kprobe_table[i]; | ||
| 874 | preempt_disable(); | ||
| 875 | hlist_for_each_entry_rcu(p, node, head, hlist) { | ||
| 876 | sym = kallsyms_lookup((unsigned long)p->addr, &size, | ||
| 877 | &offset, &modname, namebuf); | ||
| 878 | if (p->pre_handler == aggr_pre_handler) { | ||
| 879 | list_for_each_entry_rcu(kp, &p->list, list) | ||
| 880 | report_probe(pi, kp, sym, offset, modname); | ||
| 881 | } else | ||
| 882 | report_probe(pi, p, sym, offset, modname); | ||
| 883 | } | ||
| 884 | preempt_enable(); | ||
| 885 | return 0; | ||
| 886 | } | ||
| 887 | |||
| 888 | static struct seq_operations kprobes_seq_ops = { | ||
| 889 | .start = kprobe_seq_start, | ||
| 890 | .next = kprobe_seq_next, | ||
| 891 | .stop = kprobe_seq_stop, | ||
| 892 | .show = show_kprobe_addr | ||
| 893 | }; | ||
| 894 | |||
| 895 | static int __kprobes kprobes_open(struct inode *inode, struct file *filp) | ||
| 896 | { | ||
| 897 | return seq_open(filp, &kprobes_seq_ops); | ||
| 898 | } | ||
| 899 | |||
| 900 | static struct file_operations debugfs_kprobes_operations = { | ||
| 901 | .open = kprobes_open, | ||
| 902 | .read = seq_read, | ||
| 903 | .llseek = seq_lseek, | ||
| 904 | .release = seq_release, | ||
| 905 | }; | ||
| 906 | |||
| 907 | static int __kprobes debugfs_kprobe_init(void) | ||
| 908 | { | ||
| 909 | struct dentry *dir, *file; | ||
| 910 | |||
| 911 | dir = debugfs_create_dir("kprobes", NULL); | ||
| 912 | if (!dir) | ||
| 913 | return -ENOMEM; | ||
| 914 | |||
| 915 | file = debugfs_create_file("list", 0444, dir, 0, | ||
| 916 | &debugfs_kprobes_operations); | ||
| 917 | if (!file) { | ||
| 918 | debugfs_remove(dir); | ||
| 919 | return -ENOMEM; | ||
| 920 | } | ||
| 921 | |||
| 922 | return 0; | ||
| 923 | } | ||
| 924 | |||
| 925 | late_initcall(debugfs_kprobe_init); | ||
| 926 | #endif /* CONFIG_DEBUG_FS */ | ||
| 927 | |||
| 928 | module_init(init_kprobes); | ||
| 819 | 929 | ||
| 820 | EXPORT_SYMBOL_GPL(register_kprobe); | 930 | EXPORT_SYMBOL_GPL(register_kprobe); |
| 821 | EXPORT_SYMBOL_GPL(unregister_kprobe); | 931 | EXPORT_SYMBOL_GPL(unregister_kprobe); |
| @@ -824,4 +934,3 @@ EXPORT_SYMBOL_GPL(unregister_jprobe); | |||
| 824 | EXPORT_SYMBOL_GPL(jprobe_return); | 934 | EXPORT_SYMBOL_GPL(jprobe_return); |
| 825 | EXPORT_SYMBOL_GPL(register_kretprobe); | 935 | EXPORT_SYMBOL_GPL(register_kretprobe); |
| 826 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | 936 | EXPORT_SYMBOL_GPL(unregister_kretprobe); |
| 827 | |||
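The seq_file above walks kprobe_table and prints one probe per line as "address type symbol+offset [module]", with type k, r or j for kprobes, kretprobes and jprobes. Reading it from userspace (assumes debugfs is mounted at /sys/kernel/debug):

    /* Sketch (userspace): dump the registered probes. */
    #include <stdio.h>

    int main(void)
    {
            char line[256];
            FILE *f = fopen("/sys/kernel/debug/kprobes/list", "r");

            if (!f)
                    return 1;   /* debugfs not mounted or CONFIG_DEBUG_FS off */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            return fclose(f) ? 1 : 0;
    }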
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 592c576d77a7..a08a17218dfa 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -2228,6 +2228,10 @@ out_calc_hash: | |||
| 2228 | 2228 | ||
| 2229 | curr->lockdep_depth++; | 2229 | curr->lockdep_depth++; |
| 2230 | check_chain_key(curr); | 2230 | check_chain_key(curr); |
| 2231 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
| 2232 | if (unlikely(!debug_locks)) | ||
| 2233 | return 0; | ||
| 2234 | #endif | ||
| 2231 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { | 2235 | if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { |
| 2232 | debug_locks_off(); | 2236 | debug_locks_off(); |
| 2233 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); | 2237 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); |
diff --git a/kernel/module.c b/kernel/module.c index 8a94e054230c..8c25b1a04fa6 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -653,20 +653,11 @@ static void wait_for_zero_refcount(struct module *mod) | |||
| 653 | mutex_lock(&module_mutex); | 653 | mutex_lock(&module_mutex); |
| 654 | } | 654 | } |
| 655 | 655 | ||
| 656 | asmlinkage long | 656 | int delete_module(const char *name, unsigned int flags) |
| 657 | sys_delete_module(const char __user *name_user, unsigned int flags) | ||
| 658 | { | 657 | { |
| 659 | struct module *mod; | 658 | struct module *mod; |
| 660 | char name[MODULE_NAME_LEN]; | ||
| 661 | int ret, forced = 0; | 659 | int ret, forced = 0; |
| 662 | 660 | ||
| 663 | if (!capable(CAP_SYS_MODULE)) | ||
| 664 | return -EPERM; | ||
| 665 | |||
| 666 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) | ||
| 667 | return -EFAULT; | ||
| 668 | name[MODULE_NAME_LEN-1] = '\0'; | ||
| 669 | |||
| 670 | if (mutex_lock_interruptible(&module_mutex) != 0) | 661 | if (mutex_lock_interruptible(&module_mutex) != 0) |
| 671 | return -EINTR; | 662 | return -EINTR; |
| 672 | 663 | ||
| @@ -727,6 +718,21 @@ sys_delete_module(const char __user *name_user, unsigned int flags) | |||
| 727 | return ret; | 718 | return ret; |
| 728 | } | 719 | } |
| 729 | 720 | ||
| 721 | asmlinkage long | ||
| 722 | sys_delete_module(const char __user *name_user, unsigned int flags) | ||
| 723 | { | ||
| 724 | char name[MODULE_NAME_LEN]; | ||
| 725 | |||
| 726 | if (!capable(CAP_SYS_MODULE)) | ||
| 727 | return -EPERM; | ||
| 728 | |||
| 729 | if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) | ||
| 730 | return -EFAULT; | ||
| 731 | name[MODULE_NAME_LEN-1] = '\0'; | ||
| 732 | |||
| 733 | return delete_module(name, flags); | ||
| 734 | } | ||
| 735 | |||
| 730 | static void print_unload_info(struct seq_file *m, struct module *mod) | 736 | static void print_unload_info(struct seq_file *m, struct module *mod) |
| 731 | { | 737 | { |
| 732 | struct module_use *use; | 738 | struct module_use *use; |
| @@ -1068,7 +1074,8 @@ static inline void remove_sect_attrs(struct module *mod) | |||
| 1068 | } | 1074 | } |
| 1069 | #endif /* CONFIG_KALLSYMS */ | 1075 | #endif /* CONFIG_KALLSYMS */ |
| 1070 | 1076 | ||
| 1071 | static int module_add_modinfo_attrs(struct module *mod) | 1077 | #ifdef CONFIG_SYSFS |
| 1078 | int module_add_modinfo_attrs(struct module *mod) | ||
| 1072 | { | 1079 | { |
| 1073 | struct module_attribute *attr; | 1080 | struct module_attribute *attr; |
| 1074 | struct module_attribute *temp_attr; | 1081 | struct module_attribute *temp_attr; |
| @@ -1094,7 +1101,7 @@ static int module_add_modinfo_attrs(struct module *mod) | |||
| 1094 | return error; | 1101 | return error; |
| 1095 | } | 1102 | } |
| 1096 | 1103 | ||
| 1097 | static void module_remove_modinfo_attrs(struct module *mod) | 1104 | void module_remove_modinfo_attrs(struct module *mod) |
| 1098 | { | 1105 | { |
| 1099 | struct module_attribute *attr; | 1106 | struct module_attribute *attr; |
| 1100 | int i; | 1107 | int i; |
| @@ -1109,8 +1116,10 @@ static void module_remove_modinfo_attrs(struct module *mod) | |||
| 1109 | } | 1116 | } |
| 1110 | kfree(mod->modinfo_attrs); | 1117 | kfree(mod->modinfo_attrs); |
| 1111 | } | 1118 | } |
| 1119 | #endif | ||
| 1112 | 1120 | ||
| 1113 | static int mod_sysfs_init(struct module *mod) | 1121 | #ifdef CONFIG_SYSFS |
| 1122 | int mod_sysfs_init(struct module *mod) | ||
| 1114 | { | 1123 | { |
| 1115 | int err; | 1124 | int err; |
| 1116 | 1125 | ||
| @@ -1133,7 +1142,7 @@ out: | |||
| 1133 | return err; | 1142 | return err; |
| 1134 | } | 1143 | } |
| 1135 | 1144 | ||
| 1136 | static int mod_sysfs_setup(struct module *mod, | 1145 | int mod_sysfs_setup(struct module *mod, |
| 1137 | struct kernel_param *kparam, | 1146 | struct kernel_param *kparam, |
| 1138 | unsigned int num_params) | 1147 | unsigned int num_params) |
| 1139 | { | 1148 | { |
| @@ -1169,16 +1178,14 @@ out_unreg: | |||
| 1169 | out: | 1178 | out: |
| 1170 | return err; | 1179 | return err; |
| 1171 | } | 1180 | } |
| 1181 | #endif | ||
| 1172 | 1182 | ||
| 1173 | static void mod_kobject_remove(struct module *mod) | 1183 | static void mod_kobject_remove(struct module *mod) |
| 1174 | { | 1184 | { |
| 1175 | module_remove_modinfo_attrs(mod); | 1185 | module_remove_modinfo_attrs(mod); |
| 1176 | module_param_sysfs_remove(mod); | 1186 | module_param_sysfs_remove(mod); |
| 1177 | if (mod->mkobj.drivers_dir) | 1187 | kobject_unregister(mod->mkobj.drivers_dir); |
| 1178 | kobject_unregister(mod->mkobj.drivers_dir); | 1188 | kobject_unregister(mod->holders_dir); |
| 1179 | if (mod->holders_dir) | ||
| 1180 | kobject_unregister(mod->holders_dir); | ||
| 1181 | |||
| 1182 | kobject_unregister(&mod->mkobj.kobj); | 1189 | kobject_unregister(&mod->mkobj.kobj); |
| 1183 | } | 1190 | } |
| 1184 | 1191 | ||
| @@ -2345,6 +2352,7 @@ void print_modules(void) | |||
| 2345 | printk("\n"); | 2352 | printk("\n"); |
| 2346 | } | 2353 | } |
| 2347 | 2354 | ||
| 2355 | #ifdef CONFIG_SYSFS | ||
| 2348 | static char *make_driver_name(struct device_driver *drv) | 2356 | static char *make_driver_name(struct device_driver *drv) |
| 2349 | { | 2357 | { |
| 2350 | char *driver_name; | 2358 | char *driver_name; |
| @@ -2419,6 +2427,7 @@ void module_remove_driver(struct device_driver *drv) | |||
| 2419 | } | 2427 | } |
| 2420 | } | 2428 | } |
| 2421 | EXPORT_SYMBOL(module_remove_driver); | 2429 | EXPORT_SYMBOL(module_remove_driver); |
| 2430 | #endif | ||
| 2422 | 2431 | ||
| 2423 | #ifdef CONFIG_MODVERSIONS | 2432 | #ifdef CONFIG_MODVERSIONS |
| 2424 | /* Generate the signature for struct module here, too, for modversions. */ | 2433 | /* Generate the signature for struct module here, too, for modversions. */ |
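Splitting delete_module() out of sys_delete_module() is what lets kmod's mod_unload attribute remove modules without a user pointer or the syscall path's capability check. An in-kernel caller, sketched (the "loop" name is only an example):

    #include <linux/fcntl.h>

    extern int delete_module(const char *name, unsigned int flags);

    static int demo_unload(void)
    {
            /* O_NONBLOCK matches what store_mod_unload() passes */
            return delete_module("loop", O_NONBLOCK);
    }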
diff --git a/kernel/params.c b/kernel/params.c index 553cf7d6a4be..7a751570b56d 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -30,8 +30,6 @@ | |||
| 30 | #define DEBUGP(fmt, a...) | 30 | #define DEBUGP(fmt, a...) |
| 31 | #endif | 31 | #endif |
| 32 | 32 | ||
| 33 | static struct kobj_type module_ktype; | ||
| 34 | |||
| 35 | static inline char dash2underscore(char c) | 33 | static inline char dash2underscore(char c) |
| 36 | { | 34 | { |
| 37 | if (c == '-') | 35 | if (c == '-') |
| @@ -391,6 +389,7 @@ struct module_param_attrs | |||
| 391 | struct param_attribute attrs[0]; | 389 | struct param_attribute attrs[0]; |
| 392 | }; | 390 | }; |
| 393 | 391 | ||
| 392 | #ifdef CONFIG_SYSFS | ||
| 394 | #define to_param_attr(n) container_of(n, struct param_attribute, mattr); | 393 | #define to_param_attr(n) container_of(n, struct param_attribute, mattr); |
| 395 | 394 | ||
| 396 | static ssize_t param_attr_show(struct module_attribute *mattr, | 395 | static ssize_t param_attr_show(struct module_attribute *mattr, |
| @@ -426,6 +425,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, | |||
| 426 | return len; | 425 | return len; |
| 427 | return err; | 426 | return err; |
| 428 | } | 427 | } |
| 428 | #endif | ||
| 429 | 429 | ||
| 430 | #ifdef CONFIG_MODULES | 430 | #ifdef CONFIG_MODULES |
| 431 | #define __modinit | 431 | #define __modinit |
| @@ -433,6 +433,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, | |||
| 433 | #define __modinit __init | 433 | #define __modinit __init |
| 434 | #endif | 434 | #endif |
| 435 | 435 | ||
| 436 | #ifdef CONFIG_SYSFS | ||
| 436 | /* | 437 | /* |
| 437 | * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME | 438 | * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME |
| 438 | * @mk: struct module_kobject (contains parent kobject) | 439 | * @mk: struct module_kobject (contains parent kobject) |
| @@ -500,9 +501,7 @@ param_sysfs_setup(struct module_kobject *mk, | |||
| 500 | return mp; | 501 | return mp; |
| 501 | } | 502 | } |
| 502 | 503 | ||
| 503 | |||
| 504 | #ifdef CONFIG_MODULES | 504 | #ifdef CONFIG_MODULES |
| 505 | |||
| 506 | /* | 505 | /* |
| 507 | * module_param_sysfs_setup - setup sysfs support for one module | 506 | * module_param_sysfs_setup - setup sysfs support for one module |
| 508 | * @mod: module | 507 | * @mod: module |
| @@ -625,7 +624,6 @@ static void __init param_sysfs_builtin(void) | |||
| 625 | 624 | ||
| 626 | 625 | ||
| 627 | /* module-related sysfs stuff */ | 626 | /* module-related sysfs stuff */ |
| 628 | #ifdef CONFIG_SYSFS | ||
| 629 | 627 | ||
| 630 | #define to_module_attr(n) container_of(n, struct module_attribute, attr); | 628 | #define to_module_attr(n) container_of(n, struct module_attribute, attr); |
| 631 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj); | 629 | #define to_module_kobject(n) container_of(n, struct module_kobject, kobj); |
| @@ -673,6 +671,8 @@ static struct sysfs_ops module_sysfs_ops = { | |||
| 673 | .store = module_attr_store, | 671 | .store = module_attr_store, |
| 674 | }; | 672 | }; |
| 675 | 673 | ||
| 674 | static struct kobj_type module_ktype; | ||
| 675 | |||
| 676 | static int uevent_filter(struct kset *kset, struct kobject *kobj) | 676 | static int uevent_filter(struct kset *kset, struct kobject *kobj) |
| 677 | { | 677 | { |
| 678 | struct kobj_type *ktype = get_ktype(kobj); | 678 | struct kobj_type *ktype = get_ktype(kobj); |
| @@ -686,19 +686,12 @@ static struct kset_uevent_ops module_uevent_ops = { | |||
| 686 | .filter = uevent_filter, | 686 | .filter = uevent_filter, |
| 687 | }; | 687 | }; |
| 688 | 688 | ||
| 689 | #else | 689 | decl_subsys(module, &module_ktype, &module_uevent_ops); |
| 690 | static struct sysfs_ops module_sysfs_ops = { | ||
| 691 | .show = NULL, | ||
| 692 | .store = NULL, | ||
| 693 | }; | ||
| 694 | #endif | ||
| 695 | 690 | ||
| 696 | static struct kobj_type module_ktype = { | 691 | static struct kobj_type module_ktype = { |
| 697 | .sysfs_ops = &module_sysfs_ops, | 692 | .sysfs_ops = &module_sysfs_ops, |
| 698 | }; | 693 | }; |
| 699 | 694 | ||
| 700 | decl_subsys(module, &module_ktype, &module_uevent_ops); | ||
| 701 | |||
| 702 | /* | 695 | /* |
| 703 | * param_sysfs_init - wrapper for built-in params support | 696 | * param_sysfs_init - wrapper for built-in params support |
| 704 | */ | 697 | */ |
| @@ -714,11 +707,21 @@ static int __init param_sysfs_init(void) | |||
| 714 | } | 707 | } |
| 715 | 708 | ||
| 716 | param_sysfs_builtin(); | 709 | param_sysfs_builtin(); |
| 710 | kmod_sysfs_init(); | ||
| 717 | 711 | ||
| 718 | return 0; | 712 | return 0; |
| 719 | } | 713 | } |
| 720 | subsys_initcall(param_sysfs_init); | 714 | subsys_initcall(param_sysfs_init); |
| 721 | 715 | ||
| 716 | #else | ||
| 717 | #if 0 | ||
| 718 | static struct sysfs_ops module_sysfs_ops = { | ||
| 719 | .show = NULL, | ||
| 720 | .store = NULL, | ||
| 721 | }; | ||
| 722 | #endif | ||
| 723 | #endif | ||
| 724 | |||
| 722 | EXPORT_SYMBOL(param_set_byte); | 725 | EXPORT_SYMBOL(param_set_byte); |
| 723 | EXPORT_SYMBOL(param_get_byte); | 726 | EXPORT_SYMBOL(param_get_byte); |
| 724 | EXPORT_SYMBOL(param_set_short); | 727 | EXPORT_SYMBOL(param_set_short); |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 7c3e1e6dfb5b..657f77697415 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -304,7 +304,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
| 304 | * should be able to see it. | 304 | * should be able to see it. |
| 305 | */ | 305 | */ |
| 306 | struct task_struct *p; | 306 | struct task_struct *p; |
| 307 | read_lock(&tasklist_lock); | 307 | rcu_read_lock(); |
| 308 | p = find_task_by_pid(pid); | 308 | p = find_task_by_pid(pid); |
| 309 | if (p) { | 309 | if (p) { |
| 310 | if (CPUCLOCK_PERTHREAD(which_clock)) { | 310 | if (CPUCLOCK_PERTHREAD(which_clock)) { |
| @@ -312,12 +312,17 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
| 312 | error = cpu_clock_sample(which_clock, | 312 | error = cpu_clock_sample(which_clock, |
| 313 | p, &rtn); | 313 | p, &rtn); |
| 314 | } | 314 | } |
| 315 | } else if (p->tgid == pid && p->signal) { | 315 | } else { |
| 316 | error = cpu_clock_sample_group(which_clock, | 316 | read_lock(&tasklist_lock); |
| 317 | p, &rtn); | 317 | if (p->tgid == pid && p->signal) { |
| 318 | error = | ||
| 319 | cpu_clock_sample_group(which_clock, | ||
| 320 | p, &rtn); | ||
| 321 | } | ||
| 322 | read_unlock(&tasklist_lock); | ||
| 318 | } | 323 | } |
| 319 | } | 324 | } |
| 320 | read_unlock(&tasklist_lock); | 325 | rcu_read_unlock(); |
| 321 | } | 326 | } |
| 322 | 327 | ||
| 323 | if (error) | 328 | if (error) |
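The posix-cpu-timers hunk above narrows the locking: RCU now protects the pid-to-task lookup itself, and tasklist_lock is only taken around the process-wide sample. A minimal sketch of that pattern, assuming the era's find_task_by_pid()/cpu_clock_sample_group() signatures (the helper name and error code are illustrative, not part of the patch):

    #include <linux/rcupdate.h>
    #include <linux/sched.h>

    static int sample_group_clock(pid_t pid, const clockid_t which_clock,
                                  union cpu_time_count *rtn)
    {
            struct task_struct *p;
            int error = -EINVAL;

            rcu_read_lock();                 /* pins the task over the lookup */
            p = find_task_by_pid(pid);
            if (p) {
                    read_lock(&tasklist_lock);  /* only the group walk needs it */
                    if (p->tgid == pid && p->signal)
                            error = cpu_clock_sample_group(which_clock, p, rtn);
                    read_unlock(&tasklist_lock);
            }
            rcu_read_unlock();
            return error;
    }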
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1bf61617839..44318ca71978 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
| @@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int, | |||
| 145 | struct itimerspec *, struct itimerspec *); | 145 | struct itimerspec *, struct itimerspec *); |
| 146 | static int common_timer_del(struct k_itimer *timer); | 146 | static int common_timer_del(struct k_itimer *timer); |
| 147 | 147 | ||
| 148 | static int posix_timer_fn(struct hrtimer *data); | 148 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); |
| 149 | 149 | ||
| 150 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); | 150 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); |
| 151 | 151 | ||
| @@ -334,12 +334,12 @@ EXPORT_SYMBOL_GPL(posix_timer_event); | |||
| 334 | 334 | ||
| 335 | * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. | 335 | * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. |
| 336 | */ | 336 | */ |
| 337 | static int posix_timer_fn(struct hrtimer *timer) | 337 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) |
| 338 | { | 338 | { |
| 339 | struct k_itimer *timr; | 339 | struct k_itimer *timr; |
| 340 | unsigned long flags; | 340 | unsigned long flags; |
| 341 | int si_private = 0; | 341 | int si_private = 0; |
| 342 | int ret = HRTIMER_NORESTART; | 342 | enum hrtimer_restart ret = HRTIMER_NORESTART; |
| 343 | 343 | ||
| 344 | timr = container_of(timer, struct k_itimer, it.real.timer); | 344 | timr = container_of(timer, struct k_itimer, it.real.timer); |
| 345 | spin_lock_irqsave(&timr->it_lock, flags); | 345 | spin_lock_irqsave(&timr->it_lock, flags); |
| @@ -356,7 +356,7 @@ static int posix_timer_fn(struct hrtimer *timer) | |||
| 356 | if (timr->it.real.interval.tv64 != 0) { | 356 | if (timr->it.real.interval.tv64 != 0) { |
| 357 | timr->it_overrun += | 357 | timr->it_overrun += |
| 358 | hrtimer_forward(timer, | 358 | hrtimer_forward(timer, |
| 359 | timer->base->softirq_time, | 359 | hrtimer_cb_get_time(timer), |
| 360 | timr->it.real.interval); | 360 | timr->it.real.interval); |
| 361 | ret = HRTIMER_RESTART; | 361 | ret = HRTIMER_RESTART; |
| 362 | ++timr->it_requeue_pending; | 362 | ++timr->it_requeue_pending; |
| @@ -722,7 +722,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
| 722 | if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) | 722 | if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) |
| 723 | return 0; | 723 | return 0; |
| 724 | 724 | ||
| 725 | mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; | 725 | mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; |
| 726 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); | 726 | hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); |
| 727 | timr->it.real.timer.function = posix_timer_fn; | 727 | timr->it.real.timer.function = posix_timer_fn; |
| 728 | 728 | ||
| @@ -734,7 +734,7 @@ common_timer_set(struct k_itimer *timr, int flags, | |||
| 734 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ | 734 | /* SIGEV_NONE timers are not queued ! See common_timer_get */ |
| 735 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { | 735 | if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { |
| 736 | /* Setup correct expiry time for relative timers */ | 736 | /* Setup correct expiry time for relative timers */ |
| 737 | if (mode == HRTIMER_REL) | 737 | if (mode == HRTIMER_MODE_REL) |
| 738 | timer->expires = ktime_add(timer->expires, | 738 | timer->expires = ktime_add(timer->expires, |
| 739 | timer->base->get_time()); | 739 | timer->base->get_time()); |
| 740 | return 0; | 740 | return 0; |
| @@ -950,7 +950,8 @@ static int common_nsleep(const clockid_t which_clock, int flags, | |||
| 950 | struct timespec *tsave, struct timespec __user *rmtp) | 950 | struct timespec *tsave, struct timespec __user *rmtp) |
| 951 | { | 951 | { |
| 952 | return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? | 952 | return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? |
| 953 | HRTIMER_ABS : HRTIMER_REL, which_clock); | 953 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, |
| 954 | which_clock); | ||
| 954 | } | 955 | } |
| 955 | 956 | ||
| 956 | asmlinkage long | 957 | asmlinkage long |
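The posix-timers changes above switch hrtimer callbacks to return enum hrtimer_restart and to rearm via hrtimer_cb_get_time(). A hedged sketch of a periodic callback under that API (my_timer_fn and my_period are hypothetical names):

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static ktime_t my_period;        /* interval, initialized elsewhere */

    static enum hrtimer_restart my_timer_fn(struct hrtimer *timer)
    {
            /* push the expiry forward by whole periods past "now" */
            hrtimer_forward(timer, hrtimer_cb_get_time(timer), my_period);
            return HRTIMER_RESTART;  /* requeue instead of firing once */
    }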
diff --git a/kernel/printk.c b/kernel/printk.c index 0c151877ff71..4b47e59248df 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
| @@ -54,7 +54,7 @@ int console_printk[4] = { | |||
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | /* | 56 | /* |
| 57 | * Low lever drivers may need that to know if they can schedule in | 57 | * Low level drivers may need that to know if they can schedule in |
| 58 | * their unblank() callback or not. So let's export it. | 58 | * their unblank() callback or not. So let's export it. |
| 59 | */ | 59 | */ |
| 60 | int oops_in_progress; | 60 | int oops_in_progress; |
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 4ab17da46fd8..180978cb2f75 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c | |||
| @@ -625,7 +625,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
| 625 | /* Setup the timer, when timeout != NULL */ | 625 | /* Setup the timer, when timeout != NULL */ |
| 626 | if (unlikely(timeout)) | 626 | if (unlikely(timeout)) |
| 627 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 627 | hrtimer_start(&timeout->timer, timeout->timer.expires, |
| 628 | HRTIMER_ABS); | 628 | HRTIMER_MODE_ABS); |
| 629 | 629 | ||
| 630 | for (;;) { | 630 | for (;;) { |
| 631 | /* Try to acquire the lock: */ | 631 | /* Try to acquire the lock: */ |
diff --git a/kernel/sched.c b/kernel/sched.c index 08f86178aa34..0dc757246d89 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -1853,6 +1853,13 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 1853 | struct mm_struct *mm = next->mm; | 1853 | struct mm_struct *mm = next->mm; |
| 1854 | struct mm_struct *oldmm = prev->active_mm; | 1854 | struct mm_struct *oldmm = prev->active_mm; |
| 1855 | 1855 | ||
| 1856 | /* | ||
| 1857 | * For paravirt, this is coupled with an exit in switch_to to | ||
| 1858 | * combine the page table reload and the switch backend into | ||
| 1859 | * one hypercall. | ||
| 1860 | */ | ||
| 1861 | arch_enter_lazy_cpu_mode(); | ||
| 1862 | |||
| 1856 | if (!mm) { | 1863 | if (!mm) { |
| 1857 | next->active_mm = oldmm; | 1864 | next->active_mm = oldmm; |
| 1858 | atomic_inc(&oldmm->mm_count); | 1865 | atomic_inc(&oldmm->mm_count); |
diff --git a/kernel/signal.c b/kernel/signal.c index 8072e568bbe0..e2a7d4bf7d57 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -456,26 +456,50 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | |||
| 456 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | 456 | int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) |
| 457 | { | 457 | { |
| 458 | int signr = __dequeue_signal(&tsk->pending, mask, info); | 458 | int signr = __dequeue_signal(&tsk->pending, mask, info); |
| 459 | if (!signr) | 459 | if (!signr) { |
| 460 | signr = __dequeue_signal(&tsk->signal->shared_pending, | 460 | signr = __dequeue_signal(&tsk->signal->shared_pending, |
| 461 | mask, info); | 461 | mask, info); |
| 462 | /* | ||
| 463 | * itimer signal ? | ||
| 464 | * | ||
| 465 | * itimers are process shared and we restart periodic | ||
| 466 | * itimers in the signal delivery path to prevent DoS | ||
| 467 | * attacks in the high resolution timer case. This is | ||
| 468 | * compliant with the old way of self restarting | ||
| 469 | * itimers, as the SIGALRM is a legacy signal and only | ||
| 470 | * queued once. Changing the restart behaviour to | ||
| 471 | * restart the timer in the signal dequeue path is | ||
| 472 | * reducing the timer noise on heavy loaded !highres | ||
| 473 | * systems too. | ||
| 474 | */ | ||
| 475 | if (unlikely(signr == SIGALRM)) { | ||
| 476 | struct hrtimer *tmr = &tsk->signal->real_timer; | ||
| 477 | |||
| 478 | if (!hrtimer_is_queued(tmr) && | ||
| 479 | tsk->signal->it_real_incr.tv64 != 0) { | ||
| 480 | hrtimer_forward(tmr, tmr->base->get_time(), | ||
| 481 | tsk->signal->it_real_incr); | ||
| 482 | hrtimer_restart(tmr); | ||
| 483 | } | ||
| 484 | } | ||
| 485 | } | ||
| 462 | recalc_sigpending_tsk(tsk); | 486 | recalc_sigpending_tsk(tsk); |
| 463 | if (signr && unlikely(sig_kernel_stop(signr))) { | 487 | if (signr && unlikely(sig_kernel_stop(signr))) { |
| 464 | /* | 488 | /* |
| 465 | * Set a marker that we have dequeued a stop signal. Our | 489 | * Set a marker that we have dequeued a stop signal. Our |
| 466 | * caller might release the siglock and then the pending | 490 | * caller might release the siglock and then the pending |
| 467 | * stop signal it is about to process is no longer in the | 491 | * stop signal it is about to process is no longer in the |
| 468 | * pending bitmasks, but must still be cleared by a SIGCONT | 492 | * pending bitmasks, but must still be cleared by a SIGCONT |
| 469 | * (and overruled by a SIGKILL). So those cases clear this | 493 | * (and overruled by a SIGKILL). So those cases clear this |
| 470 | * shared flag after we've set it. Note that this flag may | 494 | * shared flag after we've set it. Note that this flag may |
| 471 | * remain set after the signal we return is ignored or | 495 | * remain set after the signal we return is ignored or |
| 472 | * handled. That doesn't matter because its only purpose | 496 | * handled. That doesn't matter because its only purpose |
| 473 | * is to alert stop-signal processing code when another | 497 | * is to alert stop-signal processing code when another |
| 474 | * processor has come along and cleared the flag. | 498 | * processor has come along and cleared the flag. |
| 475 | */ | 499 | */ |
| 476 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) | 500 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) |
| 477 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; | 501 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; |
| 478 | } | 502 | } |
| 479 | if ( signr && | 503 | if ( signr && |
| 480 | ((info->si_code & __SI_MASK) == __SI_TIMER) && | 504 | ((info->si_code & __SI_MASK) == __SI_TIMER) && |
| 481 | info->si_sys_private){ | 505 | info->si_sys_private){ |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 918e52df090e..8b75008e2bd8 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
| 18 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <linux/tick.h> | ||
| 20 | 21 | ||
| 21 | #include <asm/irq.h> | 22 | #include <asm/irq.h> |
| 22 | /* | 23 | /* |
| @@ -273,6 +274,18 @@ EXPORT_SYMBOL(do_softirq); | |||
| 273 | 274 | ||
| 274 | #endif | 275 | #endif |
| 275 | 276 | ||
| 277 | /* | ||
| 278 | * Enter an interrupt context. | ||
| 279 | */ | ||
| 280 | void irq_enter(void) | ||
| 281 | { | ||
| 282 | __irq_enter(); | ||
| 283 | #ifdef CONFIG_NO_HZ | ||
| 284 | if (idle_cpu(smp_processor_id())) | ||
| 285 | tick_nohz_update_jiffies(); | ||
| 286 | #endif | ||
| 287 | } | ||
| 288 | |||
| 276 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED | 289 | #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED |
| 277 | # define invoke_softirq() __do_softirq() | 290 | # define invoke_softirq() __do_softirq() |
| 278 | #else | 291 | #else |
| @@ -289,6 +302,12 @@ void irq_exit(void) | |||
| 289 | sub_preempt_count(IRQ_EXIT_OFFSET); | 302 | sub_preempt_count(IRQ_EXIT_OFFSET); |
| 290 | if (!in_interrupt() && local_softirq_pending()) | 303 | if (!in_interrupt() && local_softirq_pending()) |
| 291 | invoke_softirq(); | 304 | invoke_softirq(); |
| 305 | |||
| 306 | #ifdef CONFIG_NO_HZ | ||
| 307 | /* Make sure that timer wheel updates are propagated */ | ||
| 308 | if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) | ||
| 309 | tick_nohz_stop_sched_tick(); | ||
| 310 | #endif | ||
| 292 | preempt_enable_no_resched(); | 311 | preempt_enable_no_resched(); |
| 293 | } | 312 | } |
| 294 | 313 | ||
diff --git a/kernel/time.c b/kernel/time.c index 0e017bff4c19..c6c80ea5d0ea 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
| @@ -470,6 +470,260 @@ struct timeval ns_to_timeval(const s64 nsec) | |||
| 470 | return tv; | 470 | return tv; |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | /* | ||
| 474 | * Convert jiffies to milliseconds and back. | ||
| 475 | * | ||
| 476 | * Avoid unnecessary multiplications/divisions in the | ||
| 477 | * two most common HZ cases: | ||
| 478 | */ | ||
| 479 | unsigned int jiffies_to_msecs(const unsigned long j) | ||
| 480 | { | ||
| 481 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
| 482 | return (MSEC_PER_SEC / HZ) * j; | ||
| 483 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
| 484 | return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); | ||
| 485 | #else | ||
| 486 | return (j * MSEC_PER_SEC) / HZ; | ||
| 487 | #endif | ||
| 488 | } | ||
| 489 | EXPORT_SYMBOL(jiffies_to_msecs); | ||
| 490 | |||
| 491 | unsigned int jiffies_to_usecs(const unsigned long j) | ||
| 492 | { | ||
| 493 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
| 494 | return (USEC_PER_SEC / HZ) * j; | ||
| 495 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
| 496 | return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); | ||
| 497 | #else | ||
| 498 | return (j * USEC_PER_SEC) / HZ; | ||
| 499 | #endif | ||
| 500 | } | ||
| 501 | EXPORT_SYMBOL(jiffies_to_usecs); | ||
| 502 | |||
| 503 | /* | ||
| 504 | * When we convert to jiffies then we interpret incoming values | ||
| 505 | * the following way: | ||
| 506 | * | ||
| 507 | * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) | ||
| 508 | * | ||
| 509 | * - 'too large' values [that would result in larger than | ||
| 510 | * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. | ||
| 511 | * | ||
| 512 | * - all other values are converted to jiffies by either multiplying | ||
| 513 | * the input value by a factor or dividing it by a factor | ||
| 514 | * | ||
| 515 | * We must also be careful about 32-bit overflows. | ||
| 516 | */ | ||
| 517 | unsigned long msecs_to_jiffies(const unsigned int m) | ||
| 518 | { | ||
| 519 | /* | ||
| 520 | * Negative value, means infinite timeout: | ||
| 521 | */ | ||
| 522 | if ((int)m < 0) | ||
| 523 | return MAX_JIFFY_OFFSET; | ||
| 524 | |||
| 525 | #if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) | ||
| 526 | /* | ||
| 527 | * HZ is equal to or smaller than 1000, and 1000 is a nice | ||
| 528 | * round multiple of HZ, divide by the factor between them, | ||
| 529 | * but round upwards: | ||
| 530 | */ | ||
| 531 | return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); | ||
| 532 | #elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) | ||
| 533 | /* | ||
| 534 | * HZ is larger than 1000, and HZ is a nice round multiple of | ||
| 535 | * 1000 - simply multiply by the factor between them. | ||
| 536 | * | ||
| 537 | * But first make sure the multiplication result cannot | ||
| 538 | * overflow: | ||
| 539 | */ | ||
| 540 | if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | ||
| 541 | return MAX_JIFFY_OFFSET; | ||
| 542 | |||
| 543 | return m * (HZ / MSEC_PER_SEC); | ||
| 544 | #else | ||
| 545 | /* | ||
| 546 | * Generic case - multiply, round and divide. But first | ||
| 547 | * check that, if we are doing a net multiplication, | ||
| 548 | * we wouldn't overflow: | ||
| 549 | */ | ||
| 550 | if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) | ||
| 551 | return MAX_JIFFY_OFFSET; | ||
| 552 | |||
| 553 | return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; | ||
| 554 | #endif | ||
| 555 | } | ||
| 556 | EXPORT_SYMBOL(msecs_to_jiffies); | ||
| 557 | |||
| 558 | unsigned long usecs_to_jiffies(const unsigned int u) | ||
| 559 | { | ||
| 560 | if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) | ||
| 561 | return MAX_JIFFY_OFFSET; | ||
| 562 | #if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) | ||
| 563 | return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); | ||
| 564 | #elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) | ||
| 565 | return u * (HZ / USEC_PER_SEC); | ||
| 566 | #else | ||
| 567 | return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC; | ||
| 568 | #endif | ||
| 569 | } | ||
| 570 | EXPORT_SYMBOL(usecs_to_jiffies); | ||
| 571 | |||
| 572 | /* | ||
| 573 | * The TICK_NSEC - 1 rounds up the value to the next resolution. Note | ||
| 574 | * that a remainder subtract here would not do the right thing as the | ||
| 575 | * resolution values don't fall on second boundaries. I.e. the line: | ||
| 576 | * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding. | ||
| 577 | * | ||
| 578 | * Rather, we just shift the bits off the right. | ||
| 579 | * | ||
| 580 | * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec | ||
| 581 | * value to a scaled second value. | ||
| 582 | */ | ||
| 583 | unsigned long | ||
| 584 | timespec_to_jiffies(const struct timespec *value) | ||
| 585 | { | ||
| 586 | unsigned long sec = value->tv_sec; | ||
| 587 | long nsec = value->tv_nsec + TICK_NSEC - 1; | ||
| 588 | |||
| 589 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
| 590 | sec = MAX_SEC_IN_JIFFIES; | ||
| 591 | nsec = 0; | ||
| 592 | } | ||
| 593 | return (((u64)sec * SEC_CONVERSION) + | ||
| 594 | (((u64)nsec * NSEC_CONVERSION) >> | ||
| 595 | (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
| 596 | |||
| 597 | } | ||
| 598 | EXPORT_SYMBOL(timespec_to_jiffies); | ||
| 599 | |||
| 600 | void | ||
| 601 | jiffies_to_timespec(const unsigned long jiffies, struct timespec *value) | ||
| 602 | { | ||
| 603 | /* | ||
| 604 | * Convert jiffies to nanoseconds and separate with | ||
| 605 | * one divide. | ||
| 606 | */ | ||
| 607 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
| 608 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec); | ||
| 609 | } | ||
| 610 | EXPORT_SYMBOL(jiffies_to_timespec); | ||
| 611 | |||
| 612 | /* Same for "timeval" | ||
| 613 | * | ||
| 614 | * Well, almost. The problem here is that the real system resolution is | ||
| 615 | * in nanoseconds and the value being converted is in microseconds. | ||
| 616 | * Also for some machines (those that use HZ = 1024, in particular), | ||
| 617 | * there is a LARGE error in the tick size in microseconds. | ||
| 618 | |||
| 619 | * The solution we use is to do the rounding AFTER we convert the | ||
| 620 | * microsecond part. Thus the USEC_ROUND, the bits to be shifted off. | ||
| 621 | * Instruction-wise, this should cost only an additional add-with-carry | ||
| 622 | * instruction over the way it was done above. | ||
| 623 | */ | ||
| 624 | unsigned long | ||
| 625 | timeval_to_jiffies(const struct timeval *value) | ||
| 626 | { | ||
| 627 | unsigned long sec = value->tv_sec; | ||
| 628 | long usec = value->tv_usec; | ||
| 629 | |||
| 630 | if (sec >= MAX_SEC_IN_JIFFIES){ | ||
| 631 | sec = MAX_SEC_IN_JIFFIES; | ||
| 632 | usec = 0; | ||
| 633 | } | ||
| 634 | return (((u64)sec * SEC_CONVERSION) + | ||
| 635 | (((u64)usec * USEC_CONVERSION + USEC_ROUND) >> | ||
| 636 | (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | ||
| 637 | } | ||
| 638 | |||
| 639 | void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value) | ||
| 640 | { | ||
| 641 | /* | ||
| 642 | * Convert jiffies to nanoseconds and separate with | ||
| 643 | * one divide. | ||
| 644 | */ | ||
| 645 | u64 nsec = (u64)jiffies * TICK_NSEC; | ||
| 646 | long tv_usec; | ||
| 647 | |||
| 648 | value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec); | ||
| 649 | tv_usec /= NSEC_PER_USEC; | ||
| 650 | value->tv_usec = tv_usec; | ||
| 651 | } | ||
| 652 | |||
| 653 | /* | ||
| 654 | * Convert jiffies/jiffies_64 to clock_t and back. | ||
| 655 | */ | ||
| 656 | clock_t jiffies_to_clock_t(long x) | ||
| 657 | { | ||
| 658 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | ||
| 659 | return x / (HZ / USER_HZ); | ||
| 660 | #else | ||
| 661 | u64 tmp = (u64)x * TICK_NSEC; | ||
| 662 | do_div(tmp, (NSEC_PER_SEC / USER_HZ)); | ||
| 663 | return (long)tmp; | ||
| 664 | #endif | ||
| 665 | } | ||
| 666 | EXPORT_SYMBOL(jiffies_to_clock_t); | ||
| 667 | |||
| 668 | unsigned long clock_t_to_jiffies(unsigned long x) | ||
| 669 | { | ||
| 670 | #if (HZ % USER_HZ) == 0 | ||
| 671 | if (x >= ~0UL / (HZ / USER_HZ)) | ||
| 672 | return ~0UL; | ||
| 673 | return x * (HZ / USER_HZ); | ||
| 674 | #else | ||
| 675 | u64 jif; | ||
| 676 | |||
| 677 | /* Don't worry about loss of precision here .. */ | ||
| 678 | if (x >= ~0UL / HZ * USER_HZ) | ||
| 679 | return ~0UL; | ||
| 680 | |||
| 681 | /* .. but do try to contain it here */ | ||
| 682 | jif = x * (u64) HZ; | ||
| 683 | do_div(jif, USER_HZ); | ||
| 684 | return jif; | ||
| 685 | #endif | ||
| 686 | } | ||
| 687 | EXPORT_SYMBOL(clock_t_to_jiffies); | ||
| 688 | |||
| 689 | u64 jiffies_64_to_clock_t(u64 x) | ||
| 690 | { | ||
| 691 | #if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0 | ||
| 692 | do_div(x, HZ / USER_HZ); | ||
| 693 | #else | ||
| 694 | /* | ||
| 695 | * There are better ways that don't overflow early, | ||
| 696 | * but even this doesn't overflow in hundreds of years | ||
| 697 | * in 64 bits, so.. | ||
| 698 | */ | ||
| 699 | x *= TICK_NSEC; | ||
| 700 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
| 701 | #endif | ||
| 702 | return x; | ||
| 703 | } | ||
| 704 | |||
| 705 | EXPORT_SYMBOL(jiffies_64_to_clock_t); | ||
| 706 | |||
| 707 | u64 nsec_to_clock_t(u64 x) | ||
| 708 | { | ||
| 709 | #if (NSEC_PER_SEC % USER_HZ) == 0 | ||
| 710 | do_div(x, (NSEC_PER_SEC / USER_HZ)); | ||
| 711 | #elif (USER_HZ % 512) == 0 | ||
| 712 | x *= USER_HZ/512; | ||
| 713 | do_div(x, (NSEC_PER_SEC / 512)); | ||
| 714 | #else | ||
| 715 | /* | ||
| 716 | * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024, | ||
| 717 | * overflow after 64.99 years. | ||
| 718 | * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ... | ||
| 719 | */ | ||
| 720 | x *= 9; | ||
| 721 | do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) / | ||
| 722 | USER_HZ)); | ||
| 723 | #endif | ||
| 724 | return x; | ||
| 725 | } | ||
| 726 | |||
| 473 | #if (BITS_PER_LONG < 64) | 727 | #if (BITS_PER_LONG < 64) |
| 474 | u64 get_jiffies_64(void) | 728 | u64 get_jiffies_64(void) |
| 475 | { | 729 | { |
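The conversion helpers above pick a fast path when MSEC_PER_SEC (or USEC_PER_SEC) is an exact multiple of HZ, rounding timeouts up so they can never expire early. A standalone sketch (plain userspace C, assuming HZ = 250) of that fast path:

    #include <stdio.h>

    #define HZ           250
    #define MSEC_PER_SEC 1000

    static unsigned int jiffies_to_msecs(unsigned long j)
    {
            return (MSEC_PER_SEC / HZ) * j;          /* exact: 4 ms per jiffy */
    }

    static unsigned long msecs_to_jiffies(unsigned int m)
    {
            /* divide by the factor between them, rounding upwards */
            return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
    }

    int main(void)
    {
            printf("%lu\n", msecs_to_jiffies(10));   /* 3 jiffies = 12 ms */
            printf("%u\n", jiffies_to_msecs(3));     /* 12 */
            return 0;
    }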
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig new file mode 100644 index 000000000000..f66351126544 --- /dev/null +++ b/kernel/time/Kconfig | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | # | ||
| 2 | # Timer subsystem related configuration options | ||
| 3 | # | ||
| 4 | config TICK_ONESHOT | ||
| 5 | bool | ||
| 6 | default n | ||
| 7 | |||
| 8 | config NO_HZ | ||
| 9 | bool "Tickless System (Dynamic Ticks)" | ||
| 10 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
| 11 | select TICK_ONESHOT | ||
| 12 | help | ||
| 13 | This option enables a tickless system: timer interrupts will | ||
| 14 | only trigger on an as-needed basis both when the system is | ||
| 15 | busy and when the system is idle. | ||
| 16 | |||
| 17 | config HIGH_RES_TIMERS | ||
| 18 | bool "High Resolution Timer Support" | ||
| 19 | depends on GENERIC_TIME && GENERIC_CLOCKEVENTS | ||
| 20 | select TICK_ONESHOT | ||
| 21 | help | ||
| 22 | This option enables high resolution timer support. If your | ||
| 23 | hardware is not capable, this option only increases | ||
| 24 | the size of the kernel image. | ||
| 25 | |||
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 61a3907d16fb..93bccba1f265 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
| @@ -1 +1,8 @@ | |||
| 1 | obj-y += ntp.o clocksource.o jiffies.o | 1 | obj-y += ntp.o clocksource.o jiffies.o timer_list.o |
| 2 | |||
| 3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o | ||
| 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | ||
| 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | ||
| 6 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | ||
| 7 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | ||
| 8 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | ||
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c new file mode 100644 index 000000000000..67932ea78c17 --- /dev/null +++ b/kernel/time/clockevents.c | |||
| @@ -0,0 +1,345 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/clockevents.c | ||
| 3 | * | ||
| 4 | * This file contains functions which manage clock event devices. | ||
| 5 | * | ||
| 6 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
| 7 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
| 8 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
| 9 | * | ||
| 10 | * This code is licenced under the GPL version 2. For details see | ||
| 11 | * kernel-base/COPYING. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/clockchips.h> | ||
| 15 | #include <linux/hrtimer.h> | ||
| 16 | #include <linux/init.h> | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/notifier.h> | ||
| 19 | #include <linux/smp.h> | ||
| 20 | #include <linux/sysdev.h> | ||
| 21 | |||
| 22 | /* The registered clock event devices */ | ||
| 23 | static LIST_HEAD(clockevent_devices); | ||
| 24 | static LIST_HEAD(clockevents_released); | ||
| 25 | |||
| 26 | /* Notification for clock events */ | ||
| 27 | static RAW_NOTIFIER_HEAD(clockevents_chain); | ||
| 28 | |||
| 29 | /* Protection for the above */ | ||
| 30 | static DEFINE_SPINLOCK(clockevents_lock); | ||
| 31 | |||
| 32 | /** | ||
| 33 | * clockevent_delta2ns - Convert a latch value (device ticks) to nanoseconds | ||
| 34 | * @latch: value to convert | ||
| 35 | * @evt: pointer to clock event device descriptor | ||
| 36 | * | ||
| 37 | * Math helper, returns latch value converted to nanoseconds (bounds checked) | ||
| 38 | */ | ||
| 39 | unsigned long clockevent_delta2ns(unsigned long latch, | ||
| 40 | struct clock_event_device *evt) | ||
| 41 | { | ||
| 42 | u64 clc = ((u64) latch << evt->shift); | ||
| 43 | |||
| 44 | do_div(clc, evt->mult); | ||
| 45 | if (clc < 1000) | ||
| 46 | clc = 1000; | ||
| 47 | if (clc > LONG_MAX) | ||
| 48 | clc = LONG_MAX; | ||
| 49 | |||
| 50 | return (unsigned long) clc; | ||
| 51 | } | ||
| 52 | |||
| 53 | /** | ||
| 54 | * clockevents_set_mode - set the operating mode of a clock event device | ||
| 55 | * @dev: device to modify | ||
| 56 | * @mode: new mode | ||
| 57 | * | ||
| 58 | * Must be called with interrupts disabled ! | ||
| 59 | */ | ||
| 60 | void clockevents_set_mode(struct clock_event_device *dev, | ||
| 61 | enum clock_event_mode mode) | ||
| 62 | { | ||
| 63 | if (dev->mode != mode) { | ||
| 64 | dev->set_mode(mode, dev); | ||
| 65 | dev->mode = mode; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | |||
| 69 | /** | ||
| 70 | * clockevents_program_event - Reprogram the clock event device. | ||
| 71 | * @expires: absolute expiry time (monotonic clock) | ||
| 72 | * | ||
| 73 | * Returns 0 on success, -ETIME when the event is in the past. | ||
| 74 | */ | ||
| 75 | int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, | ||
| 76 | ktime_t now) | ||
| 77 | { | ||
| 78 | unsigned long long clc; | ||
| 79 | int64_t delta; | ||
| 80 | |||
| 81 | delta = ktime_to_ns(ktime_sub(expires, now)); | ||
| 82 | |||
| 83 | if (delta <= 0) | ||
| 84 | return -ETIME; | ||
| 85 | |||
| 86 | dev->next_event = expires; | ||
| 87 | |||
| 88 | if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
| 89 | return 0; | ||
| 90 | |||
| 91 | if (delta > dev->max_delta_ns) | ||
| 92 | delta = dev->max_delta_ns; | ||
| 93 | if (delta < dev->min_delta_ns) | ||
| 94 | delta = dev->min_delta_ns; | ||
| 95 | |||
| 96 | clc = delta * dev->mult; | ||
| 97 | clc >>= dev->shift; | ||
| 98 | |||
| 99 | return dev->set_next_event((unsigned long) clc, dev); | ||
| 100 | } | ||
| 101 | |||
| 102 | /** | ||
| 103 | * clockevents_register_notifier - register a clock events change listener | ||
| 104 | */ | ||
| 105 | int clockevents_register_notifier(struct notifier_block *nb) | ||
| 106 | { | ||
| 107 | int ret; | ||
| 108 | |||
| 109 | spin_lock(&clockevents_lock); | ||
| 110 | ret = raw_notifier_chain_register(&clockevents_chain, nb); | ||
| 111 | spin_unlock(&clockevents_lock); | ||
| 112 | |||
| 113 | return ret; | ||
| 114 | } | ||
| 115 | |||
| 116 | /** | ||
| 117 | * clockevents_unregister_notifier - unregister a clock events change listener | ||
| 118 | */ | ||
| 119 | void clockevents_unregister_notifier(struct notifier_block *nb) | ||
| 120 | { | ||
| 121 | spin_lock(&clockevents_lock); | ||
| 122 | raw_notifier_chain_unregister(&clockevents_chain, nb); | ||
| 123 | spin_unlock(&clockevents_lock); | ||
| 124 | } | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Notify about a clock event change. Called with clockevents_lock | ||
| 128 | * held. | ||
| 129 | */ | ||
| 130 | static void clockevents_do_notify(unsigned long reason, void *dev) | ||
| 131 | { | ||
| 132 | raw_notifier_call_chain(&clockevents_chain, reason, dev); | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * Called after a notify add to make devices available which were | ||
| 137 | * released from the notifier call. | ||
| 138 | */ | ||
| 139 | static void clockevents_notify_released(void) | ||
| 140 | { | ||
| 141 | struct clock_event_device *dev; | ||
| 142 | |||
| 143 | while (!list_empty(&clockevents_released)) { | ||
| 144 | dev = list_entry(clockevents_released.next, | ||
| 145 | struct clock_event_device, list); | ||
| 146 | list_del(&dev->list); | ||
| 147 | list_add(&dev->list, &clockevent_devices); | ||
| 148 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | /** | ||
| 153 | * clockevents_register_device - register a clock event device | ||
| 154 | * @dev: device to register | ||
| 155 | */ | ||
| 156 | void clockevents_register_device(struct clock_event_device *dev) | ||
| 157 | { | ||
| 158 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
| 159 | |||
| 160 | spin_lock(&clockevents_lock); | ||
| 161 | |||
| 162 | list_add(&dev->list, &clockevent_devices); | ||
| 163 | clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); | ||
| 164 | clockevents_notify_released(); | ||
| 165 | |||
| 166 | spin_unlock(&clockevents_lock); | ||
| 167 | } | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Noop handler when we shut down an event device | ||
| 171 | */ | ||
| 172 | static void clockevents_handle_noop(struct clock_event_device *dev) | ||
| 173 | { | ||
| 174 | } | ||
| 175 | |||
| 176 | /** | ||
| 177 | * clockevents_exchange_device - release and request clock devices | ||
| 178 | * @old: device to release (can be NULL) | ||
| 179 | * @new: device to request (can be NULL) | ||
| 180 | * | ||
| 181 | * Called from the notifier chain. clockevents_lock is held already | ||
| 182 | */ | ||
| 183 | void clockevents_exchange_device(struct clock_event_device *old, | ||
| 184 | struct clock_event_device *new) | ||
| 185 | { | ||
| 186 | unsigned long flags; | ||
| 187 | |||
| 188 | local_irq_save(flags); | ||
| 189 | /* | ||
| 190 | * Caller releases a clock event device. We queue it into the | ||
| 191 | * released list and do a notify add later. | ||
| 192 | */ | ||
| 193 | if (old) { | ||
| 194 | old->event_handler = clockevents_handle_noop; | ||
| 195 | clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); | ||
| 196 | list_del(&old->list); | ||
| 197 | list_add(&old->list, &clockevents_released); | ||
| 198 | } | ||
| 199 | |||
| 200 | if (new) { | ||
| 201 | BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); | ||
| 202 | clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 203 | } | ||
| 204 | local_irq_restore(flags); | ||
| 205 | } | ||
| 206 | |||
| 207 | /** | ||
| 208 | * clockevents_request_device | ||
| 209 | */ | ||
| 210 | struct clock_event_device *clockevents_request_device(unsigned int features, | ||
| 211 | cpumask_t cpumask) | ||
| 212 | { | ||
| 213 | struct clock_event_device *cur, *dev = NULL; | ||
| 214 | struct list_head *tmp; | ||
| 215 | |||
| 216 | spin_lock(&clockevents_lock); | ||
| 217 | |||
| 218 | list_for_each(tmp, &clockevent_devices) { | ||
| 219 | cur = list_entry(tmp, struct clock_event_device, list); | ||
| 220 | |||
| 221 | if ((cur->features & features) == features && | ||
| 222 | cpus_equal(cpumask, cur->cpumask)) { | ||
| 223 | if (!dev || dev->rating < cur->rating) | ||
| 224 | dev = cur; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | |||
| 228 | clockevents_exchange_device(NULL, dev); | ||
| 229 | |||
| 230 | spin_unlock(&clockevents_lock); | ||
| 231 | |||
| 232 | return dev; | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * clockevents_release_device | ||
| 237 | */ | ||
| 238 | void clockevents_release_device(struct clock_event_device *dev) | ||
| 239 | { | ||
| 240 | spin_lock(&clockevents_lock); | ||
| 241 | |||
| 242 | clockevents_exchange_device(dev, NULL); | ||
| 243 | clockevents_notify_released(); | ||
| 244 | |||
| 245 | spin_unlock(&clockevents_lock); | ||
| 246 | } | ||
| 247 | |||
| 248 | /** | ||
| 249 | * clockevents_notify - notification about relevant events | ||
| 250 | */ | ||
| 251 | void clockevents_notify(unsigned long reason, void *arg) | ||
| 252 | { | ||
| 253 | spin_lock(&clockevents_lock); | ||
| 254 | clockevents_do_notify(reason, arg); | ||
| 255 | |||
| 256 | switch (reason) { | ||
| 257 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
| 258 | /* | ||
| 259 | * Unregister the clock event devices which were | ||
| 260 | * released from the users in the notify chain. | ||
| 261 | */ | ||
| 262 | while (!list_empty(&clockevents_released)) { | ||
| 263 | struct clock_event_device *dev; | ||
| 264 | |||
| 265 | dev = list_entry(clockevents_released.next, | ||
| 266 | struct clock_event_device, list); | ||
| 267 | list_del(&dev->list); | ||
| 268 | } | ||
| 269 | break; | ||
| 270 | default: | ||
| 271 | break; | ||
| 272 | } | ||
| 273 | spin_unlock(&clockevents_lock); | ||
| 274 | } | ||
| 275 | EXPORT_SYMBOL_GPL(clockevents_notify); | ||
| 276 | |||
| 277 | #ifdef CONFIG_SYSFS | ||
| 278 | |||
| 279 | /** | ||
| 280 | * clockevents_show_registered - sysfs interface for listing clockevents | ||
| 281 | * @dev: unused | ||
| 282 | * @buf: char buffer to be filled with clock events list | ||
| 283 | * | ||
| 284 | * Provides sysfs interface for listing registered clock event devices | ||
| 285 | */ | ||
| 286 | static ssize_t clockevents_show_registered(struct sys_device *dev, char *buf) | ||
| 287 | { | ||
| 288 | struct list_head *tmp; | ||
| 289 | char *p = buf; | ||
| 290 | int cpu; | ||
| 291 | |||
| 292 | spin_lock(&clockevents_lock); | ||
| 293 | |||
| 294 | list_for_each(tmp, &clockevent_devices) { | ||
| 295 | struct clock_event_device *ce; | ||
| 296 | |||
| 297 | ce = list_entry(tmp, struct clock_event_device, list); | ||
| 298 | p += sprintf(p, "%-20s F:%04x M:%d", ce->name, | ||
| 299 | ce->features, ce->mode); | ||
| 300 | p += sprintf(p, " C:"); | ||
| 301 | if (!cpus_equal(ce->cpumask, cpu_possible_map)) { | ||
| 302 | for_each_cpu_mask(cpu, ce->cpumask) | ||
| 303 | p += sprintf(p, " %d", cpu); | ||
| 304 | } else { | ||
| 305 | /* | ||
| 306 | * FIXME: Add the cpu which is handling this sucker | ||
| 307 | */ | ||
| 308 | } | ||
| 309 | p += sprintf(p, "\n"); | ||
| 310 | } | ||
| 311 | |||
| 312 | spin_unlock(&clockevents_lock); | ||
| 313 | |||
| 314 | return p - buf; | ||
| 315 | } | ||
| 316 | |||
| 317 | /* | ||
| 318 | * Sysfs setup bits: | ||
| 319 | */ | ||
| 320 | static SYSDEV_ATTR(registered, 0600, | ||
| 321 | clockevents_show_registered, NULL); | ||
| 322 | |||
| 323 | static struct sysdev_class clockevents_sysclass = { | ||
| 324 | set_kset_name("clockevents"), | ||
| 325 | }; | ||
| 326 | |||
| 327 | static struct sys_device clockevents_sys_device = { | ||
| 328 | .id = 0, | ||
| 329 | .cls = &clockevents_sysclass, | ||
| 330 | }; | ||
| 331 | |||
| 332 | static int __init clockevents_sysfs_init(void) | ||
| 333 | { | ||
| 334 | int error = sysdev_class_register(&clockevents_sysclass); | ||
| 335 | |||
| 336 | if (!error) | ||
| 337 | error = sysdev_register(&clockevents_sys_device); | ||
| 338 | if (!error) | ||
| 339 | error = sysdev_create_file( | ||
| 340 | &clockevents_sys_device, | ||
| 341 | &attr_registered); | ||
| 342 | return error; | ||
| 343 | } | ||
| 344 | device_initcall(clockevents_sysfs_init); | ||
| 345 | #endif | ||
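clockevent_delta2ns() and clockevents_program_event() above convert between nanoseconds and device ticks with a mult/shift fixed-point factor. A standalone userspace sketch of that arithmetic; the 1 MHz device and shift of 32 are invented example parameters, not a real driver's values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* mult / 2^shift ~= device ticks per nanosecond */
            uint64_t mult = (1ULL << 32) / 1000;     /* 1 MHz: 1 tick = 1000 ns */

            uint64_t delta_ns = 1000000;             /* program a 1 ms event */
            uint64_t clc = (delta_ns * mult) >> 32;  /* ~1000 ticks */

            uint64_t latch = clc;                    /* ticks back to ns */
            uint64_t ns = (latch << 32) / mult;      /* ~1000000 ns */

            printf("ticks=%llu ns=%llu\n",
                   (unsigned long long)clc, (unsigned long long)ns);
            return 0;
    }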
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d9ef176c4e09..193a0793af95 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
| 30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
| 31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ | 31 | #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ |
| 32 | #include <linux/tick.h> | ||
| 32 | 33 | ||
| 33 | /* XXX - Would like a better way for initializing curr_clocksource */ | 34 | /* XXX - Would like a better way for initializing curr_clocksource */ |
| 34 | extern struct clocksource clocksource_jiffies; | 35 | extern struct clocksource clocksource_jiffies; |
| @@ -48,6 +49,7 @@ extern struct clocksource clocksource_jiffies; | |||
| 48 | */ | 49 | */ |
| 49 | static struct clocksource *curr_clocksource = &clocksource_jiffies; | 50 | static struct clocksource *curr_clocksource = &clocksource_jiffies; |
| 50 | static struct clocksource *next_clocksource; | 51 | static struct clocksource *next_clocksource; |
| 52 | static struct clocksource *clocksource_override; | ||
| 51 | static LIST_HEAD(clocksource_list); | 53 | static LIST_HEAD(clocksource_list); |
| 52 | static DEFINE_SPINLOCK(clocksource_lock); | 54 | static DEFINE_SPINLOCK(clocksource_lock); |
| 53 | static char override_name[32]; | 55 | static char override_name[32]; |
| @@ -62,9 +64,123 @@ static int __init clocksource_done_booting(void) | |||
| 62 | finished_booting = 1; | 64 | finished_booting = 1; |
| 63 | return 0; | 65 | return 0; |
| 64 | } | 66 | } |
| 65 | |||
| 66 | late_initcall(clocksource_done_booting); | 67 | late_initcall(clocksource_done_booting); |
| 67 | 68 | ||
| 69 | #ifdef CONFIG_CLOCKSOURCE_WATCHDOG | ||
| 70 | static LIST_HEAD(watchdog_list); | ||
| 71 | static struct clocksource *watchdog; | ||
| 72 | static struct timer_list watchdog_timer; | ||
| 73 | static DEFINE_SPINLOCK(watchdog_lock); | ||
| 74 | static cycle_t watchdog_last; | ||
| 75 | /* | ||
| 76 | * Interval: 0.5sec, threshold: 0.0625s | ||
| 77 | */ | ||
| 78 | #define WATCHDOG_INTERVAL (HZ >> 1) | ||
| 79 | #define WATCHDOG_TRESHOLD (NSEC_PER_SEC >> 4) | ||
| 80 | |||
| 81 | static void clocksource_ratewd(struct clocksource *cs, int64_t delta) | ||
| 82 | { | ||
| 83 | if (delta > -WATCHDOG_TRESHOLD && delta < WATCHDOG_TRESHOLD) | ||
| 84 | return; | ||
| 85 | |||
| 86 | printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", | ||
| 87 | cs->name, delta); | ||
| 88 | cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); | ||
| 89 | clocksource_change_rating(cs, 0); | ||
| 90 | cs->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
| 91 | list_del(&cs->wd_list); | ||
| 92 | } | ||
| 93 | |||
| 94 | static void clocksource_watchdog(unsigned long data) | ||
| 95 | { | ||
| 96 | struct clocksource *cs, *tmp; | ||
| 97 | cycle_t csnow, wdnow; | ||
| 98 | int64_t wd_nsec, cs_nsec; | ||
| 99 | |||
| 100 | spin_lock(&watchdog_lock); | ||
| 101 | |||
| 102 | wdnow = watchdog->read(); | ||
| 103 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | ||
| 104 | watchdog_last = wdnow; | ||
| 105 | |||
| 106 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | ||
| 107 | csnow = cs->read(); | ||
| 108 | /* Initialized ? */ | ||
| 109 | if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { | ||
| 110 | if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && | ||
| 111 | (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { | ||
| 112 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
| 113 | /* | ||
| 114 | * We just marked the clocksource as | ||
| 115 | * highres-capable, notify the rest of the | ||
| 116 | * system as well so that we transition | ||
| 117 | * into high-res mode: | ||
| 118 | */ | ||
| 119 | tick_clock_notify(); | ||
| 120 | } | ||
| 121 | cs->flags |= CLOCK_SOURCE_WATCHDOG; | ||
| 122 | cs->wd_last = csnow; | ||
| 123 | } else { | ||
| 124 | cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); | ||
| 125 | cs->wd_last = csnow; | ||
| 126 | /* Check the delta. Might remove it from the list! */ | ||
| 127 | clocksource_ratewd(cs, cs_nsec - wd_nsec); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | if (!list_empty(&watchdog_list)) { | ||
| 132 | __mod_timer(&watchdog_timer, | ||
| 133 | watchdog_timer.expires + WATCHDOG_INTERVAL); | ||
| 134 | } | ||
| 135 | spin_unlock(&watchdog_lock); | ||
| 136 | } | ||
| 137 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
| 138 | { | ||
| 139 | struct clocksource *cse; | ||
| 140 | unsigned long flags; | ||
| 141 | |||
| 142 | spin_lock_irqsave(&watchdog_lock, flags); | ||
| 143 | if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { | ||
| 144 | int started = !list_empty(&watchdog_list); | ||
| 145 | |||
| 146 | list_add(&cs->wd_list, &watchdog_list); | ||
| 147 | if (!started && watchdog) { | ||
| 148 | watchdog_last = watchdog->read(); | ||
| 149 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | ||
| 150 | add_timer(&watchdog_timer); | ||
| 151 | } | ||
| 152 | } else if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) { | ||
| 153 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
| 154 | |||
| 155 | if (!watchdog || cs->rating > watchdog->rating) { | ||
| 156 | if (watchdog) | ||
| 157 | del_timer(&watchdog_timer); | ||
| 158 | watchdog = cs; | ||
| 159 | init_timer(&watchdog_timer); | ||
| 160 | watchdog_timer.function = clocksource_watchdog; | ||
| 161 | |||
| 162 | /* Reset watchdog cycles */ | ||
| 163 | list_for_each_entry(cse, &watchdog_list, wd_list) | ||
| 164 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | ||
| 165 | /* Start if list is not empty */ | ||
| 166 | if (!list_empty(&watchdog_list)) { | ||
| 167 | watchdog_last = watchdog->read(); | ||
| 168 | watchdog_timer.expires = | ||
| 169 | jiffies + WATCHDOG_INTERVAL; | ||
| 170 | add_timer(&watchdog_timer); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | } | ||
| 174 | spin_unlock_irqrestore(&watchdog_lock, flags); | ||
| 175 | } | ||
| 176 | #else | ||
| 177 | static void clocksource_check_watchdog(struct clocksource *cs) | ||
| 178 | { | ||
| 179 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | ||
| 180 | cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; | ||
| 181 | } | ||
| 182 | #endif | ||
| 183 | |||
| 68 | /** | 184 | /** |
| 69 | * clocksource_get_next - Returns the selected clocksource | 185 | * clocksource_get_next - Returns the selected clocksource |
| 70 | * | 186 | * |
| @@ -84,60 +200,54 @@ struct clocksource *clocksource_get_next(void) | |||
| 84 | } | 200 | } |
| 85 | 201 | ||
| 86 | /** | 202 | /** |
| 87 | * select_clocksource - Finds the best registered clocksource. | 203 | * select_clocksource - Selects the best registered clocksource. |
| 88 | * | 204 | * |
| 89 | * Private function. Must hold clocksource_lock when called. | 205 | * Private function. Must hold clocksource_lock when called. |
| 90 | * | 206 | * |
| 91 | * Looks through the list of registered clocksources, returning | 207 | * Select the clocksource with the best rating, or the clocksource |
| 92 | * the one with the highest rating value. If there is a clocksource | 208 | * selected by the userspace override. |
| 93 | * name that matches the override string, it returns that clocksource. | ||
| 94 | */ | 209 | */ |
| 95 | static struct clocksource *select_clocksource(void) | 210 | static struct clocksource *select_clocksource(void) |
| 96 | { | 211 | { |
| 97 | struct clocksource *best = NULL; | 212 | struct clocksource *next; |
| 98 | struct list_head *tmp; | ||
| 99 | 213 | ||
| 100 | list_for_each(tmp, &clocksource_list) { | 214 | if (list_empty(&clocksource_list)) |
| 101 | struct clocksource *src; | 215 | return NULL; |
| 102 | 216 | ||
| 103 | src = list_entry(tmp, struct clocksource, list); | 217 | if (clocksource_override) |
| 104 | if (!best) | 218 | next = clocksource_override; |
| 105 | best = src; | 219 | else |
| 106 | 220 | next = list_entry(clocksource_list.next, struct clocksource, | |
| 107 | /* check for override: */ | 221 | list); |
| 108 | if (strlen(src->name) == strlen(override_name) && | 222 | |
| 109 | !strcmp(src->name, override_name)) { | 223 | if (next == curr_clocksource) |
| 110 | best = src; | 224 | return NULL; |
| 111 | break; | ||
| 112 | } | ||
| 113 | /* pick the highest rating: */ | ||
| 114 | if (src->rating > best->rating) | ||
| 115 | best = src; | ||
| 116 | } | ||
| 117 | 225 | ||
| 118 | return best; | 226 | return next; |
| 119 | } | 227 | } |
| 120 | 228 | ||
| 121 | /** | 229 | /* |
| 122 | * is_registered_source - Checks if clocksource is registered | 230 | * Enqueue the clocksource sorted by rating |
| 123 | * @c: pointer to a clocksource | ||
| 124 | * | ||
| 125 | * Private helper function. Must hold clocksource_lock when called. | ||
| 126 | * | ||
| 127 | * Returns one if the clocksource is already registered, zero otherwise. | ||
| 128 | */ | 231 | */ |
| 129 | static int is_registered_source(struct clocksource *c) | 232 | static int clocksource_enqueue(struct clocksource *c) |
| 130 | { | 233 | { |
| 131 | int len = strlen(c->name); | 234 | struct list_head *tmp, *entry = &clocksource_list; |
| 132 | struct list_head *tmp; | ||
| 133 | 235 | ||
| 134 | list_for_each(tmp, &clocksource_list) { | 236 | list_for_each(tmp, &clocksource_list) { |
| 135 | struct clocksource *src; | 237 | struct clocksource *cs; |
| 136 | 238 | ||
| 137 | src = list_entry(tmp, struct clocksource, list); | 239 | cs = list_entry(tmp, struct clocksource, list); |
| 138 | if (strlen(src->name) == len && !strcmp(src->name, c->name)) | 240 | if (cs == c) |
| 139 | return 1; | 241 | return -EBUSY; |
| 242 | /* Keep track of where to insert */ | ||
| 243 | if (cs->rating >= c->rating) | ||
| 244 | entry = tmp; | ||
| 140 | } | 245 | } |
| 246 | list_add(&c->list, entry); | ||
| 247 | |||
| 248 | if (strlen(c->name) == strlen(override_name) && | ||
| 249 | !strcmp(c->name, override_name)) | ||
| 250 | clocksource_override = c; | ||
| 141 | 251 | ||
| 142 | return 0; | 252 | return 0; |
| 143 | } | 253 | } |
| @@ -150,42 +260,35 @@ static int is_registered_source(struct clocksource *c) | |||
| 150 | */ | 260 | */ |
| 151 | int clocksource_register(struct clocksource *c) | 261 | int clocksource_register(struct clocksource *c) |
| 152 | { | 262 | { |
| 153 | int ret = 0; | ||
| 154 | unsigned long flags; | 263 | unsigned long flags; |
| 264 | int ret; | ||
| 155 | 265 | ||
| 156 | spin_lock_irqsave(&clocksource_lock, flags); | 266 | spin_lock_irqsave(&clocksource_lock, flags); |
| 157 | /* check if clocksource is already registered */ | 267 | ret = clocksource_enqueue(c); |
| 158 | if (is_registered_source(c)) { | 268 | if (!ret) |
| 159 | printk("register_clocksource: Cannot register %s. " | ||
| 160 | "Already registered!", c->name); | ||
| 161 | ret = -EBUSY; | ||
| 162 | } else { | ||
| 163 | /* register it */ | ||
| 164 | list_add(&c->list, &clocksource_list); | ||
| 165 | /* scan the registered clocksources, and pick the best one */ | ||
| 166 | next_clocksource = select_clocksource(); | 269 | next_clocksource = select_clocksource(); |
| 167 | } | ||
| 168 | spin_unlock_irqrestore(&clocksource_lock, flags); | 270 | spin_unlock_irqrestore(&clocksource_lock, flags); |
| 271 | if (!ret) | ||
| 272 | clocksource_check_watchdog(c); | ||
| 169 | return ret; | 273 | return ret; |
| 170 | } | 274 | } |
| 171 | EXPORT_SYMBOL(clocksource_register); | 275 | EXPORT_SYMBOL(clocksource_register); |
| 172 | 276 | ||
| 173 | /** | 277 | /** |
| 174 | * clocksource_reselect - Rescan list for next clocksource | 278 | * clocksource_change_rating - Change the rating of a registered clocksource |
| 175 | * | 279 | * |
| 176 | * A quick helper function to be used if a clocksource changes its | ||
| 177 | * rating. Forces the clocksource list to be re-scanned for the best | ||
| 178 | * clocksource. | ||
| 179 | */ | 280 | */ |
| 180 | void clocksource_reselect(void) | 281 | void clocksource_change_rating(struct clocksource *cs, int rating) |
| 181 | { | 282 | { |
| 182 | unsigned long flags; | 283 | unsigned long flags; |
| 183 | 284 | ||
| 184 | spin_lock_irqsave(&clocksource_lock, flags); | 285 | spin_lock_irqsave(&clocksource_lock, flags); |
| 286 | list_del(&cs->list); | ||
| 287 | cs->rating = rating; | ||
| 288 | clocksource_enqueue(cs); | ||
| 185 | next_clocksource = select_clocksource(); | 289 | next_clocksource = select_clocksource(); |
| 186 | spin_unlock_irqrestore(&clocksource_lock, flags); | 290 | spin_unlock_irqrestore(&clocksource_lock, flags); |
| 187 | } | 291 | } |
| 188 | EXPORT_SYMBOL(clocksource_reselect); | ||
| 189 | 292 | ||
| 190 | #ifdef CONFIG_SYSFS | 293 | #ifdef CONFIG_SYSFS |
| 191 | /** | 294 | /** |
| @@ -221,7 +324,11 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf) | |||
| 221 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, | 324 | static ssize_t sysfs_override_clocksource(struct sys_device *dev, |
| 222 | const char *buf, size_t count) | 325 | const char *buf, size_t count) |
| 223 | { | 326 | { |
| 327 | struct clocksource *ovr = NULL; | ||
| 328 | struct list_head *tmp; | ||
| 224 | size_t ret = count; | 329 | size_t ret = count; |
| 330 | int len; | ||
| 331 | |||
| 225 | /* strings from sysfs write are not 0 terminated! */ | 332 | /* strings from sysfs write are not 0 terminated! */ |
| 226 | if (count >= sizeof(override_name)) | 333 | if (count >= sizeof(override_name)) |
| 227 | return -EINVAL; | 334 | return -EINVAL; |
| @@ -229,17 +336,32 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, | |||
| 229 | /* strip off \n: */ | 336 | /* strip off \n: */ |
| 230 | if (buf[count-1] == '\n') | 337 | if (buf[count-1] == '\n') |
| 231 | count--; | 338 | count--; |
| 232 | if (count < 1) | ||
| 233 | return -EINVAL; | ||
| 234 | 339 | ||
| 235 | spin_lock_irq(&clocksource_lock); | 340 | spin_lock_irq(&clocksource_lock); |
| 236 | 341 | ||
| 237 | /* copy the name given: */ | 342 | if (count > 0) |
| 238 | memcpy(override_name, buf, count); | 343 | memcpy(override_name, buf, count); |
| 239 | override_name[count] = 0; | 344 | override_name[count] = 0; |
| 240 | 345 | ||
| 241 | /* try to select it: */ | 346 | len = strlen(override_name); |
| 242 | next_clocksource = select_clocksource(); | 347 | if (len) { |
| 348 | ovr = clocksource_override; | ||
| 349 | /* try to select it: */ | ||
| 350 | list_for_each(tmp, &clocksource_list) { | ||
| 351 | struct clocksource *cs; | ||
| 352 | |||
| 353 | cs = list_entry(tmp, struct clocksource, list); | ||
| 354 | if (strlen(cs->name) == len && | ||
| 355 | !strcmp(cs->name, override_name)) | ||
| 356 | ovr = cs; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | /* Reselect when the override name has changed */ | ||
| 361 | if (ovr != clocksource_override) { | ||
| 362 | clocksource_override = ovr; | ||
| 363 | next_clocksource = select_clocksource(); | ||
| 364 | } | ||
| 243 | 365 | ||
| 244 | spin_unlock_irq(&clocksource_lock); | 366 | spin_unlock_irq(&clocksource_lock); |
| 245 | 367 | ||
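The new clocksource watchdog above declares a clocksource unstable when it drifts more than WATCHDOG_TRESHOLD (NSEC_PER_SEC >> 4, i.e. 62.5 ms) from the watchdog clock over one interval. A standalone sketch of just that delta check, with invented sample values:

    #include <stdint.h>
    #include <stdio.h>

    #define WATCHDOG_TRESHOLD (1000000000LL >> 4)   /* NSEC_PER_SEC >> 4 */

    int main(void)
    {
            int64_t wd_nsec = 500000000;    /* watchdog saw 0.5 s elapse */
            int64_t cs_nsec = 580000000;    /* suspect clock saw 0.58 s  */
            int64_t delta   = cs_nsec - wd_nsec;

            /* same test as clocksource_ratewd(), inverted: flag when the
             * drift leaves the (-threshold, +threshold) window */
            if (delta <= -WATCHDOG_TRESHOLD || delta >= WATCHDOG_TRESHOLD)
                    printf("unstable (delta = %lld ns)\n", (long long)delta);
            return 0;
    }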
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a99b2a6e6a07..3be8da8fed7e 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
| @@ -62,7 +62,6 @@ struct clocksource clocksource_jiffies = { | |||
| 62 | .mask = 0xffffffff, /*32bits*/ | 62 | .mask = 0xffffffff, /*32bits*/ |
| 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ | 63 | .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ |
| 64 | .shift = JIFFIES_SHIFT, | 64 | .shift = JIFFIES_SHIFT, |
| 65 | .is_continuous = 0, /* tick based, not free running */ | ||
| 66 | }; | 65 | }; |
| 67 | 66 | ||
| 68 | static int __init init_jiffies_clocksource(void) | 67 | static int __init init_jiffies_clocksource(void) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3a73f9..eb12509e00bd 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base; | |||
| 24 | 24 | ||
| 25 | #define MAX_TICKADJ 500 /* microsecs */ | 25 | #define MAX_TICKADJ 500 /* microsecs */ |
| 26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ | 26 | #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ |
| 27 | TICK_LENGTH_SHIFT) / HZ) | 27 | TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) |
| 28 | 28 | ||
| 29 | /* | 29 | /* |
| 30 | * phase-lock loop variables | 30 | * phase-lock loop variables |
| @@ -46,13 +46,17 @@ long time_adjust; | |||
| 46 | 46 | ||
| 47 | static void ntp_update_frequency(void) | 47 | static void ntp_update_frequency(void) |
| 48 | { | 48 | { |
| 49 | tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; | 49 | u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) |
| 50 | tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; | 50 | << TICK_LENGTH_SHIFT; |
| 51 | tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | 51 | second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; |
| 52 | second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); | ||
| 52 | 53 | ||
| 53 | do_div(tick_length_base, HZ); | 54 | tick_length_base = second_length; |
| 54 | 55 | ||
| 55 | tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; | 56 | do_div(second_length, HZ); |
| 57 | tick_nsec = second_length >> TICK_LENGTH_SHIFT; | ||
| 58 | |||
| 59 | do_div(tick_length_base, NTP_INTERVAL_FREQ); | ||
| 56 | } | 60 | } |
| 57 | 61 | ||
| 58 | /** | 62 | /** |
| @@ -162,7 +166,7 @@ void second_overflow(void) | |||
| 162 | tick_length -= MAX_TICKADJ_SCALED; | 166 | tick_length -= MAX_TICKADJ_SCALED; |
| 163 | } else { | 167 | } else { |
| 164 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / | 168 | tick_length += (s64)(time_adjust * NSEC_PER_USEC / |
| 165 | HZ) << TICK_LENGTH_SHIFT; | 169 | NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; |
| 166 | time_adjust = 0; | 170 | time_adjust = 0; |
| 167 | } | 171 | } |
| 168 | } | 172 | } |
| @@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc) | |||
| 239 | result = -EINVAL; | 243 | result = -EINVAL; |
| 240 | goto leave; | 244 | goto leave; |
| 241 | } | 245 | } |
| 242 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); | 246 | time_freq = ((s64)txc->freq * NSEC_PER_USEC) |
| 247 | >> (SHIFT_USEC - SHIFT_NSEC); | ||
| 243 | } | 248 | } |
| 244 | 249 | ||
| 245 | if (txc->modes & ADJ_MAXERROR) { | 250 | if (txc->modes & ADJ_MAXERROR) { |
| @@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc) | |||
| 309 | freq_adj += time_freq; | 314 | freq_adj += time_freq; |
| 310 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); | 315 | freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); |
| 311 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); | 316 | time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); |
| 312 | time_offset = (time_offset / HZ) << SHIFT_UPDATE; | 317 | time_offset = (time_offset / NTP_INTERVAL_FREQ) |
| 318 | << SHIFT_UPDATE; | ||
| 313 | } /* STA_PLL */ | 319 | } /* STA_PLL */ |
| 314 | } /* txc->modes & ADJ_OFFSET */ | 320 | } /* txc->modes & ADJ_OFFSET */ |
| 315 | if (txc->modes & ADJ_TICK) | 321 | if (txc->modes & ADJ_TICK) |
| @@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) | |||
| 324 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) | 330 | if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) |
| 325 | txc->offset = save_adjust; | 331 | txc->offset = save_adjust; |
| 326 | else | 332 | else |
| 327 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; | 333 | txc->offset = shift_right(time_offset, SHIFT_UPDATE) |
| 328 | txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); | 334 | * NTP_INTERVAL_FREQ / 1000; |
| 335 | txc->freq = (time_freq / NSEC_PER_USEC) | ||
| 336 | << (SHIFT_USEC - SHIFT_NSEC); | ||
| 329 | txc->maxerror = time_maxerror; | 337 | txc->maxerror = time_maxerror; |
| 330 | txc->esterror = time_esterror; | 338 | txc->esterror = time_esterror; |
| 331 | txc->status = time_status; | 339 | txc->status = time_status; |
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c new file mode 100644 index 000000000000..12b3efeb9f6f --- /dev/null +++ b/kernel/time/tick-broadcast.c | |||
| @@ -0,0 +1,480 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/tick-broadcast.c | ||
| 3 | * | ||
| 4 | * This file contains functions which emulate a local clock-event | ||
| 5 | * device via a broadcast event source. | ||
| 6 | * | ||
| 7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
| 8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
| 9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
| 10 | * | ||
| 11 | * This code is licenced under the GPL version 2. For details see | ||
| 12 | * kernel-base/COPYING. | ||
| 13 | */ | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | #include <linux/err.h> | ||
| 16 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/irq.h> | ||
| 18 | #include <linux/percpu.h> | ||
| 19 | #include <linux/profile.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/tick.h> | ||
| 22 | |||
| 23 | #include "tick-internal.h" | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Broadcast support for broken x86 hardware, where the local apic | ||
| 27 | * timer stops in C3 state. | ||
| 28 | */ | ||
| 29 | |||
| 30 | struct tick_device tick_broadcast_device; | ||
| 31 | static cpumask_t tick_broadcast_mask; | ||
| 32 | static DEFINE_SPINLOCK(tick_broadcast_lock); | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Debugging: see timer_list.c | ||
| 36 | */ | ||
| 37 | struct tick_device *tick_get_broadcast_device(void) | ||
| 38 | { | ||
| 39 | return &tick_broadcast_device; | ||
| 40 | } | ||
| 41 | |||
| 42 | cpumask_t *tick_get_broadcast_mask(void) | ||
| 43 | { | ||
| 44 | return &tick_broadcast_mask; | ||
| 45 | } | ||
| 46 | |||
| 47 | /* | ||
| 48 | * Start the device in periodic mode | ||
| 49 | */ | ||
| 50 | static void tick_broadcast_start_periodic(struct clock_event_device *bc) | ||
| 51 | { | ||
| 52 | if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
| 53 | tick_setup_periodic(bc, 1); | ||
| 54 | } | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Check, if the device can be utilized as a broadcast device: | ||
| 58 | */ | ||
| 59 | int tick_check_broadcast_device(struct clock_event_device *dev) | ||
| 60 | { | ||
| 61 | if (tick_broadcast_device.evtdev || | ||
| 62 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 63 | return 0; | ||
| 64 | |||
| 65 | clockevents_exchange_device(NULL, dev); | ||
| 66 | tick_broadcast_device.evtdev = dev; | ||
| 67 | if (!cpus_empty(tick_broadcast_mask)) | ||
| 68 | tick_broadcast_start_periodic(dev); | ||
| 69 | return 1; | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Check, if the device is the broadcast device | ||
| 74 | */ | ||
| 75 | int tick_is_broadcast_device(struct clock_event_device *dev) | ||
| 76 | { | ||
| 77 | return (dev && tick_broadcast_device.evtdev == dev); | ||
| 78 | } | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Check, if the device is dysfunctional and a placeholder, which | ||
| 82 | * needs to be handled by the broadcast device. | ||
| 83 | */ | ||
| 84 | int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) | ||
| 85 | { | ||
| 86 | unsigned long flags; | ||
| 87 | int ret = 0; | ||
| 88 | |||
| 89 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Devices might be registered with both periodic and oneshot | ||
| 93 | * mode disabled. This signals that the device needs to be | ||
| 94 | * operated from the broadcast device and is a placeholder for | ||
| 95 | * the cpu local device. | ||
| 96 | */ | ||
| 97 | if (!tick_device_is_functional(dev)) { | ||
| 98 | dev->event_handler = tick_handle_periodic; | ||
| 99 | cpu_set(cpu, tick_broadcast_mask); | ||
| 100 | tick_broadcast_start_periodic(tick_broadcast_device.evtdev); | ||
| 101 | ret = 1; | ||
| 102 | } | ||
| 103 | |||
| 104 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 105 | return ret; | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Broadcast the event to the cpus, which are set in the mask | ||
| 110 | */ | ||
| 111 | int tick_do_broadcast(cpumask_t mask) | ||
| 112 | { | ||
| 113 | int ret = 0, cpu = smp_processor_id(); | ||
| 114 | struct tick_device *td; | ||
| 115 | |||
| 116 | /* | ||
| 117 | * Check, if the current cpu is in the mask | ||
| 118 | */ | ||
| 119 | if (cpu_isset(cpu, mask)) { | ||
| 120 | cpu_clear(cpu, mask); | ||
| 121 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 122 | td->evtdev->event_handler(td->evtdev); | ||
| 123 | ret = 1; | ||
| 124 | } | ||
| 125 | |||
| 126 | if (!cpus_empty(mask)) { | ||
| 127 | /* | ||
| 128 | * It might be necessary to actually check whether the devices | ||
| 129 | * have different broadcast functions. For now, just use the | ||
| 130 | * one of the first device. This works as long as we have this | ||
| 131 | * misfeature only on x86 (lapic) | ||
| 132 | */ | ||
| 133 | cpu = first_cpu(mask); | ||
| 134 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 135 | td->evtdev->broadcast(mask); | ||
| 136 | ret = 1; | ||
| 137 | } | ||
| 138 | return ret; | ||
| 139 | } | ||
| 140 | |||
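tick_do_broadcast() handles the current cpu by calling its event handler directly and sends a single hardware broadcast for everyone else left in the mask. A standalone userspace sketch of that mask handling, using a plain 64-bit mask in place of cpumask_t (the stub functions and values are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static void local_tick(int cpu)      { printf("local tick on cpu %d\n", cpu); }
    static void hw_broadcast(uint64_t m) { printf("hw broadcast to mask %#llx\n",
                                                  (unsigned long long)m); }

    static int do_broadcast(uint64_t mask, int this_cpu)
    {
        int ret = 0;

        if (mask & (1ULL << this_cpu)) {  /* current cpu: call handler directly */
            mask &= ~(1ULL << this_cpu);
            local_tick(this_cpu);
            ret = 1;
        }
        if (mask) {                       /* remaining cpus: one broadcast */
            hw_broadcast(mask);
            ret = 1;
        }
        return ret;
    }

    int main(void)
    {
        do_broadcast(0x7, 0);             /* cpus 0,1,2 expired; we run on cpu 0 */
        return 0;
    }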
| 141 | /* | ||
| 142 | * Periodic broadcast: | ||
| 143 | * - invoke the broadcast handlers | ||
| 144 | */ | ||
| 145 | static void tick_do_periodic_broadcast(void) | ||
| 146 | { | ||
| 147 | cpumask_t mask; | ||
| 148 | |||
| 149 | spin_lock(&tick_broadcast_lock); | ||
| 150 | |||
| 151 | cpus_and(mask, cpu_online_map, tick_broadcast_mask); | ||
| 152 | tick_do_broadcast(mask); | ||
| 153 | |||
| 154 | spin_unlock(&tick_broadcast_lock); | ||
| 155 | } | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Event handler for periodic broadcast ticks | ||
| 159 | */ | ||
| 160 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | ||
| 161 | { | ||
| 162 | dev->next_event.tv64 = KTIME_MAX; | ||
| 163 | |||
| 164 | tick_do_periodic_broadcast(); | ||
| 165 | |||
| 166 | /* | ||
| 167 | * The device is in periodic mode. No reprogramming necessary: | ||
| 168 | */ | ||
| 169 | if (dev->mode == CLOCK_EVT_MODE_PERIODIC) | ||
| 170 | return; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Setup the next period for devices, which do not have | ||
| 174 | * periodic mode: | ||
| 175 | */ | ||
| 176 | for (;;) { | ||
| 177 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
| 178 | |||
| 179 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
| 180 | return; | ||
| 181 | tick_do_periodic_broadcast(); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
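When the broadcast device has no periodic mode, tick_handle_periodic_broadcast() falls back to a catch-up loop: program the next period, and if that deadline has already passed, run the broadcast and advance again. A standalone sketch of the loop shape with a simulated clock (all values illustrative):

    #include <stdio.h>
    #include <stdint.h>

    static int64_t fake_now = 1000;           /* simulated clock, in ns */

    static int program_event(int64_t expires)
    {
        return expires <= fake_now ? -1 : 0;  /* -1: deadline already passed */
    }

    int main(void)
    {
        const int64_t period = 250;
        int64_t next = 400;                   /* last deadline we handled */

        for (;;) {
            next += period;
            if (!program_event(next)) {
                printf("armed for %lld\n", (long long)next);
                break;
            }
            printf("missed %lld, running the broadcast\n", (long long)next);
        }
        return 0;
    }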
| 185 | /* | ||
| 186 | * Powerstate information: The system enters/leaves a state where | ||
| 187 | * affected devices might stop | ||
| 188 | */ | ||
| 189 | static void tick_do_broadcast_on_off(void *why) | ||
| 190 | { | ||
| 191 | struct clock_event_device *bc, *dev; | ||
| 192 | struct tick_device *td; | ||
| 193 | unsigned long flags, *reason = why; | ||
| 194 | int cpu; | ||
| 195 | |||
| 196 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 197 | |||
| 198 | cpu = smp_processor_id(); | ||
| 199 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 200 | dev = td->evtdev; | ||
| 201 | bc = tick_broadcast_device.evtdev; | ||
| 202 | |||
| 203 | /* | ||
| 204 | * Is the device in broadcast mode forever or is it not | ||
| 205 | * affected by the powerstate ? | ||
| 206 | */ | ||
| 207 | if (!dev || !tick_device_is_functional(dev) || | ||
| 208 | !(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 209 | goto out; | ||
| 210 | |||
| 211 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_ON) { | ||
| 212 | if (!cpu_isset(cpu, tick_broadcast_mask)) { | ||
| 213 | cpu_set(cpu, tick_broadcast_mask); | ||
| 214 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
| 215 | clockevents_set_mode(dev, | ||
| 216 | CLOCK_EVT_MODE_SHUTDOWN); | ||
| 217 | } | ||
| 218 | } else { | ||
| 219 | if (cpu_isset(cpu, tick_broadcast_mask)) { | ||
| 220 | cpu_clear(cpu, tick_broadcast_mask); | ||
| 221 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
| 222 | tick_setup_periodic(dev, 0); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | if (cpus_empty(tick_broadcast_mask)) | ||
| 227 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 228 | else { | ||
| 229 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
| 230 | tick_broadcast_start_periodic(bc); | ||
| 231 | else | ||
| 232 | tick_broadcast_setup_oneshot(bc); | ||
| 233 | } | ||
| 234 | out: | ||
| 235 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 236 | } | ||
| 237 | |||
| 238 | /* | ||
| 239 | * Powerstate information: The system enters/leaves a state where | ||
| 240 | * affected devices might stop. | ||
| 241 | */ | ||
| 242 | void tick_broadcast_on_off(unsigned long reason, int *oncpu) | ||
| 243 | { | ||
| 244 | int cpu = get_cpu(); | ||
| 245 | |||
| 246 | if (cpu == *oncpu) | ||
| 247 | tick_do_broadcast_on_off(&reason); | ||
| 248 | else | ||
| 249 | smp_call_function_single(*oncpu, tick_do_broadcast_on_off, | ||
| 250 | &reason, 1, 1); | ||
| 251 | put_cpu(); | ||
| 252 | } | ||
| 253 | |||
| 254 | /* | ||
| 255 | * Set the periodic handler depending on broadcast on/off | ||
| 256 | */ | ||
| 257 | void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) | ||
| 258 | { | ||
| 259 | if (!broadcast) | ||
| 260 | dev->event_handler = tick_handle_periodic; | ||
| 261 | else | ||
| 262 | dev->event_handler = tick_handle_periodic_broadcast; | ||
| 263 | } | ||
| 264 | |||
| 265 | /* | ||
| 266 | * Remove a CPU from broadcasting | ||
| 267 | */ | ||
| 268 | void tick_shutdown_broadcast(unsigned int *cpup) | ||
| 269 | { | ||
| 270 | struct clock_event_device *bc; | ||
| 271 | unsigned long flags; | ||
| 272 | unsigned int cpu = *cpup; | ||
| 273 | |||
| 274 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 275 | |||
| 276 | bc = tick_broadcast_device.evtdev; | ||
| 277 | cpu_clear(cpu, tick_broadcast_mask); | ||
| 278 | |||
| 279 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { | ||
| 280 | if (bc && cpus_empty(tick_broadcast_mask)) | ||
| 281 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 282 | } | ||
| 283 | |||
| 284 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 285 | } | ||
| 286 | |||
| 287 | #ifdef CONFIG_TICK_ONESHOT | ||
| 288 | |||
| 289 | static cpumask_t tick_broadcast_oneshot_mask; | ||
| 290 | |||
| 291 | /* | ||
| 292 | * Debugging: see timer_list.c | ||
| 293 | */ | ||
| 294 | cpumask_t *tick_get_broadcast_oneshot_mask(void) | ||
| 295 | { | ||
| 296 | return &tick_broadcast_oneshot_mask; | ||
| 297 | } | ||
| 298 | |||
| 299 | static int tick_broadcast_set_event(ktime_t expires, int force) | ||
| 300 | { | ||
| 301 | struct clock_event_device *bc = tick_broadcast_device.evtdev; | ||
| 302 | ktime_t now = ktime_get(); | ||
| 303 | int res; | ||
| 304 | |||
| 305 | for (;;) { | ||
| 306 | res = clockevents_program_event(bc, expires, now); | ||
| 307 | if (!res || !force) | ||
| 308 | return res; | ||
| 309 | now = ktime_get(); | ||
| 310 | expires = ktime_add(now, ktime_set(0, bc->min_delta_ns)); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | /* | ||
| 315 | * Reprogram the broadcast device: | ||
| 316 | * | ||
| 317 | * Called with tick_broadcast_lock held and interrupts disabled. | ||
| 318 | */ | ||
| 319 | static int tick_broadcast_reprogram(void) | ||
| 320 | { | ||
| 321 | ktime_t expires = { .tv64 = KTIME_MAX }; | ||
| 322 | struct tick_device *td; | ||
| 323 | int cpu; | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Find the event which expires next: | ||
| 327 | */ | ||
| 328 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
| 329 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
| 330 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 331 | if (td->evtdev->next_event.tv64 < expires.tv64) | ||
| 332 | expires = td->evtdev->next_event; | ||
| 333 | } | ||
| 334 | |||
| 335 | if (expires.tv64 == KTIME_MAX) | ||
| 336 | return 0; | ||
| 337 | |||
| 338 | return tick_broadcast_set_event(expires, 0); | ||
| 339 | } | ||
| 340 | |||
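tick_broadcast_reprogram() is an earliest-deadline scan: the broadcast device gets armed for the soonest per-cpu next_event of all cpus in the oneshot mask, with KTIME_MAX meaning "no event". A userspace sketch, with INT64_MAX standing in for KTIME_MAX and illustrative deadlines:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t next_event[4] = { INT64_MAX, 5000, 4200, INT64_MAX };
        uint64_t oneshot_mask = 0x6;          /* cpus 1 and 2 are in deep idle */
        int64_t expires = INT64_MAX;

        /* find the event which expires next among the masked cpus */
        for (int cpu = 0; cpu < 4; cpu++)
            if ((oneshot_mask & (1ULL << cpu)) && next_event[cpu] < expires)
                expires = next_event[cpu];

        if (expires == INT64_MAX)
            printf("nothing to program\n");
        else
            printf("program broadcast device for %lld\n", (long long)expires);
        return 0;
    }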
| 341 | /* | ||
| 342 | * Handle oneshot mode broadcasting | ||
| 343 | */ | ||
| 344 | static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) | ||
| 345 | { | ||
| 346 | struct tick_device *td; | ||
| 347 | cpumask_t mask; | ||
| 348 | ktime_t now; | ||
| 349 | int cpu; | ||
| 350 | |||
| 351 | spin_lock(&tick_broadcast_lock); | ||
| 352 | again: | ||
| 353 | dev->next_event.tv64 = KTIME_MAX; | ||
| 354 | mask = CPU_MASK_NONE; | ||
| 355 | now = ktime_get(); | ||
| 356 | /* Find all expired events */ | ||
| 357 | for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; | ||
| 358 | cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) { | ||
| 359 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 360 | if (td->evtdev->next_event.tv64 <= now.tv64) | ||
| 361 | cpu_set(cpu, mask); | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * Wakeup the cpus which have an expired event. The broadcast | ||
| 366 | * device is reprogrammed in the return from idle code. | ||
| 367 | */ | ||
| 368 | if (!tick_do_broadcast(mask)) { | ||
| 369 | /* | ||
| 370 | * The global event did not expire any CPU local | ||
| 371 | * events. This happens in dyntick mode, as the | ||
| 372 | * maximum PIT delta is quite small. | ||
| 373 | */ | ||
| 374 | if (tick_broadcast_reprogram()) | ||
| 375 | goto again; | ||
| 376 | } | ||
| 377 | spin_unlock(&tick_broadcast_lock); | ||
| 378 | } | ||
| 379 | |||
| 380 | /* | ||
| 381 | * Powerstate information: The system enters/leaves a state where | ||
| 382 | * affected devices might stop | ||
| 383 | */ | ||
| 384 | void tick_broadcast_oneshot_control(unsigned long reason) | ||
| 385 | { | ||
| 386 | struct clock_event_device *bc, *dev; | ||
| 387 | struct tick_device *td; | ||
| 388 | unsigned long flags; | ||
| 389 | int cpu; | ||
| 390 | |||
| 391 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 392 | |||
| 393 | /* | ||
| 394 | * Periodic mode does not care about the enter/exit of power | ||
| 395 | * states | ||
| 396 | */ | ||
| 397 | if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) | ||
| 398 | goto out; | ||
| 399 | |||
| 400 | bc = tick_broadcast_device.evtdev; | ||
| 401 | cpu = smp_processor_id(); | ||
| 402 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 403 | dev = td->evtdev; | ||
| 404 | |||
| 405 | if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 406 | goto out; | ||
| 407 | |||
| 408 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | ||
| 409 | if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
| 410 | cpu_set(cpu, tick_broadcast_oneshot_mask); | ||
| 411 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 412 | if (dev->next_event.tv64 < bc->next_event.tv64) | ||
| 413 | tick_broadcast_set_event(dev->next_event, 1); | ||
| 414 | } | ||
| 415 | } else { | ||
| 416 | if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) { | ||
| 417 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
| 418 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
| 419 | if (dev->next_event.tv64 != KTIME_MAX) | ||
| 420 | tick_program_event(dev->next_event, 1); | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | out: | ||
| 425 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 426 | } | ||
| 427 | |||
| 428 | /** | ||
| 429 | * tick_broadcast_setup_oneshot - setup the broadcast device for oneshot mode | ||
| 430 | */ | ||
| 431 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
| 432 | { | ||
| 433 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { | ||
| 434 | bc->event_handler = tick_handle_oneshot_broadcast; | ||
| 435 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | ||
| 436 | bc->next_event.tv64 = KTIME_MAX; | ||
| 437 | } | ||
| 438 | } | ||
| 439 | |||
| 440 | /* | ||
| 441 | * Select oneshot operating mode for the broadcast device | ||
| 442 | */ | ||
| 443 | void tick_broadcast_switch_to_oneshot(void) | ||
| 444 | { | ||
| 445 | struct clock_event_device *bc; | ||
| 446 | unsigned long flags; | ||
| 447 | |||
| 448 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 449 | |||
| 450 | tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; | ||
| 451 | bc = tick_broadcast_device.evtdev; | ||
| 452 | if (bc) | ||
| 453 | tick_broadcast_setup_oneshot(bc); | ||
| 454 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 455 | } | ||
| 456 | |||
| 457 | |||
| 458 | /* | ||
| 459 | * Remove a dead CPU from broadcasting | ||
| 460 | */ | ||
| 461 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | ||
| 462 | { | ||
| 463 | struct clock_event_device *bc; | ||
| 464 | unsigned long flags; | ||
| 465 | unsigned int cpu = *cpup; | ||
| 466 | |||
| 467 | spin_lock_irqsave(&tick_broadcast_lock, flags); | ||
| 468 | |||
| 469 | bc = tick_broadcast_device.evtdev; | ||
| 470 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | ||
| 471 | |||
| 472 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { | ||
| 473 | if (bc && cpus_empty(tick_broadcast_oneshot_mask)) | ||
| 474 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 475 | } | ||
| 476 | |||
| 477 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | ||
| 478 | } | ||
| 479 | |||
| 480 | #endif | ||
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c new file mode 100644 index 000000000000..4500e347f1bb --- /dev/null +++ b/kernel/time/tick-common.c | |||
| @@ -0,0 +1,346 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/tick-common.c | ||
| 3 | * | ||
| 4 | * This file contains the base functions to manage periodic tick | ||
| 5 | * related events. | ||
| 6 | * | ||
| 7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
| 8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
| 9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
| 10 | * | ||
| 11 | * This code is licenced under the GPL version 2. For details see | ||
| 12 | * kernel-base/COPYING. | ||
| 13 | */ | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | #include <linux/err.h> | ||
| 16 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/irq.h> | ||
| 18 | #include <linux/percpu.h> | ||
| 19 | #include <linux/profile.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/tick.h> | ||
| 22 | |||
| 23 | #include "tick-internal.h" | ||
| 24 | |||
| 25 | /* | ||
| 26 | * Tick devices | ||
| 27 | */ | ||
| 28 | DEFINE_PER_CPU(struct tick_device, tick_cpu_device); | ||
| 29 | /* | ||
| 30 | * Tick next event: keeps track of the tick time | ||
| 31 | */ | ||
| 32 | ktime_t tick_next_period; | ||
| 33 | ktime_t tick_period; | ||
| 34 | static int tick_do_timer_cpu = -1; | ||
| 35 | DEFINE_SPINLOCK(tick_device_lock); | ||
| 36 | |||
| 37 | /* | ||
| 38 | * Debugging: see timer_list.c | ||
| 39 | */ | ||
| 40 | struct tick_device *tick_get_device(int cpu) | ||
| 41 | { | ||
| 42 | return &per_cpu(tick_cpu_device, cpu); | ||
| 43 | } | ||
| 44 | |||
| 45 | /** | ||
| 46 | * tick_is_oneshot_available - check for a oneshot capable event device | ||
| 47 | */ | ||
| 48 | int tick_is_oneshot_available(void) | ||
| 49 | { | ||
| 50 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
| 51 | |||
| 52 | return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT); | ||
| 53 | } | ||
| 54 | |||
| 55 | /* | ||
| 56 | * Periodic tick | ||
| 57 | */ | ||
| 58 | static void tick_periodic(int cpu) | ||
| 59 | { | ||
| 60 | if (tick_do_timer_cpu == cpu) { | ||
| 61 | write_seqlock(&xtime_lock); | ||
| 62 | |||
| 63 | /* Keep track of the next tick event */ | ||
| 64 | tick_next_period = ktime_add(tick_next_period, tick_period); | ||
| 65 | |||
| 66 | do_timer(1); | ||
| 67 | write_sequnlock(&xtime_lock); | ||
| 68 | } | ||
| 69 | |||
| 70 | update_process_times(user_mode(get_irq_regs())); | ||
| 71 | profile_tick(CPU_PROFILING); | ||
| 72 | } | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Event handler for periodic ticks | ||
| 76 | */ | ||
| 77 | void tick_handle_periodic(struct clock_event_device *dev) | ||
| 78 | { | ||
| 79 | int cpu = smp_processor_id(); | ||
| 80 | |||
| 81 | tick_periodic(cpu); | ||
| 82 | |||
| 83 | if (dev->mode != CLOCK_EVT_MODE_ONESHOT) | ||
| 84 | return; | ||
| 85 | /* | ||
| 86 | * Setup the next period for devices, which do not have | ||
| 87 | * periodic mode: | ||
| 88 | */ | ||
| 89 | for (;;) { | ||
| 90 | ktime_t next = ktime_add(dev->next_event, tick_period); | ||
| 91 | |||
| 92 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
| 93 | return; | ||
| 94 | tick_periodic(cpu); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
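tick_periodic() splits the work: only tick_do_timer_cpu advances global time via do_timer(), while every cpu does its own process-time accounting. A toy sketch of that division of labor (the simulated jiffies counter and the cpu loop are illustrative):

    #include <stdio.h>

    static int tick_do_timer_cpu = 0;
    static unsigned long jiffies64;

    static void tick_periodic_sim(int cpu)
    {
        if (cpu == tick_do_timer_cpu)
            jiffies64++;                      /* global timekeeping, done once */
        printf("cpu %d: account process time (jiffies=%lu)\n", cpu, jiffies64);
    }

    int main(void)
    {
        for (int cpu = 0; cpu < 3; cpu++)     /* one tick arriving on 3 cpus */
            tick_periodic_sim(cpu);
        return 0;
    }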
| 98 | /* | ||
| 99 | * Setup the device for a periodic tick | ||
| 100 | */ | ||
| 101 | void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | ||
| 102 | { | ||
| 103 | tick_set_periodic_handler(dev, broadcast); | ||
| 104 | |||
| 105 | /* Broadcast setup ? */ | ||
| 106 | if (!tick_device_is_functional(dev)) | ||
| 107 | return; | ||
| 108 | |||
| 109 | if (dev->features & CLOCK_EVT_FEAT_PERIODIC) { | ||
| 110 | clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); | ||
| 111 | } else { | ||
| 112 | unsigned long seq; | ||
| 113 | ktime_t next; | ||
| 114 | |||
| 115 | do { | ||
| 116 | seq = read_seqbegin(&xtime_lock); | ||
| 117 | next = tick_next_period; | ||
| 118 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 119 | |||
| 120 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
| 121 | |||
| 122 | for (;;) { | ||
| 123 | if (!clockevents_program_event(dev, next, ktime_get())) | ||
| 124 | return; | ||
| 125 | next = ktime_add(next, tick_period); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * Setup the tick device | ||
| 132 | */ | ||
| 133 | static void tick_setup_device(struct tick_device *td, | ||
| 134 | struct clock_event_device *newdev, int cpu, | ||
| 135 | cpumask_t cpumask) | ||
| 136 | { | ||
| 137 | ktime_t next_event; | ||
| 138 | void (*handler)(struct clock_event_device *) = NULL; | ||
| 139 | |||
| 140 | /* | ||
| 141 | * First device setup ? | ||
| 142 | */ | ||
| 143 | if (!td->evtdev) { | ||
| 144 | /* | ||
| 145 | * If no cpu took the do_timer update, assign it to | ||
| 146 | * this cpu: | ||
| 147 | */ | ||
| 148 | if (tick_do_timer_cpu == -1) { | ||
| 149 | tick_do_timer_cpu = cpu; | ||
| 150 | tick_next_period = ktime_get(); | ||
| 151 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); | ||
| 152 | } | ||
| 153 | |||
| 154 | /* | ||
| 155 | * Startup in periodic mode first. | ||
| 156 | */ | ||
| 157 | td->mode = TICKDEV_MODE_PERIODIC; | ||
| 158 | } else { | ||
| 159 | handler = td->evtdev->event_handler; | ||
| 160 | next_event = td->evtdev->next_event; | ||
| 161 | } | ||
| 162 | |||
| 163 | td->evtdev = newdev; | ||
| 164 | |||
| 165 | /* | ||
| 166 | * When the device is not per cpu, pin the interrupt to the | ||
| 167 | * current cpu: | ||
| 168 | */ | ||
| 169 | if (!cpus_equal(newdev->cpumask, cpumask)) | ||
| 170 | irq_set_affinity(newdev->irq, cpumask); | ||
| 171 | |||
| 172 | /* | ||
| 173 | * When global broadcasting is active, check if the current | ||
| 174 | * device is registered as a placeholder for broadcast mode. | ||
| 175 | * This allows us to handle this x86 misfeature in a generic | ||
| 176 | * way. | ||
| 177 | */ | ||
| 178 | if (tick_device_uses_broadcast(newdev, cpu)) | ||
| 179 | return; | ||
| 180 | |||
| 181 | if (td->mode == TICKDEV_MODE_PERIODIC) | ||
| 182 | tick_setup_periodic(newdev, 0); | ||
| 183 | else | ||
| 184 | tick_setup_oneshot(newdev, handler, next_event); | ||
| 185 | } | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Check, if the new registered device should be used. | ||
| 189 | */ | ||
| 190 | static int tick_check_new_device(struct clock_event_device *newdev) | ||
| 191 | { | ||
| 192 | struct clock_event_device *curdev; | ||
| 193 | struct tick_device *td; | ||
| 194 | int cpu, ret = NOTIFY_OK; | ||
| 195 | unsigned long flags; | ||
| 196 | cpumask_t cpumask; | ||
| 197 | |||
| 198 | spin_lock_irqsave(&tick_device_lock, flags); | ||
| 199 | |||
| 200 | cpu = smp_processor_id(); | ||
| 201 | if (!cpu_isset(cpu, newdev->cpumask)) | ||
| 202 | goto out; | ||
| 203 | |||
| 204 | td = &per_cpu(tick_cpu_device, cpu); | ||
| 205 | curdev = td->evtdev; | ||
| 206 | cpumask = cpumask_of_cpu(cpu); | ||
| 207 | |||
| 208 | /* cpu local device ? */ | ||
| 209 | if (!cpus_equal(newdev->cpumask, cpumask)) { | ||
| 210 | |||
| 211 | /* | ||
| 212 | * If the cpu affinity of the device interrupt can not | ||
| 213 | * be set, ignore it. | ||
| 214 | */ | ||
| 215 | if (!irq_can_set_affinity(newdev->irq)) | ||
| 216 | goto out_bc; | ||
| 217 | |||
| 218 | /* | ||
| 219 | * If we have a cpu local device already, do not replace it | ||
| 220 | * by a non cpu local device | ||
| 221 | */ | ||
| 222 | if (curdev && cpus_equal(curdev->cpumask, cpumask)) | ||
| 223 | goto out_bc; | ||
| 224 | } | ||
| 225 | |||
| 226 | /* | ||
| 227 | * If we have an active device, then check the rating and the oneshot | ||
| 228 | * feature. | ||
| 229 | */ | ||
| 230 | if (curdev) { | ||
| 231 | /* | ||
| 232 | * Prefer oneshot capable devices! | ||
| 233 | */ | ||
| 234 | if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) && | ||
| 235 | !(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) | ||
| 236 | goto out_bc; | ||
| 237 | /* | ||
| 238 | * Check the rating | ||
| 239 | */ | ||
| 240 | if (curdev->rating >= newdev->rating) | ||
| 241 | goto out_bc; | ||
| 242 | } | ||
| 243 | |||
| 244 | /* | ||
| 245 | * Replace the eventually existing device by the new | ||
| 246 | * device. If the current device is the broadcast device, do | ||
| 247 | * not give it back to the clockevents layer ! | ||
| 248 | */ | ||
| 249 | if (tick_is_broadcast_device(curdev)) { | ||
| 250 | clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 251 | curdev = NULL; | ||
| 252 | } | ||
| 253 | clockevents_exchange_device(curdev, newdev); | ||
| 254 | tick_setup_device(td, newdev, cpu, cpumask); | ||
| 255 | if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) | ||
| 256 | tick_oneshot_notify(); | ||
| 257 | |||
| 258 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 259 | return NOTIFY_STOP; | ||
| 260 | |||
| 261 | out_bc: | ||
| 262 | /* | ||
| 263 | * Can the new device be used as a broadcast device ? | ||
| 264 | */ | ||
| 265 | if (tick_check_broadcast_device(newdev)) | ||
| 266 | ret = NOTIFY_STOP; | ||
| 267 | out: | ||
| 268 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 269 | |||
| 270 | return ret; | ||
| 271 | } | ||
| 272 | |||
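The selection logic in tick_check_new_device() reads as a two-step policy: never trade a oneshot-capable device for a purely periodic one, and otherwise let the higher rating win. A standalone sketch with made-up device names and ratings:

    #include <stdio.h>

    #define FEAT_ONESHOT 0x1

    struct evtdev { const char *name; int features; int rating; };

    static int prefer_new(const struct evtdev *cur, const struct evtdev *cand)
    {
        if (!cur)
            return 1;
        /* a oneshot device is never replaced by a periodic-only one */
        if ((cur->features & FEAT_ONESHOT) && !(cand->features & FEAT_ONESHOT))
            return 0;
        return cand->rating > cur->rating;
    }

    int main(void)
    {
        struct evtdev pit   = { "pit",   0,            110 };
        struct evtdev lapic = { "lapic", FEAT_ONESHOT, 100 };
        struct evtdev hpet  = { "hpet",  FEAT_ONESHOT, 150 };

        printf("replace pit by lapic?  %d\n", prefer_new(&pit, &lapic));  /* 0: lower rating */
        printf("replace lapic by pit?  %d\n", prefer_new(&lapic, &pit));  /* 0: would lose oneshot */
        printf("replace lapic by hpet? %d\n", prefer_new(&lapic, &hpet)); /* 1 */
        return 0;
    }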
| 273 | /* | ||
| 274 | * Shutdown an event device on a given cpu: | ||
| 275 | * | ||
| 276 | * This is called on a live CPU, when another CPU is dead. So we cannot | ||
| 277 | * access the hardware device itself. | ||
| 278 | * We just set the mode and remove it from the lists. | ||
| 279 | */ | ||
| 280 | static void tick_shutdown(unsigned int *cpup) | ||
| 281 | { | ||
| 282 | struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); | ||
| 283 | struct clock_event_device *dev = td->evtdev; | ||
| 284 | unsigned long flags; | ||
| 285 | |||
| 286 | spin_lock_irqsave(&tick_device_lock, flags); | ||
| 287 | td->mode = TICKDEV_MODE_PERIODIC; | ||
| 288 | if (dev) { | ||
| 289 | /* | ||
| 290 | * Prevent that the clock events layer tries to call | ||
| 291 | * the set mode function! | ||
| 292 | */ | ||
| 293 | dev->mode = CLOCK_EVT_MODE_UNUSED; | ||
| 294 | clockevents_exchange_device(dev, NULL); | ||
| 295 | td->evtdev = NULL; | ||
| 296 | } | ||
| 297 | spin_unlock_irqrestore(&tick_device_lock, flags); | ||
| 298 | } | ||
| 299 | |||
| 300 | /* | ||
| 301 | * Notification about clock event devices | ||
| 302 | */ | ||
| 303 | static int tick_notify(struct notifier_block *nb, unsigned long reason, | ||
| 304 | void *dev) | ||
| 305 | { | ||
| 306 | switch (reason) { | ||
| 307 | |||
| 308 | case CLOCK_EVT_NOTIFY_ADD: | ||
| 309 | return tick_check_new_device(dev); | ||
| 310 | |||
| 311 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
| 312 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
| 313 | tick_broadcast_on_off(reason, dev); | ||
| 314 | break; | ||
| 315 | |||
| 316 | case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: | ||
| 317 | case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: | ||
| 318 | tick_broadcast_oneshot_control(reason); | ||
| 319 | break; | ||
| 320 | |||
| 321 | case CLOCK_EVT_NOTIFY_CPU_DEAD: | ||
| 322 | tick_shutdown_broadcast_oneshot(dev); | ||
| 323 | tick_shutdown_broadcast(dev); | ||
| 324 | tick_shutdown(dev); | ||
| 325 | break; | ||
| 326 | |||
| 327 | default: | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | |||
| 331 | return NOTIFY_OK; | ||
| 332 | } | ||
| 333 | |||
| 334 | static struct notifier_block tick_notifier = { | ||
| 335 | .notifier_call = tick_notify, | ||
| 336 | }; | ||
| 337 | |||
| 338 | /** | ||
| 339 | * tick_init - initialize the tick control | ||
| 340 | * | ||
| 341 | * Register the notifier with the clockevents framework | ||
| 342 | */ | ||
| 343 | void __init tick_init(void) | ||
| 344 | { | ||
| 345 | clockevents_register_notifier(&tick_notifier); | ||
| 346 | } | ||
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h new file mode 100644 index 000000000000..54861a0f29ff --- /dev/null +++ b/kernel/time/tick-internal.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | /* | ||
| 2 | * tick internal variables and functions used by low/high res code | ||
| 3 | */ | ||
| 4 | DECLARE_PER_CPU(struct tick_device, tick_cpu_device); | ||
| 5 | extern spinlock_t tick_device_lock; | ||
| 6 | extern ktime_t tick_next_period; | ||
| 7 | extern ktime_t tick_period; | ||
| 8 | |||
| 9 | extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); | ||
| 10 | extern void tick_handle_periodic(struct clock_event_device *dev); | ||
| 11 | |||
| 12 | /* | ||
| 13 | * NO_HZ / high resolution timer shared code | ||
| 14 | */ | ||
| 15 | #ifdef CONFIG_TICK_ONESHOT | ||
| 16 | extern void tick_setup_oneshot(struct clock_event_device *newdev, | ||
| 17 | void (*handler)(struct clock_event_device *), | ||
| 18 | ktime_t nextevt); | ||
| 19 | extern int tick_program_event(ktime_t expires, int force); | ||
| 20 | extern void tick_oneshot_notify(void); | ||
| 21 | extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); | ||
| 22 | |||
| 23 | # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
| 24 | extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); | ||
| 25 | extern void tick_broadcast_oneshot_control(unsigned long reason); | ||
| 26 | extern void tick_broadcast_switch_to_oneshot(void); | ||
| 27 | extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); | ||
| 28 | # else /* BROADCAST */ | ||
| 29 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
| 30 | { | ||
| 31 | BUG(); | ||
| 32 | } | ||
| 33 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
| 34 | static inline void tick_broadcast_switch_to_oneshot(void) { } | ||
| 35 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
| 36 | # endif /* !BROADCAST */ | ||
| 37 | |||
| 38 | #else /* !ONESHOT */ | ||
| 39 | static inline | ||
| 40 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
| 41 | void (*handler)(struct clock_event_device *), | ||
| 42 | ktime_t nextevt) | ||
| 43 | { | ||
| 44 | BUG(); | ||
| 45 | } | ||
| 46 | static inline int tick_program_event(ktime_t expires, int force) | ||
| 47 | { | ||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | static inline void tick_oneshot_notify(void) { } | ||
| 51 | static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | ||
| 52 | { | ||
| 53 | BUG(); | ||
| 54 | } | ||
| 55 | static inline void tick_broadcast_oneshot_control(unsigned long reason) { } | ||
| 56 | static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } | ||
| 57 | #endif /* !TICK_ONESHOT */ | ||
| 58 | |||
| 59 | /* | ||
| 60 | * Broadcasting support | ||
| 61 | */ | ||
| 62 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
| 63 | extern int tick_do_broadcast(cpumask_t mask); | ||
| 64 | |||
| 65 | extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); | ||
| 66 | extern int tick_check_broadcast_device(struct clock_event_device *dev); | ||
| 67 | extern int tick_is_broadcast_device(struct clock_event_device *dev); | ||
| 68 | extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); | ||
| 69 | extern void tick_shutdown_broadcast(unsigned int *cpup); | ||
| 70 | |||
| 71 | extern void | ||
| 72 | tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); | ||
| 73 | |||
| 74 | #else /* !BROADCAST */ | ||
| 75 | |||
| 76 | static inline int tick_check_broadcast_device(struct clock_event_device *dev) | ||
| 77 | { | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | |||
| 81 | static inline int tick_is_broadcast_device(struct clock_event_device *dev) | ||
| 82 | { | ||
| 83 | return 0; | ||
| 84 | } | ||
| 85 | static inline int tick_device_uses_broadcast(struct clock_event_device *dev, | ||
| 86 | int cpu) | ||
| 87 | { | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } | ||
| 91 | static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } | ||
| 92 | static inline void tick_shutdown_broadcast(unsigned int *cpup) { } | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Set the periodic handler in non broadcast mode | ||
| 96 | */ | ||
| 97 | static inline void tick_set_periodic_handler(struct clock_event_device *dev, | ||
| 98 | int broadcast) | ||
| 99 | { | ||
| 100 | dev->event_handler = tick_handle_periodic; | ||
| 101 | } | ||
| 102 | #endif /* !BROADCAST */ | ||
| 103 | |||
| 104 | /* | ||
| 105 | * Check, if the device is functional or a dummy for broadcast | ||
| 106 | */ | ||
| 107 | static inline int tick_device_is_functional(struct clock_event_device *dev) | ||
| 108 | { | ||
| 109 | return !(dev->features & CLOCK_EVT_FEAT_DUMMY); | ||
| 110 | } | ||
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c new file mode 100644 index 000000000000..2e8b7ff863cc --- /dev/null +++ b/kernel/time/tick-oneshot.c | |||
| @@ -0,0 +1,84 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/tick-oneshot.c | ||
| 3 | * | ||
| 4 | * This file contains functions which manage high resolution tick | ||
| 5 | * related events. | ||
| 6 | * | ||
| 7 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
| 8 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
| 9 | * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner | ||
| 10 | * | ||
| 11 | * This code is licenced under the GPL version 2. For details see | ||
| 12 | * kernel-base/COPYING. | ||
| 13 | */ | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | #include <linux/err.h> | ||
| 16 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/irq.h> | ||
| 18 | #include <linux/percpu.h> | ||
| 19 | #include <linux/profile.h> | ||
| 20 | #include <linux/sched.h> | ||
| 21 | #include <linux/tick.h> | ||
| 22 | |||
| 23 | #include "tick-internal.h" | ||
| 24 | |||
| 25 | /** | ||
| 26 | * tick_program_event | ||
| 27 | */ | ||
| 28 | int tick_program_event(ktime_t expires, int force) | ||
| 29 | { | ||
| 30 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | ||
| 31 | ktime_t now = ktime_get(); | ||
| 32 | |||
| 33 | while (1) { | ||
| 34 | int ret = clockevents_program_event(dev, expires, now); | ||
| 35 | |||
| 36 | if (!ret || !force) | ||
| 37 | return ret; | ||
| 38 | now = ktime_get(); | ||
| 39 | expires = ktime_add(now, ktime_set(0, dev->min_delta_ns)); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
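The force path of tick_program_event() retries with the earliest expiry the hardware can honor (now plus min_delta_ns) until programming succeeds. A userspace sketch with a simulated device, where a -1 return plays the role of the programming failure:

    #include <stdio.h>
    #include <stdint.h>

    static int64_t now_ns = 10000;            /* simulated clock */
    static const int64_t min_delta_ns = 300;  /* device's minimum delta */

    static int program(int64_t expires)
    {
        return (expires - now_ns) < min_delta_ns ? -1 : 0;
    }

    static int program_event(int64_t expires, int force)
    {
        for (;;) {
            int ret = program(expires);
            if (!ret || !force)
                return ret;
            /* the earliest the hardware can fire: retry with that */
            expires = now_ns + min_delta_ns;
        }
    }

    int main(void)
    {
        printf("%d\n", program_event(10100, 1));  /* too close: retried, then 0 */
        return 0;
    }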
| 43 | /** | ||
| 44 | * tick_setup_oneshot - setup the event device for oneshot mode (hres or nohz) | ||
| 45 | */ | ||
| 46 | void tick_setup_oneshot(struct clock_event_device *newdev, | ||
| 47 | void (*handler)(struct clock_event_device *), | ||
| 48 | ktime_t next_event) | ||
| 49 | { | ||
| 50 | newdev->event_handler = handler; | ||
| 51 | clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); | ||
| 52 | clockevents_program_event(newdev, next_event, ktime_get()); | ||
| 53 | } | ||
| 54 | |||
| 55 | /** | ||
| 56 | * tick_switch_to_oneshot - switch to oneshot mode | ||
| 57 | */ | ||
| 58 | int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) | ||
| 59 | { | ||
| 60 | struct tick_device *td = &__get_cpu_var(tick_cpu_device); | ||
| 61 | struct clock_event_device *dev = td->evtdev; | ||
| 62 | |||
| 63 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || | ||
| 64 | !tick_device_is_functional(dev)) | ||
| 65 | return -EINVAL; | ||
| 66 | |||
| 67 | td->mode = TICKDEV_MODE_ONESHOT; | ||
| 68 | dev->event_handler = handler; | ||
| 69 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | ||
| 70 | tick_broadcast_switch_to_oneshot(); | ||
| 71 | return 0; | ||
| 72 | } | ||
| 73 | |||
| 74 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 75 | /** | ||
| 76 | * tick_init_highres - switch to high resolution mode | ||
| 77 | * | ||
| 78 | * Called with interrupts disabled. | ||
| 79 | */ | ||
| 80 | int tick_init_highres(void) | ||
| 81 | { | ||
| 82 | return tick_switch_to_oneshot(hrtimer_interrupt); | ||
| 83 | } | ||
| 84 | #endif | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c new file mode 100644 index 000000000000..512a4a906467 --- /dev/null +++ b/kernel/time/tick-sched.c | |||
| @@ -0,0 +1,565 @@ | |||
| 1 | /* | ||
| 2 | * linux/kernel/time/tick-sched.c | ||
| 3 | * | ||
| 4 | * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> | ||
| 5 | * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar | ||
| 6 | * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner | ||
| 7 | * | ||
| 8 | * No idle tick implementation for low and high resolution timers | ||
| 9 | * | ||
| 10 | * Started by: Thomas Gleixner and Ingo Molnar | ||
| 11 | * | ||
| 12 | * For licencing details see kernel-base/COPYING | ||
| 13 | */ | ||
| 14 | #include <linux/cpu.h> | ||
| 15 | #include <linux/err.h> | ||
| 16 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/interrupt.h> | ||
| 18 | #include <linux/kernel_stat.h> | ||
| 19 | #include <linux/percpu.h> | ||
| 20 | #include <linux/profile.h> | ||
| 21 | #include <linux/sched.h> | ||
| 22 | #include <linux/tick.h> | ||
| 23 | |||
| 24 | #include "tick-internal.h" | ||
| 25 | |||
| 26 | /* | ||
| 27 | * Per cpu nohz control structure | ||
| 28 | */ | ||
| 29 | static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); | ||
| 30 | |||
| 31 | /* | ||
| 32 | * The time, when the last jiffy update happened. Protected by xtime_lock. | ||
| 33 | */ | ||
| 34 | static ktime_t last_jiffies_update; | ||
| 35 | |||
| 36 | struct tick_sched *tick_get_tick_sched(int cpu) | ||
| 37 | { | ||
| 38 | return &per_cpu(tick_cpu_sched, cpu); | ||
| 39 | } | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Must be called with interrupts disabled ! | ||
| 43 | */ | ||
| 44 | static void tick_do_update_jiffies64(ktime_t now) | ||
| 45 | { | ||
| 46 | unsigned long ticks = 0; | ||
| 47 | ktime_t delta; | ||
| 48 | |||
| 49 | /* Reevaluate with xtime_lock held */ | ||
| 50 | write_seqlock(&xtime_lock); | ||
| 51 | |||
| 52 | delta = ktime_sub(now, last_jiffies_update); | ||
| 53 | if (delta.tv64 >= tick_period.tv64) { | ||
| 54 | |||
| 55 | delta = ktime_sub(delta, tick_period); | ||
| 56 | last_jiffies_update = ktime_add(last_jiffies_update, | ||
| 57 | tick_period); | ||
| 58 | |||
| 59 | /* Slow path for long timeouts */ | ||
| 60 | if (unlikely(delta.tv64 >= tick_period.tv64)) { | ||
| 61 | s64 incr = ktime_to_ns(tick_period); | ||
| 62 | |||
| 63 | ticks = ktime_divns(delta, incr); | ||
| 64 | |||
| 65 | last_jiffies_update = ktime_add_ns(last_jiffies_update, | ||
| 66 | incr * ticks); | ||
| 67 | } | ||
| 68 | do_timer(++ticks); | ||
| 69 | } | ||
| 70 | write_sequnlock(&xtime_lock); | ||
| 71 | } | ||
| 72 | |||
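tick_do_update_jiffies64() accounts one tick on the fast path and converts any longer idle stretch with a single division rather than looping per period. A standalone sketch of the same catch-up arithmetic (period and timestamps are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const int64_t period = 1000000;       /* 1 ms tick, in ns */
        int64_t last_update  = 0;
        int64_t now          = 7300000;       /* woke up 7.3 ms later */
        uint64_t ticks       = 0;

        int64_t delta = now - last_update;
        if (delta >= period) {
            delta       -= period;
            last_update += period;
            /* slow path for long idle sleeps: one division, not a loop */
            if (delta >= period) {
                ticks        = delta / period;
                last_update += (int64_t)ticks * period;
            }
            ticks++;                          /* the do_timer(++ticks) step */
        }
        printf("advance jiffies by %llu, last_update=%lld\n",
               (unsigned long long)ticks, (long long)last_update);
        return 0;
    }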
| 73 | /* | ||
| 74 | * Initialize and return the time of the last jiffies update. | ||
| 75 | */ | ||
| 76 | static ktime_t tick_init_jiffy_update(void) | ||
| 77 | { | ||
| 78 | ktime_t period; | ||
| 79 | |||
| 80 | write_seqlock(&xtime_lock); | ||
| 81 | /* Did we start the jiffies update yet ? */ | ||
| 82 | if (last_jiffies_update.tv64 == 0) | ||
| 83 | last_jiffies_update = tick_next_period; | ||
| 84 | period = last_jiffies_update; | ||
| 85 | write_sequnlock(&xtime_lock); | ||
| 86 | return period; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * NOHZ - aka dynamic tick functionality | ||
| 91 | */ | ||
| 92 | #ifdef CONFIG_NO_HZ | ||
| 93 | /* | ||
| 94 | * NO HZ enabled ? | ||
| 95 | */ | ||
| 96 | static int tick_nohz_enabled __read_mostly = 1; | ||
| 97 | |||
| 98 | /* | ||
| 99 | * Enable / Disable tickless mode | ||
| 100 | */ | ||
| 101 | static int __init setup_tick_nohz(char *str) | ||
| 102 | { | ||
| 103 | if (!strcmp(str, "off")) | ||
| 104 | tick_nohz_enabled = 0; | ||
| 105 | else if (!strcmp(str, "on")) | ||
| 106 | tick_nohz_enabled = 1; | ||
| 107 | else | ||
| 108 | return 0; | ||
| 109 | return 1; | ||
| 110 | } | ||
| 111 | |||
| 112 | __setup("nohz=", setup_tick_nohz); | ||
| 113 | |||
| 114 | /** | ||
| 115 | * tick_nohz_update_jiffies - update jiffies when idle was interrupted | ||
| 116 | * | ||
| 117 | * Called from interrupt entry when the CPU was idle | ||
| 118 | * | ||
| 119 | * In case the sched_tick was stopped on this CPU, we have to check if jiffies | ||
| 120 | * must be updated. Otherwise an interrupt handler could use a stale jiffy | ||
| 121 | * value. We do this unconditionally on any cpu, as we don't know whether the | ||
| 122 | * cpu which has the update task assigned is in a long sleep. | ||
| 123 | */ | ||
| 124 | void tick_nohz_update_jiffies(void) | ||
| 125 | { | ||
| 126 | int cpu = smp_processor_id(); | ||
| 127 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 128 | unsigned long flags; | ||
| 129 | ktime_t now; | ||
| 130 | |||
| 131 | if (!ts->tick_stopped) | ||
| 132 | return; | ||
| 133 | |||
| 134 | cpu_clear(cpu, nohz_cpu_mask); | ||
| 135 | now = ktime_get(); | ||
| 136 | |||
| 137 | local_irq_save(flags); | ||
| 138 | tick_do_update_jiffies64(now); | ||
| 139 | local_irq_restore(flags); | ||
| 140 | } | ||
| 141 | |||
| 142 | /** | ||
| 143 | * tick_nohz_stop_sched_tick - stop the idle tick from the idle task | ||
| 144 | * | ||
| 145 | * When the next event is more than a tick into the future, stop the idle tick | ||
| 146 | * Called either from the idle loop or from irq_exit() when an idle period was | ||
| 147 | * just interrupted by an interrupt which did not cause a reschedule. | ||
| 148 | */ | ||
| 149 | void tick_nohz_stop_sched_tick(void) | ||
| 150 | { | ||
| 151 | unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; | ||
| 152 | struct tick_sched *ts; | ||
| 153 | ktime_t last_update, expires, now, delta; | ||
| 154 | int cpu; | ||
| 155 | |||
| 156 | local_irq_save(flags); | ||
| 157 | |||
| 158 | cpu = smp_processor_id(); | ||
| 159 | ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 160 | |||
| 161 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | ||
| 162 | goto end; | ||
| 163 | |||
| 164 | if (need_resched()) | ||
| 165 | goto end; | ||
| 166 | |||
| 168 | if (unlikely(local_softirq_pending())) | ||
| 169 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | ||
| 170 | local_softirq_pending()); | ||
| 171 | |||
| 172 | now = ktime_get(); | ||
| 173 | /* | ||
| 174 | * When called from irq_exit we need to account the idle sleep time | ||
| 175 | * correctly. | ||
| 176 | */ | ||
| 177 | if (ts->tick_stopped) { | ||
| 178 | delta = ktime_sub(now, ts->idle_entrytime); | ||
| 179 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
| 180 | } | ||
| 181 | |||
| 182 | ts->idle_entrytime = now; | ||
| 183 | ts->idle_calls++; | ||
| 184 | |||
| 185 | /* Read jiffies and the time when jiffies were updated last */ | ||
| 186 | do { | ||
| 187 | seq = read_seqbegin(&xtime_lock); | ||
| 188 | last_update = last_jiffies_update; | ||
| 189 | last_jiffies = jiffies; | ||
| 190 | } while (read_seqretry(&xtime_lock, seq)); | ||
| 191 | |||
| 192 | /* Get the next timer wheel timer */ | ||
| 193 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
| 194 | delta_jiffies = next_jiffies - last_jiffies; | ||
| 195 | |||
| 196 | if (rcu_needs_cpu(cpu)) | ||
| 197 | delta_jiffies = 1; | ||
| 198 | /* | ||
| 199 | * Do not stop the tick if we are only one tick off | ||
| 200 | * or if this cpu is required for RCU | ||
| 201 | */ | ||
| 202 | if (!ts->tick_stopped && delta_jiffies == 1) | ||
| 203 | goto out; | ||
| 204 | |||
| 205 | /* Schedule the tick if we are at least one jiffy off */ | ||
| 206 | if ((long)delta_jiffies >= 1) { | ||
| 207 | |||
| 208 | if (delta_jiffies > 1) | ||
| 209 | cpu_set(cpu, nohz_cpu_mask); | ||
| 210 | /* | ||
| 211 | * nohz_stop_sched_tick can be called several times before | ||
| 212 | * the nohz_restart_sched_tick is called. This happens when | ||
| 213 | * interrupts arrive which do not cause a reschedule. In the | ||
| 214 | * first call we save the current tick time, so we can restart | ||
| 215 | * the scheduler tick in nohz_restart_sched_tick. | ||
| 216 | */ | ||
| 217 | if (!ts->tick_stopped) { | ||
| 218 | ts->idle_tick = ts->sched_timer.expires; | ||
| 219 | ts->tick_stopped = 1; | ||
| 220 | ts->idle_jiffies = last_jiffies; | ||
| 221 | } | ||
| 222 | /* | ||
| 223 | * calculate the expiry time for the next timer wheel | ||
| 224 | * timer | ||
| 225 | */ | ||
| 226 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
| 227 | delta_jiffies); | ||
| 228 | ts->idle_expires = expires; | ||
| 229 | ts->idle_sleeps++; | ||
| 230 | |||
| 231 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
| 232 | hrtimer_start(&ts->sched_timer, expires, | ||
| 233 | HRTIMER_MODE_ABS); | ||
| 234 | /* Check, if the timer was already in the past */ | ||
| 235 | if (hrtimer_active(&ts->sched_timer)) | ||
| 236 | goto out; | ||
| 237 | } else if (!tick_program_event(expires, 0)) | ||
| 238 | goto out; | ||
| 239 | /* | ||
| 240 | * We are past the event already. So we crossed a | ||
| 241 | * jiffy boundary. Update jiffies and raise the | ||
| 242 | * softirq. | ||
| 243 | */ | ||
| 244 | tick_do_update_jiffies64(ktime_get()); | ||
| 245 | cpu_clear(cpu, nohz_cpu_mask); | ||
| 246 | } | ||
| 247 | raise_softirq_irqoff(TIMER_SOFTIRQ); | ||
| 248 | out: | ||
| 249 | ts->next_jiffies = next_jiffies; | ||
| 250 | ts->last_jiffies = last_jiffies; | ||
| 251 | end: | ||
| 252 | local_irq_restore(flags); | ||
| 253 | } | ||
| 254 | |||
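The stop decision in tick_nohz_stop_sched_tick() boils down to the distance to the next timer-wheel expiry: a one-jiffy gap is not worth stopping for, and rcu_needs_cpu() forces the gap down to one. A userspace sketch of just that predicate (signature and values are illustrative):

    #include <stdio.h>

    static int should_stop(unsigned long last_jiffies,
                           unsigned long next_jiffies,
                           int rcu_needs_cpu, int tick_stopped)
    {
        unsigned long delta = next_jiffies - last_jiffies;

        if (rcu_needs_cpu)
            delta = 1;                 /* rcu wants the next tick on this cpu */
        /* not worth stopping the tick for a single jiffy */
        if (!tick_stopped && delta == 1)
            return 0;
        return (long)delta >= 1;
    }

    int main(void)
    {
        printf("%d\n", should_stop(100, 101, 0, 0));  /* 0: only one off */
        printf("%d\n", should_stop(100, 150, 0, 0));  /* 1: sleep 50 jiffies */
        printf("%d\n", should_stop(100, 150, 1, 0));  /* 0: rcu pins the tick */
        return 0;
    }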
| 255 | /** | ||
| 256 | * nohz_restart_sched_tick - restart the idle tick from the idle task | ||
| 257 | * | ||
| 258 | * Restart the idle tick when the CPU is woken up from idle | ||
| 259 | */ | ||
| 260 | void tick_nohz_restart_sched_tick(void) | ||
| 261 | { | ||
| 262 | int cpu = smp_processor_id(); | ||
| 263 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 264 | unsigned long ticks; | ||
| 265 | ktime_t now, delta; | ||
| 266 | |||
| 267 | if (!ts->tick_stopped) | ||
| 268 | return; | ||
| 269 | |||
| 270 | /* Update jiffies first */ | ||
| 271 | now = ktime_get(); | ||
| 272 | |||
| 273 | local_irq_disable(); | ||
| 274 | tick_do_update_jiffies64(now); | ||
| 275 | cpu_clear(cpu, nohz_cpu_mask); | ||
| 276 | |||
| 277 | /* Account the idle time */ | ||
| 278 | delta = ktime_sub(now, ts->idle_entrytime); | ||
| 279 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
| 280 | |||
| 281 | /* | ||
| 282 | * We stopped the tick in idle. update_process_times() would miss the | ||
| 283 | * time we slept, as it only accounts a single tick per call. | ||
| 284 | * Enforce that this time is accounted to idle! | ||
| 285 | */ | ||
| 286 | ticks = jiffies - ts->idle_jiffies; | ||
| 287 | /* | ||
| 288 | * We might be one off. Do not randomly account a huge number of ticks! | ||
| 289 | */ | ||
| 290 | if (ticks && ticks < LONG_MAX) { | ||
| 291 | add_preempt_count(HARDIRQ_OFFSET); | ||
| 292 | account_system_time(current, HARDIRQ_OFFSET, | ||
| 293 | jiffies_to_cputime(ticks)); | ||
| 294 | sub_preempt_count(HARDIRQ_OFFSET); | ||
| 295 | } | ||
| 296 | |||
| 297 | /* | ||
| 298 | * Cancel the scheduled timer and restore the tick | ||
| 299 | */ | ||
| 300 | ts->tick_stopped = 0; | ||
| 301 | hrtimer_cancel(&ts->sched_timer); | ||
| 302 | ts->sched_timer.expires = ts->idle_tick; | ||
| 303 | |||
| 304 | while (1) { | ||
| 305 | /* Forward the time to expire in the future */ | ||
| 306 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
| 307 | |||
| 308 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | ||
| 309 | hrtimer_start(&ts->sched_timer, | ||
| 310 | ts->sched_timer.expires, | ||
| 311 | HRTIMER_MODE_ABS); | ||
| 312 | /* Check, if the timer was already in the past */ | ||
| 313 | if (hrtimer_active(&ts->sched_timer)) | ||
| 314 | break; | ||
| 315 | } else { | ||
| 316 | if (!tick_program_event(ts->sched_timer.expires, 0)) | ||
| 317 | break; | ||
| 318 | } | ||
| 319 | /* Update jiffies and reread time */ | ||
| 320 | tick_do_update_jiffies64(now); | ||
| 321 | now = ktime_get(); | ||
| 322 | } | ||
| 323 | local_irq_enable(); | ||
| 324 | } | ||
| 325 | |||
| 326 | static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) | ||
| 327 | { | ||
| 328 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
| 329 | return tick_program_event(ts->sched_timer.expires, 0); | ||
| 330 | } | ||
| 331 | |||
| 332 | /* | ||
| 333 | * The nohz low res interrupt handler | ||
| 334 | */ | ||
| 335 | static void tick_nohz_handler(struct clock_event_device *dev) | ||
| 336 | { | ||
| 337 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 338 | struct pt_regs *regs = get_irq_regs(); | ||
| 339 | ktime_t now = ktime_get(); | ||
| 340 | |||
| 341 | dev->next_event.tv64 = KTIME_MAX; | ||
| 342 | |||
| 343 | /* Check, if the jiffies need an update */ | ||
| 344 | tick_do_update_jiffies64(now); | ||
| 345 | |||
| 346 | /* | ||
| 347 | * When we are idle and the tick is stopped, we have to touch | ||
| 348 | * the watchdog as we might not schedule for a really long | ||
| 349 | * time. This happens on complete idle SMP systems while | ||
| 350 | * waiting on the login prompt. We also increment the "start | ||
| 351 | * of idle" jiffy stamp so the idle accounting adjustment we | ||
| 352 | * do when we go busy again does not account too much ticks. | ||
| 353 | * do when we go busy again does not account too many ticks. | ||
| 354 | if (ts->tick_stopped) { | ||
| 355 | touch_softlockup_watchdog(); | ||
| 356 | ts->idle_jiffies++; | ||
| 357 | } | ||
| 358 | |||
| 359 | update_process_times(user_mode(regs)); | ||
| 360 | profile_tick(CPU_PROFILING); | ||
| 361 | |||
| 362 | /* Do not restart, when we are in the idle loop */ | ||
| 363 | if (ts->tick_stopped) | ||
| 364 | return; | ||
| 365 | |||
| 366 | while (tick_nohz_reprogram(ts, now)) { | ||
| 367 | now = ktime_get(); | ||
| 368 | tick_do_update_jiffies64(now); | ||
| 369 | } | ||
| 370 | } | ||
| 371 | |||
| 372 | /** | ||
| 373 | * tick_nohz_switch_to_nohz - switch to nohz mode | ||
| 374 | */ | ||
| 375 | static void tick_nohz_switch_to_nohz(void) | ||
| 376 | { | ||
| 377 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 378 | ktime_t next; | ||
| 379 | |||
| 380 | if (!tick_nohz_enabled) | ||
| 381 | return; | ||
| 382 | |||
| 383 | local_irq_disable(); | ||
| 384 | if (tick_switch_to_oneshot(tick_nohz_handler)) { | ||
| 385 | local_irq_enable(); | ||
| 386 | return; | ||
| 387 | } | ||
| 388 | |||
| 389 | ts->nohz_mode = NOHZ_MODE_LOWRES; | ||
| 390 | |||
| 391 | /* | ||
| 392 | * Recycle the hrtimer in ts, so we can share the | ||
| 393 | * hrtimer_forward with the highres code. | ||
| 394 | */ | ||
| 395 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
| 396 | /* Get the next period */ | ||
| 397 | next = tick_init_jiffy_update(); | ||
| 398 | |||
| 399 | for (;;) { | ||
| 400 | ts->sched_timer.expires = next; | ||
| 401 | if (!tick_program_event(next, 0)) | ||
| 402 | break; | ||
| 403 | next = ktime_add(next, tick_period); | ||
| 404 | } | ||
| 405 | local_irq_enable(); | ||
| 406 | |||
| 407 | printk(KERN_INFO "Switched to NOHZ mode on CPU #%d\n", | ||
| 408 | smp_processor_id()); | ||
| 409 | } | ||
| 410 | |||
| 411 | #else | ||
| 412 | |||
| 413 | static inline void tick_nohz_switch_to_nohz(void) { } | ||
| 414 | |||
| 415 | #endif /* NO_HZ */ | ||
| 416 | |||
| 417 | /* | ||
| 418 | * High resolution timer specific code | ||
| 419 | */ | ||
| 420 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 421 | /* | ||
| 422 | * We rearm the timer until we get disabled by the idle code | ||
| 423 | * Called with interrupts disabled and timer->base->cpu_base->lock held. | ||
| 424 | */ | ||
| 425 | static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) | ||
| 426 | { | ||
| 427 | struct tick_sched *ts = | ||
| 428 | container_of(timer, struct tick_sched, sched_timer); | ||
| 429 | struct hrtimer_cpu_base *base = timer->base->cpu_base; | ||
| 430 | struct pt_regs *regs = get_irq_regs(); | ||
| 431 | ktime_t now = ktime_get(); | ||
| 432 | |||
| 433 | /* Check, if the jiffies need an update */ | ||
| 434 | tick_do_update_jiffies64(now); | ||
| 435 | |||
| 436 | /* | ||
| 437 | * Do not call, when we are not in irq context and have | ||
| 438 | * no valid regs pointer | ||
| 439 | */ | ||
| 440 | if (regs) { | ||
| 441 | /* | ||
| 442 | * When we are idle and the tick is stopped, we have to touch | ||
| 443 | * the watchdog as we might not schedule for a really long | ||
| 444 | * time. This happens on complete idle SMP systems while | ||
| 445 | * waiting on the login prompt. We also increment the "start of | ||
| 446 | * idle" jiffy stamp so the idle accounting adjustment we do | ||
| 447 | * when we go busy again does not account too many ticks. | ||
| 448 | */ | ||
| 449 | if (ts->tick_stopped) { | ||
| 450 | touch_softlockup_watchdog(); | ||
| 451 | ts->idle_jiffies++; | ||
| 452 | } | ||
| 453 | /* | ||
| 454 | * update_process_times() might take tasklist_lock, hence | ||
| 455 | * drop the base lock. sched-tick hrtimers are per-CPU and | ||
| 456 | * never accessible by userspace APIs, so this is safe to do. | ||
| 457 | */ | ||
| 458 | spin_unlock(&base->lock); | ||
| 459 | update_process_times(user_mode(regs)); | ||
| 460 | profile_tick(CPU_PROFILING); | ||
| 461 | spin_lock(&base->lock); | ||
| 462 | } | ||
| 463 | |||
| 464 | /* Do not restart, when we are in the idle loop */ | ||
| 465 | if (ts->tick_stopped) | ||
| 466 | return HRTIMER_NORESTART; | ||
| 467 | |||
| 468 | hrtimer_forward(timer, now, tick_period); | ||
| 469 | |||
| 470 | return HRTIMER_RESTART; | ||
| 471 | } | ||
| 472 | |||
| 473 | /** | ||
| 474 | * tick_setup_sched_timer - setup the tick emulation timer | ||
| 475 | */ | ||
| 476 | void tick_setup_sched_timer(void) | ||
| 477 | { | ||
| 478 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 479 | ktime_t now = ktime_get(); | ||
| 480 | |||
| 481 | /* | ||
| 482 | * Emulate tick processing via per-CPU hrtimers: | ||
| 483 | */ | ||
| 484 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | ||
| 485 | ts->sched_timer.function = tick_sched_timer; | ||
| 486 | ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
| 487 | |||
| 488 | /* Get the next period */ | ||
| 489 | ts->sched_timer.expires = tick_init_jiffy_update(); | ||
| 490 | |||
| 491 | for (;;) { | ||
| 492 | hrtimer_forward(&ts->sched_timer, now, tick_period); | ||
| 493 | hrtimer_start(&ts->sched_timer, ts->sched_timer.expires, | ||
| 494 | HRTIMER_MODE_ABS); | ||
| 495 | /* Check, if the timer was already in the past */ | ||
| 496 | if (hrtimer_active(&ts->sched_timer)) | ||
| 497 | break; | ||
| 498 | now = ktime_get(); | ||
| 499 | } | ||
| 500 | |||
| 501 | #ifdef CONFIG_NO_HZ | ||
| 502 | if (tick_nohz_enabled) | ||
| 503 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | ||
| 504 | #endif | ||
| 505 | } | ||
| 506 | |||
| 507 | void tick_cancel_sched_timer(int cpu) | ||
| 508 | { | ||
| 509 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
| 510 | |||
| 511 | if (ts->sched_timer.base) | ||
| 512 | hrtimer_cancel(&ts->sched_timer); | ||
| 513 | ts->tick_stopped = 0; | ||
| 514 | ts->nohz_mode = NOHZ_MODE_INACTIVE; | ||
| 515 | } | ||
| 516 | #endif /* HIGH_RES_TIMERS */ | ||
| 517 | |||
| 518 | /** | ||
| 519 | * Async notification about clocksource changes | ||
| 520 | */ | ||
| 521 | void tick_clock_notify(void) | ||
| 522 | { | ||
| 523 | int cpu; | ||
| 524 | |||
| 525 | for_each_possible_cpu(cpu) | ||
| 526 | set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks); | ||
| 527 | } | ||
| 528 | |||
| 529 | /* | ||
| 530 | * Async notification about clock event changes | ||
| 531 | */ | ||
| 532 | void tick_oneshot_notify(void) | ||
| 533 | { | ||
| 534 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 535 | |||
| 536 | set_bit(0, &ts->check_clocks); | ||
| 537 | } | ||
| 538 | |||
| 539 | /** | ||
| 540 | * Check whether a change happened that makes oneshot mode possible. | ||
| 541 | * | ||
| 542 | * Called cyclically from the hrtimer softirq (driven by the timer | ||
| 543 | * softirq). allow_nohz signals that we can switch into low-resolution | ||
| 544 | * nohz mode, because high resolution timers are disabled (either at | ||
| 545 | * compile time or at runtime). | ||
| 546 | */ | ||
| 547 | int tick_check_oneshot_change(int allow_nohz) | ||
| 548 | { | ||
| 549 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | ||
| 550 | |||
| 551 | if (!test_and_clear_bit(0, &ts->check_clocks)) | ||
| 552 | return 0; | ||
| 553 | |||
| 554 | if (ts->nohz_mode != NOHZ_MODE_INACTIVE) | ||
| 555 | return 0; | ||
| 556 | |||
| 557 | if (!timekeeping_is_continuous() || !tick_is_oneshot_available()) | ||
| 558 | return 0; | ||
| 559 | |||
| 560 | if (!allow_nohz) | ||
| 561 | return 1; | ||
| 562 | |||
| 563 | tick_nohz_switch_to_nohz(); | ||
| 564 | return 0; | ||
| 565 | } | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c new file mode 100644 index 000000000000..f82c635c3d5c --- /dev/null +++ b/kernel/time/timer_list.c | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | /* | ||
| 2 | * kernel/time/timer_list.c | ||
| 3 | * | ||
| 4 | * List pending timers | ||
| 5 | * | ||
| 6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License version 2 as | ||
| 10 | * published by the Free Software Foundation. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #include <linux/proc_fs.h> | ||
| 14 | #include <linux/module.h> | ||
| 15 | #include <linux/spinlock.h> | ||
| 16 | #include <linux/sched.h> | ||
| 17 | #include <linux/seq_file.h> | ||
| 18 | #include <linux/kallsyms.h> | ||
| 19 | #include <linux/tick.h> | ||
| 20 | |||
| 21 | #include <asm/uaccess.h> | ||
| 22 | |||
| 23 | typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); | ||
| 24 | |||
| 25 | DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | ||
| 26 | |||
| 27 | /* | ||
| 28 | * This allows printing both to /proc/timer_list and | ||
| 29 | * to the console (on SysRq-Q): | ||
| 30 | */ | ||
| 31 | #define SEQ_printf(m, x...) \ | ||
| 32 | do { \ | ||
| 33 | if (m) \ | ||
| 34 | seq_printf(m, x); \ | ||
| 35 | else \ | ||
| 36 | printk(x); \ | ||
| 37 | } while (0) | ||
| 38 | |||
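The SEQ_printf() macro above gives every print helper in this file two output sinks with one body: with a non-NULL seq_file it feeds the /proc/timer_list read path, with NULL it falls back to printk() for the SysRq-Q console dump. A hedged usage sketch (demo_dump() is hypothetical):

    /* Sketch: one printing routine, two output targets */
    static void demo_dump(struct seq_file *m)
    {
            SEQ_printf(m, "demo: %d\n", 42);    /* seq_printf() or printk() */
    }

    demo_dump(m);       /* /proc read path: into the seq_file */
    demo_dump(NULL);    /* SysRq-Q path: to the console */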
| 39 | static void print_name_offset(struct seq_file *m, void *sym) | ||
| 40 | { | ||
| 41 | unsigned long addr = (unsigned long)sym; | ||
| 42 | char namebuf[KSYM_NAME_LEN+1]; | ||
| 43 | unsigned long size, offset; | ||
| 44 | const char *sym_name; | ||
| 45 | char *modname; | ||
| 46 | |||
| 47 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
| 48 | if (sym_name) | ||
| 49 | SEQ_printf(m, "%s", sym_name); | ||
| 50 | else | ||
| 51 | SEQ_printf(m, "<%p>", sym); | ||
| 52 | } | ||
| 53 | |||
| 54 | static void | ||
| 55 | print_timer(struct seq_file *m, struct hrtimer *timer, int idx, u64 now) | ||
| 56 | { | ||
| 57 | #ifdef CONFIG_TIMER_STATS | ||
| 58 | char tmp[TASK_COMM_LEN + 1]; | ||
| 59 | #endif | ||
| 60 | SEQ_printf(m, " #%d: ", idx); | ||
| 61 | print_name_offset(m, timer); | ||
| 62 | SEQ_printf(m, ", "); | ||
| 63 | print_name_offset(m, timer->function); | ||
| 64 | SEQ_printf(m, ", S:%02lx", timer->state); | ||
| 65 | #ifdef CONFIG_TIMER_STATS | ||
| 66 | SEQ_printf(m, ", "); | ||
| 67 | print_name_offset(m, timer->start_site); | ||
| 68 | memcpy(tmp, timer->start_comm, TASK_COMM_LEN); | ||
| 69 | tmp[TASK_COMM_LEN] = 0; | ||
| 70 | SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); | ||
| 71 | #endif | ||
| 72 | SEQ_printf(m, "\n"); | ||
| 73 | SEQ_printf(m, " # expires at %Ld nsecs [in %Ld nsecs]\n", | ||
| 74 | (unsigned long long)ktime_to_ns(timer->expires), | ||
| 75 | (unsigned long long)(ktime_to_ns(timer->expires) - now)); | ||
| 76 | } | ||
| 77 | |||
| 78 | static void | ||
| 79 | print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, | ||
| 80 | u64 now) | ||
| 81 | { | ||
| 82 | struct hrtimer *timer, tmp; | ||
| 83 | unsigned long next = 0, i; | ||
| 84 | struct rb_node *curr; | ||
| 85 | unsigned long flags; | ||
| 86 | |||
| 87 | next_one: | ||
| 88 | i = 0; | ||
| 89 | spin_lock_irqsave(&base->cpu_base->lock, flags); | ||
| 90 | |||
| 91 | curr = base->first; | ||
| 92 | /* | ||
| 93 | * Crude but we have to do this O(N*N) thing, because | ||
| 94 | * we have to unlock the base when printing: | ||
| 95 | */ | ||
| 96 | while (curr && i < next) { | ||
| 97 | curr = rb_next(curr); | ||
| 98 | i++; | ||
| 99 | } | ||
| 100 | |||
| 101 | if (curr) { | ||
| 102 | |||
| 103 | timer = rb_entry(curr, struct hrtimer, node); | ||
| 104 | tmp = *timer; | ||
| 105 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
| 106 | |||
| 107 | print_timer(m, &tmp, i, now); | ||
| 108 | next++; | ||
| 109 | goto next_one; | ||
| 110 | } | ||
| 111 | spin_unlock_irqrestore(&base->cpu_base->lock, flags); | ||
| 112 | } | ||
| 113 | |||
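The O(N*N) walk above is the price of printing without holding the base lock: each pass re-takes the lock, skips the `next` timers already printed, snapshots one hrtimer into the on-stack `tmp`, drops the lock and prints the copy. The same pattern in a generic, self-contained sketch (all demo_* names are hypothetical):

    /* Sketch: snapshot-under-lock printing of a locked linked list */
    struct demo { struct demo *next; int val; };
    static struct demo *demo_head;
    static DEFINE_SPINLOCK(demo_lock);

    static void demo_print_all(void)
    {
            unsigned long skip = 0, i, flags;
            struct demo *d, copy;
    again:
            i = 0;
            spin_lock_irqsave(&demo_lock, flags);
            for (d = demo_head; d && i < skip; d = d->next)
                    i++;
            if (d) {
                    copy = *d;                      /* snapshot while locked */
                    spin_unlock_irqrestore(&demo_lock, flags);
                    printk("%d\n", copy.val);       /* print unlocked */
                    skip++;
                    goto again;
            }
            spin_unlock_irqrestore(&demo_lock, flags);
    }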
| 114 | static void | ||
| 115 | print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) | ||
| 116 | { | ||
| 117 | SEQ_printf(m, " .index: %d\n", | ||
| 118 | base->index); | ||
| 119 | SEQ_printf(m, " .resolution: %Ld nsecs\n", | ||
| 120 | (unsigned long long)ktime_to_ns(base->resolution)); | ||
| 121 | SEQ_printf(m, " .get_time: "); | ||
| 122 | print_name_offset(m, base->get_time); | ||
| 123 | SEQ_printf(m, "\n"); | ||
| 124 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 125 | SEQ_printf(m, " .offset: %Ld nsecs\n", | ||
| 126 | ktime_to_ns(base->offset)); | ||
| 127 | #endif | ||
| 128 | SEQ_printf(m, "active timers:\n"); | ||
| 129 | print_active_timers(m, base, now); | ||
| 130 | } | ||
| 131 | |||
| 132 | static void print_cpu(struct seq_file *m, int cpu, u64 now) | ||
| 133 | { | ||
| 134 | struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); | ||
| 135 | int i; | ||
| 136 | |||
| 137 | SEQ_printf(m, "\ncpu: %d\n", cpu); | ||
| 138 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | ||
| 139 | SEQ_printf(m, " clock %d:\n", i); | ||
| 140 | print_base(m, cpu_base->clock_base + i, now); | ||
| 141 | } | ||
| 142 | #define P(x) \ | ||
| 143 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(cpu_base->x)) | ||
| 144 | #define P_ns(x) \ | ||
| 145 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
| 146 | (u64)(ktime_to_ns(cpu_base->x))) | ||
| 147 | |||
| 148 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
| 149 | P_ns(expires_next); | ||
| 150 | P(hres_active); | ||
| 151 | P(nr_events); | ||
| 152 | #endif | ||
| 153 | #undef P | ||
| 154 | #undef P_ns | ||
| 155 | |||
| 156 | #ifdef CONFIG_TICK_ONESHOT | ||
| 157 | # define P(x) \ | ||
| 158 | SEQ_printf(m, " .%-15s: %Ld\n", #x, (u64)(ts->x)) | ||
| 159 | # define P_ns(x) \ | ||
| 160 | SEQ_printf(m, " .%-15s: %Ld nsecs\n", #x, \ | ||
| 161 | (u64)(ktime_to_ns(ts->x))) | ||
| 162 | { | ||
| 163 | struct tick_sched *ts = tick_get_tick_sched(cpu); | ||
| 164 | P(nohz_mode); | ||
| 165 | P_ns(idle_tick); | ||
| 166 | P(tick_stopped); | ||
| 167 | P(idle_jiffies); | ||
| 168 | P(idle_calls); | ||
| 169 | P(idle_sleeps); | ||
| 170 | P_ns(idle_entrytime); | ||
| 171 | P_ns(idle_sleeptime); | ||
| 172 | P(last_jiffies); | ||
| 173 | P(next_jiffies); | ||
| 174 | P_ns(idle_expires); | ||
| 175 | SEQ_printf(m, "jiffies: %Ld\n", (u64)jiffies); | ||
| 176 | } | ||
| 177 | #endif | ||
| 178 | |||
| 179 | #undef P | ||
| 180 | #undef P_ns | ||
| 181 | } | ||
| 182 | |||
| 183 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
| 184 | static void | ||
| 185 | print_tickdevice(struct seq_file *m, struct tick_device *td) | ||
| 186 | { | ||
| 187 | struct clock_event_device *dev = td->evtdev; | ||
| 188 | |||
| 189 | SEQ_printf(m, "\nTick Device: mode: %d\n", td->mode); | ||
| 190 | |||
| 191 | SEQ_printf(m, "Clock Event Device: "); | ||
| 192 | if (!dev) { | ||
| 193 | SEQ_printf(m, "<NULL>\n"); | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | SEQ_printf(m, "%s\n", dev->name); | ||
| 197 | SEQ_printf(m, " max_delta_ns: %ld\n", dev->max_delta_ns); | ||
| 198 | SEQ_printf(m, " min_delta_ns: %ld\n", dev->min_delta_ns); | ||
| 199 | SEQ_printf(m, " mult: %ld\n", dev->mult); | ||
| 200 | SEQ_printf(m, " shift: %d\n", dev->shift); | ||
| 201 | SEQ_printf(m, " mode: %d\n", dev->mode); | ||
| 202 | SEQ_printf(m, " next_event: %Ld nsecs\n", | ||
| 203 | (unsigned long long) ktime_to_ns(dev->next_event)); | ||
| 204 | |||
| 205 | SEQ_printf(m, " set_next_event: "); | ||
| 206 | print_name_offset(m, dev->set_next_event); | ||
| 207 | SEQ_printf(m, "\n"); | ||
| 208 | |||
| 209 | SEQ_printf(m, " set_mode: "); | ||
| 210 | print_name_offset(m, dev->set_mode); | ||
| 211 | SEQ_printf(m, "\n"); | ||
| 212 | |||
| 213 | SEQ_printf(m, " event_handler: "); | ||
| 214 | print_name_offset(m, dev->event_handler); | ||
| 215 | SEQ_printf(m, "\n"); | ||
| 216 | } | ||
| 217 | |||
| 218 | static void timer_list_show_tickdevices(struct seq_file *m) | ||
| 219 | { | ||
| 220 | int cpu; | ||
| 221 | |||
| 222 | #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST | ||
| 223 | print_tickdevice(m, tick_get_broadcast_device()); | ||
| 224 | SEQ_printf(m, "tick_broadcast_mask: %08lx\n", | ||
| 225 | tick_get_broadcast_mask()->bits[0]); | ||
| 226 | #ifdef CONFIG_TICK_ONESHOT | ||
| 227 | SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n", | ||
| 228 | tick_get_broadcast_oneshot_mask()->bits[0]); | ||
| 229 | #endif | ||
| 230 | SEQ_printf(m, "\n"); | ||
| 231 | #endif | ||
| 232 | for_each_online_cpu(cpu) | ||
| 233 | print_tickdevice(m, tick_get_device(cpu)); | ||
| 234 | SEQ_printf(m, "\n"); | ||
| 235 | } | ||
| 236 | #else | ||
| 237 | static void timer_list_show_tickdevices(struct seq_file *m) { } | ||
| 238 | #endif | ||
| 239 | |||
| 240 | static int timer_list_show(struct seq_file *m, void *v) | ||
| 241 | { | ||
| 242 | u64 now = ktime_to_ns(ktime_get()); | ||
| 243 | int cpu; | ||
| 244 | |||
| 245 | SEQ_printf(m, "Timer List Version: v0.3\n"); | ||
| 246 | SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); | ||
| 247 | SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); | ||
| 248 | |||
| 249 | for_each_online_cpu(cpu) | ||
| 250 | print_cpu(m, cpu, now); | ||
| 251 | |||
| 252 | SEQ_printf(m, "\n"); | ||
| 253 | timer_list_show_tickdevices(m); | ||
| 254 | |||
| 255 | return 0; | ||
| 256 | } | ||
| 257 | |||
| 258 | void sysrq_timer_list_show(void) | ||
| 259 | { | ||
| 260 | timer_list_show(NULL, NULL); | ||
| 261 | } | ||
| 262 | |||
| 263 | static int timer_list_open(struct inode *inode, struct file *filp) | ||
| 264 | { | ||
| 265 | return single_open(filp, timer_list_show, NULL); | ||
| 266 | } | ||
| 267 | |||
| 268 | static struct file_operations timer_list_fops = { | ||
| 269 | .open = timer_list_open, | ||
| 270 | .read = seq_read, | ||
| 271 | .llseek = seq_lseek, | ||
| 272 | .release = seq_release, | ||
| 273 | }; | ||
| 274 | |||
| 275 | static int __init init_timer_list_procfs(void) | ||
| 276 | { | ||
| 277 | struct proc_dir_entry *pe; | ||
| 278 | |||
| 279 | pe = create_proc_entry("timer_list", 0644, NULL); | ||
| 280 | if (!pe) | ||
| 281 | return -ENOMEM; | ||
| 282 | |||
| 283 | pe->proc_fops = &timer_list_fops; | ||
| 284 | |||
| 285 | return 0; | ||
| 286 | } | ||
| 287 | __initcall(init_timer_list_procfs); | ||
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c new file mode 100644 index 000000000000..1bc4882e28e0 --- /dev/null +++ b/kernel/time/timer_stats.c | |||
| @@ -0,0 +1,411 @@ | |||
| 1 | /* | ||
| 2 | * kernel/time/timer_stats.c | ||
| 3 | * | ||
| 4 | * Collect timer usage statistics. | ||
| 5 | * | ||
| 6 | * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar | ||
| 7 | * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | ||
| 8 | * | ||
| 9 | * timer_stats is based on timer_top, similar functionality that was part of | ||
| 10 | * Con Kolivas' dyntick patch set. It was developed by Daniel Petrini at the | ||
| 11 | * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based | ||
| 12 | * on dynamic allocation of the statistics entries and linear-search-based | ||
| 13 | * lookup combined with a global lock, rather than the static array, hash | ||
| 14 | * and per-CPU locking which are used by timer_stats. It was written for the | ||
| 15 | * pre-hrtimer kernel code and therefore did not take hrtimers into account. | ||
| 16 | * Nevertheless it provided the base for the timer_stats implementation and | ||
| 17 | * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks | ||
| 18 | * for this effort. | ||
| 19 | * | ||
| 20 | * timer_top.c is | ||
| 21 | * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus | ||
| 22 | * Written by Daniel Petrini <d.pensator@gmail.com> | ||
| 23 | * timer_top.c was released under the GNU General Public License version 2 | ||
| 24 | * | ||
| 25 | * We export the addresses and call counts of the timer functions being | ||
| 26 | * called, plus the pid and cmdline of the owning process where applicable. | ||
| 27 | * | ||
| 28 | * Start/stop data collection: | ||
| 29 | * # echo [1|0] >/proc/timer_stats | ||
| 30 | * | ||
| 31 | * Display the information collected so far: | ||
| 32 | * # cat /proc/timer_stats | ||
| 33 | * | ||
| 34 | * This program is free software; you can redistribute it and/or modify | ||
| 35 | * it under the terms of the GNU General Public License version 2 as | ||
| 36 | * published by the Free Software Foundation. | ||
| 37 | */ | ||
| 38 | |||
| 39 | #include <linux/proc_fs.h> | ||
| 40 | #include <linux/module.h> | ||
| 41 | #include <linux/spinlock.h> | ||
| 42 | #include <linux/sched.h> | ||
| 43 | #include <linux/seq_file.h> | ||
| 44 | #include <linux/kallsyms.h> | ||
| 45 | |||
| 46 | #include <asm/uaccess.h> | ||
| 47 | |||
| 48 | /* | ||
| 49 | * This is our basic unit of interest: a timer expiry event identified | ||
| 50 | * by the timer, its start/expire functions and the PID of the task that | ||
| 51 | * started the timer. We count the number of times an event happens: | ||
| 52 | */ | ||
| 53 | struct entry { | ||
| 54 | /* | ||
| 55 | * Hash list: | ||
| 56 | */ | ||
| 57 | struct entry *next; | ||
| 58 | |||
| 59 | /* | ||
| 60 | * Hash keys: | ||
| 61 | */ | ||
| 62 | void *timer; | ||
| 63 | void *start_func; | ||
| 64 | void *expire_func; | ||
| 65 | pid_t pid; | ||
| 66 | |||
| 67 | /* | ||
| 68 | * Number of timeout events: | ||
| 69 | */ | ||
| 70 | unsigned long count; | ||
| 71 | |||
| 72 | /* | ||
| 73 | * We save the command-line string to preserve | ||
| 74 | * this information past task exit: | ||
| 75 | */ | ||
| 76 | char comm[TASK_COMM_LEN + 1]; | ||
| 77 | |||
| 78 | } ____cacheline_aligned_in_smp; | ||
| 79 | |||
| 80 | /* | ||
| 81 | * Spinlock protecting the tables - not taken during lookup: | ||
| 82 | */ | ||
| 83 | static DEFINE_SPINLOCK(table_lock); | ||
| 84 | |||
| 85 | /* | ||
| 86 | * Per-CPU lookup locks for fast hash lookup: | ||
| 87 | */ | ||
| 88 | static DEFINE_PER_CPU(spinlock_t, lookup_lock); | ||
| 89 | |||
| 90 | /* | ||
| 91 | * Mutex to serialize state changes with show-stats activities: | ||
| 92 | */ | ||
| 93 | static DEFINE_MUTEX(show_mutex); | ||
| 94 | |||
| 95 | /* | ||
| 96 | * Collection status, active/inactive: | ||
| 97 | */ | ||
| 98 | static int __read_mostly active; | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Beginning/end timestamps of measurement: | ||
| 102 | */ | ||
| 103 | static ktime_t time_start, time_stop; | ||
| 104 | |||
| 105 | /* | ||
| 106 | * tstat entry structs only get allocated while collection is | ||
| 107 | * active and never freed during that time - this simplifies | ||
| 108 | * things quite a bit. | ||
| 109 | * | ||
| 110 | * They get freed when a new collection period is started. | ||
| 111 | */ | ||
| 112 | #define MAX_ENTRIES_BITS 10 | ||
| 113 | #define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) | ||
| 114 | |||
| 115 | static unsigned long nr_entries; | ||
| 116 | static struct entry entries[MAX_ENTRIES]; | ||
| 117 | |||
| 118 | static atomic_t overflow_count; | ||
| 119 | |||
| 120 | static void reset_entries(void) | ||
| 121 | { | ||
| 122 | nr_entries = 0; | ||
| 123 | memset(entries, 0, sizeof(entries)); | ||
| 124 | atomic_set(&overflow_count, 0); | ||
| 125 | } | ||
| 126 | |||
| 127 | static struct entry *alloc_entry(void) | ||
| 128 | { | ||
| 129 | if (nr_entries >= MAX_ENTRIES) | ||
| 130 | return NULL; | ||
| 131 | |||
| 132 | return entries + nr_entries++; | ||
| 133 | } | ||
| 134 | |||
| 135 | /* | ||
| 136 | * The entries are in a hash-table, for fast lookup: | ||
| 137 | */ | ||
| 138 | #define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) | ||
| 139 | #define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) | ||
| 140 | #define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) | ||
| 141 | |||
| 142 | #define __tstat_hashfn(entry) \ | ||
| 143 | (((unsigned long)(entry)->timer ^ \ | ||
| 144 | (unsigned long)(entry)->start_func ^ \ | ||
| 145 | (unsigned long)(entry)->expire_func ^ \ | ||
| 146 | (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) | ||
| 147 | |||
| 148 | #define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) | ||
| 149 | |||
| 150 | static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; | ||
| 151 | |||
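__tstat_hashfn() simply XORs the four hash keys together and masks the result to the table size; with MAX_ENTRIES_BITS = 10 the table has 512 buckets for at most 1024 entries, i.e. roughly two entries per chain when full. An equivalent inline form, as a sketch:

    /* Sketch: inline equivalent of the __tstat_hashfn() macro */
    static inline unsigned long tstat_hashfn(struct entry *e)
    {
            unsigned long h = (unsigned long)e->timer ^
                              (unsigned long)e->start_func ^
                              (unsigned long)e->expire_func ^
                              (unsigned long)e->pid;
            return h & TSTAT_HASH_MASK;     /* 0 .. TSTAT_HASH_SIZE-1 */
    }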
| 152 | static int match_entries(struct entry *entry1, struct entry *entry2) | ||
| 153 | { | ||
| 154 | return entry1->timer == entry2->timer && | ||
| 155 | entry1->start_func == entry2->start_func && | ||
| 156 | entry1->expire_func == entry2->expire_func && | ||
| 157 | entry1->pid == entry2->pid; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Look up whether an entry matching this item is present | ||
| 162 | * in the hash already. Must be called with irqs off and the | ||
| 163 | * lookup lock held: | ||
| 164 | */ | ||
| 165 | static struct entry *tstat_lookup(struct entry *entry, char *comm) | ||
| 166 | { | ||
| 167 | struct entry **head, *curr, *prev; | ||
| 168 | |||
| 169 | head = tstat_hashentry(entry); | ||
| 170 | curr = *head; | ||
| 171 | |||
| 172 | /* | ||
| 173 | * The fastpath is when the entry is already hashed; | ||
| 174 | * we walk the chain with the lookup lock held, but | ||
| 175 | * without taking the table lock: | ||
| 176 | */ | ||
| 177 | while (curr) { | ||
| 178 | if (match_entries(curr, entry)) | ||
| 179 | return curr; | ||
| 180 | |||
| 181 | curr = curr->next; | ||
| 182 | } | ||
| 183 | /* | ||
| 184 | * Slowpath: allocate, set up and link a new hash entry: | ||
| 185 | */ | ||
| 186 | prev = NULL; | ||
| 187 | curr = *head; | ||
| 188 | |||
| 189 | spin_lock(&table_lock); | ||
| 190 | /* | ||
| 191 | * Make sure we have not raced with another CPU: | ||
| 192 | */ | ||
| 193 | while (curr) { | ||
| 194 | if (match_entries(curr, entry)) | ||
| 195 | goto out_unlock; | ||
| 196 | |||
| 197 | prev = curr; | ||
| 198 | curr = curr->next; | ||
| 199 | } | ||
| 200 | |||
| 201 | curr = alloc_entry(); | ||
| 202 | if (curr) { | ||
| 203 | *curr = *entry; | ||
| 204 | curr->count = 0; | ||
| 205 | memcpy(curr->comm, comm, TASK_COMM_LEN); | ||
| 206 | if (prev) | ||
| 207 | prev->next = curr; | ||
| 208 | else | ||
| 209 | *head = curr; | ||
| 210 | curr->next = NULL; | ||
| 211 | } | ||
| 212 | out_unlock: | ||
| 213 | spin_unlock(&table_lock); | ||
| 214 | |||
| 215 | return curr; | ||
| 216 | } | ||
| 217 | |||
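tstat_lookup() is a classic double-checked insert: the hot path scans the chain under only the caller's per-CPU lookup lock; on a miss it takes the global table_lock, rescans the chain in case another CPU inserted the same key meanwhile, and only then links a fresh entry, setting its `next` pointer before publication. The shape of the idiom, reduced to a sketch (chain_scan() and chain_insert() are hypothetical helpers):

    /* Sketch: double-checked hash insert under the two-level locking */
    curr = chain_scan(*head, &input);          /* fast path: lookup lock only */
    if (!curr) {
            spin_lock(&table_lock);            /* slow path: global lock */
            curr = chain_scan(*head, &input);  /* re-check: we may have raced */
            if (!curr)
                    curr = chain_insert(head, &input, comm);
            spin_unlock(&table_lock);
    }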
| 218 | /** | ||
| 219 | * timer_stats_update_stats - Update the statistics for a timer. | ||
| 220 | * @timer: pointer to either a timer_list or a hrtimer | ||
| 221 | * @pid: the pid of the task which set up the timer | ||
| 222 | * @startf: pointer to the function which did the timer setup | ||
| 223 | * @timerf: pointer to the timer callback function of the timer | ||
| 224 | * @comm: name of the process which set up the timer | ||
| 225 | * | ||
| 226 | * If the timer is already registered, the event counter is | ||
| 227 | * incremented. Otherwise the timer is registered in a free slot. | ||
| 228 | */ | ||
| 229 | void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | ||
| 230 | void *timerf, char *comm) | ||
| 231 | { | ||
| 232 | /* | ||
| 233 | * It doesn't matter which CPU's lookup lock we take: | ||
| 234 | */ | ||
| 235 | spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id()); | ||
| 236 | struct entry *entry, input; | ||
| 237 | unsigned long flags; | ||
| 238 | |||
| 239 | input.timer = timer; | ||
| 240 | input.start_func = startf; | ||
| 241 | input.expire_func = timerf; | ||
| 242 | input.pid = pid; | ||
| 243 | |||
| 244 | spin_lock_irqsave(lock, flags); | ||
| 245 | if (!active) | ||
| 246 | goto out_unlock; | ||
| 247 | |||
| 248 | entry = tstat_lookup(&input, comm); | ||
| 249 | if (likely(entry)) | ||
| 250 | entry->count++; | ||
| 251 | else | ||
| 252 | atomic_inc(&overflow_count); | ||
| 253 | |||
| 254 | out_unlock: | ||
| 255 | spin_unlock_irqrestore(lock, flags); | ||
| 256 | } | ||
| 257 | |||
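The call sites for this live in the timer code (see the kernel/timer.c hunks below): __mod_timer() and friends record the start site via timer_stats_timer_set_start_info(), and __run_timers() feeds expiry events in via timer_stats_account_timer(). In a CONFIG_TIMER_STATS build the latter boils down to roughly this inline wrapper (a sketch of the expected header-side glue, not part of this file):

    /* Sketch: how a timer_list expiry reaches timer_stats_update_stats() */
    static inline void timer_stats_account_timer(struct timer_list *timer)
    {
            timer_stats_update_stats(timer, timer->start_pid,
                                     timer->start_site, timer->function,
                                     timer->start_comm);
    }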
| 258 | static void print_name_offset(struct seq_file *m, unsigned long addr) | ||
| 259 | { | ||
| 260 | char namebuf[KSYM_NAME_LEN+1]; | ||
| 261 | unsigned long size, offset; | ||
| 262 | const char *sym_name; | ||
| 263 | char *modname; | ||
| 264 | |||
| 265 | sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf); | ||
| 266 | if (sym_name) | ||
| 267 | seq_printf(m, "%s", sym_name); | ||
| 268 | else | ||
| 269 | seq_printf(m, "<%p>", (void *)addr); | ||
| 270 | } | ||
| 271 | |||
| 272 | static int tstats_show(struct seq_file *m, void *v) | ||
| 273 | { | ||
| 274 | struct timespec period; | ||
| 275 | struct entry *entry; | ||
| 276 | unsigned long ms; | ||
| 277 | long events = 0; | ||
| 278 | ktime_t time; | ||
| 279 | int i; | ||
| 280 | |||
| 281 | mutex_lock(&show_mutex); | ||
| 282 | /* | ||
| 283 | * If still active, calculate up to now: | ||
| 284 | */ | ||
| 285 | if (active) | ||
| 286 | time_stop = ktime_get(); | ||
| 287 | |||
| 288 | time = ktime_sub(time_stop, time_start); | ||
| 289 | |||
| 290 | period = ktime_to_timespec(time); | ||
| 291 | ms = period.tv_nsec / 1000000; | ||
| 292 | |||
| 293 | seq_puts(m, "Timer Stats Version: v0.1\n"); | ||
| 294 | seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); | ||
| 295 | if (atomic_read(&overflow_count)) | ||
| 296 | seq_printf(m, "Overflow: %d entries\n", | ||
| 297 | atomic_read(&overflow_count)); | ||
| 298 | |||
| 299 | for (i = 0; i < nr_entries; i++) { | ||
| 300 | entry = entries + i; | ||
| 301 | seq_printf(m, "%4lu, %5d %-16s ", | ||
| 302 | entry->count, entry->pid, entry->comm); | ||
| 303 | |||
| 304 | print_name_offset(m, (unsigned long)entry->start_func); | ||
| 305 | seq_puts(m, " ("); | ||
| 306 | print_name_offset(m, (unsigned long)entry->expire_func); | ||
| 307 | seq_puts(m, ")\n"); | ||
| 308 | |||
| 309 | events += entry->count; | ||
| 310 | } | ||
| 311 | |||
| 312 | ms += period.tv_sec * 1000; | ||
| 313 | if (!ms) | ||
| 314 | ms = 1; | ||
| 315 | |||
| 316 | if (events && period.tv_sec) | ||
| 317 | seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, | ||
| 318 | events / period.tv_sec, events * 1000 / ms); | ||
| 319 | else | ||
| 320 | seq_printf(m, "%ld total events\n", events); | ||
| 321 | |||
| 322 | mutex_unlock(&show_mutex); | ||
| 323 | |||
| 324 | return 0; | ||
| 325 | } | ||
| 326 | |||
| 327 | /* | ||
| 328 | * After a state change, make sure all concurrent lookup/update | ||
| 329 | * activities have stopped: | ||
| 330 | */ | ||
| 331 | static void sync_access(void) | ||
| 332 | { | ||
| 333 | unsigned long flags; | ||
| 334 | int cpu; | ||
| 335 | |||
| 336 | for_each_online_cpu(cpu) { | ||
| 337 | spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags); | ||
| 338 | /* nothing */ | ||
| 339 | spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags); | ||
| 340 | } | ||
| 341 | } | ||
| 342 | |||
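sync_access() is a quiescence trick rather than a data access: every updater holds its CPU's lookup_lock across the whole check of `active` plus the table update, so acquiring and immediately releasing each lock in turn guarantees that any update which observed the old `active` value has finished before the state change proceeds. The pairing, sketched with hypothetical demo_* names:

    /* Sketch of the pairing (demo_* names are hypothetical) */
    static void demo_update(void)                  /* hot path, per CPU */
    {
            unsigned long flags;

            spin_lock_irqsave(&__get_cpu_var(lookup_lock), flags);
            if (active)
                    ;       /* ... touch the tables ... */
            spin_unlock_irqrestore(&__get_cpu_var(lookup_lock), flags);
    }

    static void demo_disable(void)                 /* state changer */
    {
            active = 0;     /* new updaters bail out immediately ...   */
            sync_access();  /* ... and in-flight ones have now drained */
    }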
| 343 | static ssize_t tstats_write(struct file *file, const char __user *buf, | ||
| 344 | size_t count, loff_t *offs) | ||
| 345 | { | ||
| 346 | char ctl[2]; | ||
| 347 | |||
| 348 | if (count != 2 || *offs) | ||
| 349 | return -EINVAL; | ||
| 350 | |||
| 351 | if (copy_from_user(ctl, buf, count)) | ||
| 352 | return -EFAULT; | ||
| 353 | |||
| 354 | mutex_lock(&show_mutex); | ||
| 355 | switch (ctl[0]) { | ||
| 356 | case '0': | ||
| 357 | if (active) { | ||
| 358 | active = 0; | ||
| 359 | time_stop = ktime_get(); | ||
| 360 | sync_access(); | ||
| 361 | } | ||
| 362 | break; | ||
| 363 | case '1': | ||
| 364 | if (!active) { | ||
| 365 | reset_entries(); | ||
| 366 | time_start = ktime_get(); | ||
| 367 | active = 1; | ||
| 368 | } | ||
| 369 | break; | ||
| 370 | default: | ||
| 371 | count = -EINVAL; | ||
| 372 | } | ||
| 373 | mutex_unlock(&show_mutex); | ||
| 374 | |||
| 375 | return count; | ||
| 376 | } | ||
| 377 | |||
| 378 | static int tstats_open(struct inode *inode, struct file *filp) | ||
| 379 | { | ||
| 380 | return single_open(filp, tstats_show, NULL); | ||
| 381 | } | ||
| 382 | |||
| 383 | static struct file_operations tstats_fops = { | ||
| 384 | .open = tstats_open, | ||
| 385 | .read = seq_read, | ||
| 386 | .write = tstats_write, | ||
| 387 | .llseek = seq_lseek, | ||
| 388 | .release = seq_release, | ||
| 389 | }; | ||
| 390 | |||
| 391 | void __init init_timer_stats(void) | ||
| 392 | { | ||
| 393 | int cpu; | ||
| 394 | |||
| 395 | for_each_possible_cpu(cpu) | ||
| 396 | spin_lock_init(&per_cpu(lookup_lock, cpu)); | ||
| 397 | } | ||
| 398 | |||
| 399 | static int __init init_tstats_procfs(void) | ||
| 400 | { | ||
| 401 | struct proc_dir_entry *pe; | ||
| 402 | |||
| 403 | pe = create_proc_entry("timer_stats", 0644, NULL); | ||
| 404 | if (!pe) | ||
| 405 | return -ENOMEM; | ||
| 406 | |||
| 407 | pe->proc_fops = &tstats_fops; | ||
| 408 | |||
| 409 | return 0; | ||
| 410 | } | ||
| 411 | __initcall(init_tstats_procfs); | ||
diff --git a/kernel/timer.c b/kernel/timer.c index 8533c3796082..cb1b86a9c52f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #include <linux/cpu.h> | 34 | #include <linux/cpu.h> |
| 35 | #include <linux/syscalls.h> | 35 | #include <linux/syscalls.h> |
| 36 | #include <linux/delay.h> | 36 | #include <linux/delay.h> |
| 37 | #include <linux/tick.h> | ||
| 38 | #include <linux/kallsyms.h> | ||
| 37 | 39 | ||
| 38 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
| 39 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
| @@ -262,6 +264,18 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) | |||
| 262 | list_add_tail(&timer->entry, vec); | 264 | list_add_tail(&timer->entry, vec); |
| 263 | } | 265 | } |
| 264 | 266 | ||
| 267 | #ifdef CONFIG_TIMER_STATS | ||
| 268 | void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) | ||
| 269 | { | ||
| 270 | if (timer->start_site) | ||
| 271 | return; | ||
| 272 | |||
| 273 | timer->start_site = addr; | ||
| 274 | memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); | ||
| 275 | timer->start_pid = current->pid; | ||
| 276 | } | ||
| 277 | #endif | ||
| 278 | |||
| 265 | /** | 279 | /** |
| 266 | * init_timer - initialize a timer. | 280 | * init_timer - initialize a timer. |
| 267 | * @timer: the timer to be initialized | 281 | * @timer: the timer to be initialized |
| @@ -273,11 +287,16 @@ void fastcall init_timer(struct timer_list *timer) | |||
| 273 | { | 287 | { |
| 274 | timer->entry.next = NULL; | 288 | timer->entry.next = NULL; |
| 275 | timer->base = __raw_get_cpu_var(tvec_bases); | 289 | timer->base = __raw_get_cpu_var(tvec_bases); |
| 290 | #ifdef CONFIG_TIMER_STATS | ||
| 291 | timer->start_site = NULL; | ||
| 292 | timer->start_pid = -1; | ||
| 293 | memset(timer->start_comm, 0, TASK_COMM_LEN); | ||
| 294 | #endif | ||
| 276 | } | 295 | } |
| 277 | EXPORT_SYMBOL(init_timer); | 296 | EXPORT_SYMBOL(init_timer); |
| 278 | 297 | ||
| 279 | static inline void detach_timer(struct timer_list *timer, | 298 | static inline void detach_timer(struct timer_list *timer, |
| 280 | int clear_pending) | 299 | int clear_pending) |
| 281 | { | 300 | { |
| 282 | struct list_head *entry = &timer->entry; | 301 | struct list_head *entry = &timer->entry; |
| 283 | 302 | ||
| @@ -324,6 +343,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 324 | unsigned long flags; | 343 | unsigned long flags; |
| 325 | int ret = 0; | 344 | int ret = 0; |
| 326 | 345 | ||
| 346 | timer_stats_timer_set_start_info(timer); | ||
| 327 | BUG_ON(!timer->function); | 347 | BUG_ON(!timer->function); |
| 328 | 348 | ||
| 329 | base = lock_timer_base(timer, &flags); | 349 | base = lock_timer_base(timer, &flags); |
| @@ -374,6 +394,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
| 374 | tvec_base_t *base = per_cpu(tvec_bases, cpu); | 394 | tvec_base_t *base = per_cpu(tvec_bases, cpu); |
| 375 | unsigned long flags; | 395 | unsigned long flags; |
| 376 | 396 | ||
| 397 | timer_stats_timer_set_start_info(timer); | ||
| 377 | BUG_ON(timer_pending(timer) || !timer->function); | 398 | BUG_ON(timer_pending(timer) || !timer->function); |
| 378 | spin_lock_irqsave(&base->lock, flags); | 399 | spin_lock_irqsave(&base->lock, flags); |
| 379 | timer->base = base; | 400 | timer->base = base; |
| @@ -406,6 +427,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
| 406 | { | 427 | { |
| 407 | BUG_ON(!timer->function); | 428 | BUG_ON(!timer->function); |
| 408 | 429 | ||
| 430 | timer_stats_timer_set_start_info(timer); | ||
| 409 | /* | 431 | /* |
| 410 | * This is a common optimization triggered by the | 432 | * This is a common optimization triggered by the |
| 411 | * networking code - if the timer is re-modified | 433 | * networking code - if the timer is re-modified |
| @@ -436,6 +458,7 @@ int del_timer(struct timer_list *timer) | |||
| 436 | unsigned long flags; | 458 | unsigned long flags; |
| 437 | int ret = 0; | 459 | int ret = 0; |
| 438 | 460 | ||
| 461 | timer_stats_timer_clear_start_info(timer); | ||
| 439 | if (timer_pending(timer)) { | 462 | if (timer_pending(timer)) { |
| 440 | base = lock_timer_base(timer, &flags); | 463 | base = lock_timer_base(timer, &flags); |
| 441 | if (timer_pending(timer)) { | 464 | if (timer_pending(timer)) { |
| @@ -569,6 +592,8 @@ static inline void __run_timers(tvec_base_t *base) | |||
| 569 | fn = timer->function; | 592 | fn = timer->function; |
| 570 | data = timer->data; | 593 | data = timer->data; |
| 571 | 594 | ||
| 595 | timer_stats_account_timer(timer); | ||
| 596 | |||
| 572 | set_running_timer(base, timer); | 597 | set_running_timer(base, timer); |
| 573 | detach_timer(timer, 1); | 598 | detach_timer(timer, 1); |
| 574 | spin_unlock_irq(&base->lock); | 599 | spin_unlock_irq(&base->lock); |
| @@ -591,105 +616,124 @@ static inline void __run_timers(tvec_base_t *base) | |||
| 591 | spin_unlock_irq(&base->lock); | 616 | spin_unlock_irq(&base->lock); |
| 592 | } | 617 | } |
| 593 | 618 | ||
| 594 | #ifdef CONFIG_NO_IDLE_HZ | 619 | #if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ) |
| 595 | /* | 620 | /* |
| 596 | * Find out when the next timer event is due to happen. This | 621 | * Find out when the next timer event is due to happen. This |
| 597 | * is used on S/390 to stop all activity when a CPU is idle. | 622 | * is used on S/390 to stop all activity when a CPU is idle. | ||
| 598 | * This function needs to be called with interrupts disabled. | 623 | * This function needs to be called with interrupts disabled. | ||
| 599 | */ | 624 | */ |
| 600 | unsigned long next_timer_interrupt(void) | 625 | static unsigned long __next_timer_interrupt(tvec_base_t *base) |
| 601 | { | 626 | { |
| 602 | tvec_base_t *base; | 627 | unsigned long timer_jiffies = base->timer_jiffies; |
| 603 | struct list_head *list; | 628 | unsigned long expires = timer_jiffies + (LONG_MAX >> 1); |
| 629 | int index, slot, array, found = 0; | ||
| 604 | struct timer_list *nte; | 630 | struct timer_list *nte; |
| 605 | unsigned long expires; | ||
| 606 | unsigned long hr_expires = MAX_JIFFY_OFFSET; | ||
| 607 | ktime_t hr_delta; | ||
| 608 | tvec_t *varray[4]; | 631 | tvec_t *varray[4]; |
| 609 | int i, j; | ||
| 610 | |||
| 611 | hr_delta = hrtimer_get_next_event(); | ||
| 612 | if (hr_delta.tv64 != KTIME_MAX) { | ||
| 613 | struct timespec tsdelta; | ||
| 614 | tsdelta = ktime_to_timespec(hr_delta); | ||
| 615 | hr_expires = timespec_to_jiffies(&tsdelta); | ||
| 616 | if (hr_expires < 3) | ||
| 617 | return hr_expires + jiffies; | ||
| 618 | } | ||
| 619 | hr_expires += jiffies; | ||
| 620 | |||
| 621 | base = __get_cpu_var(tvec_bases); | ||
| 622 | spin_lock(&base->lock); | ||
| 623 | expires = base->timer_jiffies + (LONG_MAX >> 1); | ||
| 624 | list = NULL; | ||
| 625 | 632 | ||
| 626 | /* Look for timer events in tv1. */ | 633 | /* Look for timer events in tv1. */ |
| 627 | j = base->timer_jiffies & TVR_MASK; | 634 | index = slot = timer_jiffies & TVR_MASK; |
| 628 | do { | 635 | do { |
| 629 | list_for_each_entry(nte, base->tv1.vec + j, entry) { | 636 | list_for_each_entry(nte, base->tv1.vec + slot, entry) { |
| 637 | found = 1; | ||
| 630 | expires = nte->expires; | 638 | expires = nte->expires; |
| 631 | if (j < (base->timer_jiffies & TVR_MASK)) | 639 | /* Look at the cascade bucket(s)? */ |
| 632 | list = base->tv2.vec + (INDEX(0)); | 640 | if (!index || slot < index) |
| 633 | goto found; | 641 | goto cascade; |
| 642 | return expires; | ||
| 634 | } | 643 | } |
| 635 | j = (j + 1) & TVR_MASK; | 644 | slot = (slot + 1) & TVR_MASK; |
| 636 | } while (j != (base->timer_jiffies & TVR_MASK)); | 645 | } while (slot != index); |
| 646 | |||
| 647 | cascade: | ||
| 648 | /* Calculate the next cascade event */ | ||
| 649 | if (index) | ||
| 650 | timer_jiffies += TVR_SIZE - index; | ||
| 651 | timer_jiffies >>= TVR_BITS; | ||
| 637 | 652 | ||
| 638 | /* Check tv2-tv5. */ | 653 | /* Check tv2-tv5. */ |
| 639 | varray[0] = &base->tv2; | 654 | varray[0] = &base->tv2; |
| 640 | varray[1] = &base->tv3; | 655 | varray[1] = &base->tv3; |
| 641 | varray[2] = &base->tv4; | 656 | varray[2] = &base->tv4; |
| 642 | varray[3] = &base->tv5; | 657 | varray[3] = &base->tv5; |
| 643 | for (i = 0; i < 4; i++) { | 658 | |
| 644 | j = INDEX(i); | 659 | for (array = 0; array < 4; array++) { |
| 660 | tvec_t *varp = varray[array]; | ||
| 661 | |||
| 662 | index = slot = timer_jiffies & TVN_MASK; | ||
| 645 | do { | 663 | do { |
| 646 | if (list_empty(varray[i]->vec + j)) { | 664 | list_for_each_entry(nte, varp->vec + slot, entry) { |
| 647 | j = (j + 1) & TVN_MASK; | 665 | found = 1; |
| 648 | continue; | ||
| 649 | } | ||
| 650 | list_for_each_entry(nte, varray[i]->vec + j, entry) | ||
| 651 | if (time_before(nte->expires, expires)) | 666 | if (time_before(nte->expires, expires)) |
| 652 | expires = nte->expires; | 667 | expires = nte->expires; |
| 653 | if (j < (INDEX(i)) && i < 3) | 668 | } |
| 654 | list = varray[i + 1]->vec + (INDEX(i + 1)); | 669 | /* |
| 655 | goto found; | 670 | * Are we still searching for the first timer, or are | ||
| 656 | } while (j != (INDEX(i))); | 671 | * we looking at the cascade buckets? | ||
| 657 | } | 672 | */ |
| 658 | found: | 673 | if (found) { |
| 659 | if (list) { | 674 | /* Look at the cascade bucket(s)? */ |
| 660 | /* | 675 | if (!index || slot < index) |
| 661 | * The search wrapped. We need to look at the next list | 676 | break; |
| 662 | * from next tv element that would cascade into tv element | 677 | return expires; |
| 663 | * where we found the timer element. | 678 | } |
| 664 | */ | 679 | slot = (slot + 1) & TVN_MASK; |
| 665 | list_for_each_entry(nte, list, entry) { | 680 | } while (slot != index); |
| 666 | if (time_before(nte->expires, expires)) | 681 | |
| 667 | expires = nte->expires; | 682 | if (index) |
| 668 | } | 683 | timer_jiffies += TVN_SIZE - index; |
| 684 | timer_jiffies >>= TVN_BITS; | ||
| 669 | } | 685 | } |
| 670 | spin_unlock(&base->lock); | 686 | return expires; |
| 687 | } | ||
| 671 | 688 | ||
| 672 | /* | 689 | /* |
| 673 | * It can happen that other CPUs service timer IRQs and increment | 690 | * Check if the next hrtimer event is before the next timer wheel | ||
| 674 | * jiffies, but we have not yet got a local timer tick to process | 691 | * event: |
| 675 | * the timer wheels. In that case, the expiry time can be before | 692 | */ |
| 676 | * jiffies, but since the high-resolution timer here is relative to | 693 | static unsigned long cmp_next_hrtimer_event(unsigned long now, |
| 677 | * jiffies, the default expression when high-resolution timers are | 694 | unsigned long expires) |
| 678 | * not active, | 695 | { |
| 679 | * | 696 | ktime_t hr_delta = hrtimer_get_next_event(); |
| 680 | * time_before(MAX_JIFFY_OFFSET + jiffies, expires) | 697 | struct timespec tsdelta; |
| 681 | * | 698 | |
| 682 | * would falsely evaluate to true. If that is the case, just | 699 | if (hr_delta.tv64 == KTIME_MAX) |
| 683 | * return jiffies so that we can immediately fire the local timer | 700 | return expires; |
| 684 | */ | ||
| 685 | if (time_before(expires, jiffies)) | ||
| 686 | return jiffies; | ||
| 687 | 701 | ||
| 688 | if (time_before(hr_expires, expires)) | 702 | if (hr_delta.tv64 <= TICK_NSEC) |
| 689 | return hr_expires; | 703 | return now; |
| 690 | 704 | ||
| 705 | tsdelta = ktime_to_timespec(hr_delta); | ||
| 706 | now += timespec_to_jiffies(&tsdelta); | ||
| 707 | if (time_before(now, expires)) | ||
| 708 | return now; | ||
| 691 | return expires; | 709 | return expires; |
| 692 | } | 710 | } |
| 711 | |||
| 712 | /** | ||
| 713 | * get_next_timer_interrupt - return the jiffy of the next pending timer | ||
| 714 | */ | ||
| 715 | unsigned long get_next_timer_interrupt(unsigned long now) | ||
| 716 | { | ||
| 717 | tvec_base_t *base = __get_cpu_var(tvec_bases); | ||
| 718 | unsigned long expires; | ||
| 719 | |||
| 720 | spin_lock(&base->lock); | ||
| 721 | expires = __next_timer_interrupt(base); | ||
| 722 | spin_unlock(&base->lock); | ||
| 723 | |||
| 724 | if (time_before_eq(expires, now)) | ||
| 725 | return now; | ||
| 726 | |||
| 727 | return cmp_next_hrtimer_event(now, expires); | ||
| 728 | } | ||
| 729 | |||
| 730 | #ifdef CONFIG_NO_IDLE_HZ | ||
| 731 | unsigned long next_timer_interrupt(void) | ||
| 732 | { | ||
| 733 | return get_next_timer_interrupt(jiffies); | ||
| 734 | } | ||
| 735 | #endif | ||
| 736 | |||
| 693 | #endif | 737 | #endif |
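The bucket arithmetic in __next_timer_interrupt() above mirrors the cascade in __run_timers(): after scanning tv1, `timer_jiffies` is rounded up to the next tv1 wrap and shifted right so its low bits index the next-level array. A worked example, assuming the usual TVR_BITS = 8 / TVN_BITS = 6 geometry (CONFIG_BASE_SMALL shrinks these; the starting value is illustrative):

    /* timer_jiffies              = 0x12345
     * tv1: index = slot          = 0x12345 & TVR_MASK  = 0x45
     * nothing found, index != 0, so look at the cascade:
     *   timer_jiffies += TVR_SIZE - index;  ->  0x12400
     *   timer_jiffies >>= TVR_BITS;         ->  0x124
     * tv2: index = slot          = 0x124 & TVN_MASK    = 0x24
     */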
| 694 | 738 | ||
| 695 | /******************************************************************/ | 739 | /******************************************************************/ |
| @@ -832,32 +876,35 @@ EXPORT_SYMBOL(do_settimeofday); | |||
| 832 | * | 876 | * |
| 833 | * Accumulates the current time interval and initializes a new clocksource | 877 | * Accumulates the current time interval and initializes a new clocksource | ||
| 834 | */ | 878 | */ |
| 835 | static int change_clocksource(void) | 879 | static void change_clocksource(void) |
| 836 | { | 880 | { |
| 837 | struct clocksource *new; | 881 | struct clocksource *new; |
| 838 | cycle_t now; | 882 | cycle_t now; |
| 839 | u64 nsec; | 883 | u64 nsec; |
| 884 | |||
| 840 | new = clocksource_get_next(); | 885 | new = clocksource_get_next(); |
| 841 | if (clock != new) { | 886 | |
| 842 | now = clocksource_read(new); | 887 | if (clock == new) |
| 843 | nsec = __get_nsec_offset(); | 888 | return; |
| 844 | timespec_add_ns(&xtime, nsec); | 889 | |
| 845 | 890 | now = clocksource_read(new); | |
| 846 | clock = new; | 891 | nsec = __get_nsec_offset(); |
| 847 | clock->cycle_last = now; | 892 | timespec_add_ns(&xtime, nsec); |
| 848 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | 893 | |
| 849 | clock->name); | 894 | clock = new; |
| 850 | return 1; | 895 | clock->cycle_last = now; |
| 851 | } else if (clock->update_callback) { | 896 | |
| 852 | return clock->update_callback(); | 897 | clock->error = 0; |
| 853 | } | 898 | clock->xtime_nsec = 0; |
| 854 | return 0; | 899 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
| 900 | |||
| 901 | tick_clock_notify(); | ||
| 902 | |||
| 903 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | ||
| 904 | clock->name); | ||
| 855 | } | 905 | } |
| 856 | #else | 906 | #else |
| 857 | static inline int change_clocksource(void) | 907 | static inline void change_clocksource(void) { } |
| 858 | { | ||
| 859 | return 0; | ||
| 860 | } | ||
| 861 | #endif | 908 | #endif |
| 862 | 909 | ||
| 863 | /** | 910 | /** |
| @@ -871,33 +918,56 @@ int timekeeping_is_continuous(void) | |||
| 871 | do { | 918 | do { |
| 872 | seq = read_seqbegin(&xtime_lock); | 919 | seq = read_seqbegin(&xtime_lock); |
| 873 | 920 | ||
| 874 | ret = clock->is_continuous; | 921 | ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; |
| 875 | 922 | ||
| 876 | } while (read_seqretry(&xtime_lock, seq)); | 923 | } while (read_seqretry(&xtime_lock, seq)); |
| 877 | 924 | ||
| 878 | return ret; | 925 | return ret; |
| 879 | } | 926 | } |
| 880 | 927 | ||
| 928 | /** | ||
| 929 | * read_persistent_clock - Return time in seconds from the persistent clock. | ||
| 930 | * | ||
| 931 | * Weak dummy function for arches that do not yet support it. | ||
| 932 | * Returns seconds from the epoch using the battery-backed persistent clock. | ||
| 933 | * Returns zero if unsupported. | ||
| 934 | * | ||
| 935 | * XXX - Be sure to remove it once all arches implement it. | ||
| 936 | */ | ||
| 937 | unsigned long __attribute__((weak)) read_persistent_clock(void) | ||
| 938 | { | ||
| 939 | return 0; | ||
| 940 | } | ||
| 941 | |||
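read_persistent_clock() relies on the linker's weak-symbol mechanism: the dummy above is linked in only when no architecture supplies its own definition, so arches can opt in without any Kconfig plumbing. An override is just a plain (strong) definition, sketched here with a hypothetical RTC accessor:

    /* Sketch: an arch override simply shadows the weak dummy;
     * my_rtc_read_seconds() is hypothetical. */
    unsigned long read_persistent_clock(void)
    {
            return my_rtc_read_seconds();
    }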
| 881 | /* | 942 | /* |
| 882 | * timekeeping_init - Initializes the clocksource and common timekeeping values | 943 | * timekeeping_init - Initializes the clocksource and common timekeeping values |
| 883 | */ | 944 | */ |
| 884 | void __init timekeeping_init(void) | 945 | void __init timekeeping_init(void) |
| 885 | { | 946 | { |
| 886 | unsigned long flags; | 947 | unsigned long flags; |
| 948 | unsigned long sec = read_persistent_clock(); | ||
| 887 | 949 | ||
| 888 | write_seqlock_irqsave(&xtime_lock, flags); | 950 | write_seqlock_irqsave(&xtime_lock, flags); |
| 889 | 951 | ||
| 890 | ntp_clear(); | 952 | ntp_clear(); |
| 891 | 953 | ||
| 892 | clock = clocksource_get_next(); | 954 | clock = clocksource_get_next(); |
| 893 | clocksource_calculate_interval(clock, tick_nsec); | 955 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
| 894 | clock->cycle_last = clocksource_read(clock); | 956 | clock->cycle_last = clocksource_read(clock); |
| 895 | 957 | ||
| 958 | xtime.tv_sec = sec; | ||
| 959 | xtime.tv_nsec = 0; | ||
| 960 | set_normalized_timespec(&wall_to_monotonic, | ||
| 961 | -xtime.tv_sec, -xtime.tv_nsec); | ||
| 962 | |||
| 896 | write_sequnlock_irqrestore(&xtime_lock, flags); | 963 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 897 | } | 964 | } |
| 898 | 965 | ||
| 899 | 966 | /* flag set while timekeeping is suspended */ | ||
| 900 | static int timekeeping_suspended; | 967 | static int timekeeping_suspended; |
| 968 | /* time in seconds when suspend began */ | ||
| 969 | static unsigned long timekeeping_suspend_time; | ||
| 970 | |||
| 901 | /** | 971 | /** |
| 902 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 972 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
| 903 | * @dev: unused | 973 | * @dev: unused |
| @@ -909,13 +979,26 @@ static int timekeeping_suspended; | |||
| 909 | static int timekeeping_resume(struct sys_device *dev) | 979 | static int timekeeping_resume(struct sys_device *dev) |
| 910 | { | 980 | { |
| 911 | unsigned long flags; | 981 | unsigned long flags; |
| 982 | unsigned long now = read_persistent_clock(); | ||
| 912 | 983 | ||
| 913 | write_seqlock_irqsave(&xtime_lock, flags); | 984 | write_seqlock_irqsave(&xtime_lock, flags); |
| 914 | /* restart the last cycle value */ | 985 | |
| 986 | if (now && (now > timekeeping_suspend_time)) { | ||
| 987 | unsigned long sleep_length = now - timekeeping_suspend_time; | ||
| 988 | |||
| 989 | xtime.tv_sec += sleep_length; | ||
| 990 | wall_to_monotonic.tv_sec -= sleep_length; | ||
| 991 | } | ||
| 992 | /* re-base the last cycle value */ | ||
| 915 | clock->cycle_last = clocksource_read(clock); | 993 | clock->cycle_last = clocksource_read(clock); |
| 916 | clock->error = 0; | 994 | clock->error = 0; |
| 917 | timekeeping_suspended = 0; | 995 | timekeeping_suspended = 0; |
| 918 | write_sequnlock_irqrestore(&xtime_lock, flags); | 996 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 997 | |||
| 998 | touch_softlockup_watchdog(); | ||
| 999 | /* Resume hrtimers */ | ||
| 1000 | clock_was_set(); | ||
| 1001 | |||
| 919 | return 0; | 1002 | return 0; |
| 920 | } | 1003 | } |
| 921 | 1004 | ||
| @@ -925,6 +1008,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
| 925 | 1008 | ||
| 926 | write_seqlock_irqsave(&xtime_lock, flags); | 1009 | write_seqlock_irqsave(&xtime_lock, flags); |
| 927 | timekeeping_suspended = 1; | 1010 | timekeeping_suspended = 1; |
| 1011 | timekeeping_suspend_time = read_persistent_clock(); | ||
| 928 | write_sequnlock_irqrestore(&xtime_lock, flags); | 1012 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 929 | return 0; | 1013 | return 0; |
| 930 | } | 1014 | } |
| @@ -1089,11 +1173,8 @@ static void update_wall_time(void) | |||
| 1089 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; | 1173 | clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; |
| 1090 | 1174 | ||
| 1091 | /* check to see if there is a new clocksource to use */ | 1175 | /* check to see if there is a new clocksource to use */ |
| 1092 | if (change_clocksource()) { | 1176 | change_clocksource(); |
| 1093 | clock->error = 0; | 1177 | update_vsyscall(&xtime, clock); |
| 1094 | clock->xtime_nsec = 0; | ||
| 1095 | clocksource_calculate_interval(clock, tick_nsec); | ||
| 1096 | } | ||
| 1097 | } | 1178 | } |
| 1098 | 1179 | ||
| 1099 | /* | 1180 | /* |
| @@ -1162,11 +1243,9 @@ static inline void calc_load(unsigned long ticks) | |||
| 1162 | * This seqlock protects us from races in SMP while | 1243 | * This seqlock protects us from races in SMP while | ||
| 1163 | * playing with xtime and avenrun. | 1244 | * playing with xtime and avenrun. |
| 1164 | */ | 1245 | */ |
| 1165 | #ifndef ARCH_HAVE_XTIME_LOCK | 1246 | __attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
| 1166 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); | ||
| 1167 | 1247 | ||
| 1168 | EXPORT_SYMBOL(xtime_lock); | 1248 | EXPORT_SYMBOL(xtime_lock); |
| 1169 | #endif | ||
| 1170 | 1249 | ||
| 1171 | /* | 1250 | /* |
| 1172 | * This function runs timers and the timer-tq in bottom half context. | 1251 | * This function runs timers and the timer-tq in bottom half context. |
| @@ -1175,7 +1254,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
| 1175 | { | 1254 | { |
| 1176 | tvec_base_t *base = __get_cpu_var(tvec_bases); | 1255 | tvec_base_t *base = __get_cpu_var(tvec_bases); |
| 1177 | 1256 | ||
| 1178 | hrtimer_run_queues(); | 1257 | hrtimer_run_queues(); |
| 1258 | |||
| 1179 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1259 | if (time_after_eq(jiffies, base->timer_jiffies)) |
| 1180 | __run_timers(base); | 1260 | __run_timers(base); |
| 1181 | } | 1261 | } |
| @@ -1621,6 +1701,8 @@ void __init init_timers(void) | |||
| 1621 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, | 1701 | int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, |
| 1622 | (void *)(long)smp_processor_id()); | 1702 | (void *)(long)smp_processor_id()); |
| 1623 | 1703 | ||
| 1704 | init_timer_stats(); | ||
| 1705 | |||
| 1624 | BUG_ON(err == NOTIFY_BAD); | 1706 | BUG_ON(err == NOTIFY_BAD); |
| 1625 | register_cpu_notifier(&timers_nb); | 1707 | register_cpu_notifier(&timers_nb); |
| 1626 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); | 1708 | open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index baacc3691415..658f638c402c 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
| @@ -22,8 +22,6 @@ | |||
| 22 | #include <linux/acct.h> | 22 | #include <linux/acct.h> |
| 23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
| 24 | 24 | ||
| 25 | |||
| 26 | #define USEC_PER_TICK (USEC_PER_SEC/HZ) | ||
| 27 | /* | 25 | /* |
| 28 | * fill in basic accounting fields | 26 | * fill in basic accounting fields |
| 29 | */ | 27 | */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 020d1fff57dc..b6fa5e63085d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -218,7 +218,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) | |||
| 218 | } | 218 | } |
| 219 | EXPORT_SYMBOL_GPL(queue_work); | 219 | EXPORT_SYMBOL_GPL(queue_work); |
| 220 | 220 | ||
| 221 | static void delayed_work_timer_fn(unsigned long __data) | 221 | void delayed_work_timer_fn(unsigned long __data) |
| 222 | { | 222 | { |
| 223 | struct delayed_work *dwork = (struct delayed_work *)__data; | 223 | struct delayed_work *dwork = (struct delayed_work *)__data; |
| 224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); | 224 | struct workqueue_struct *wq = get_wq_data(&dwork->work); |
| @@ -245,6 +245,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, | |||
| 245 | struct timer_list *timer = &dwork->timer; | 245 | struct timer_list *timer = &dwork->timer; |
| 246 | struct work_struct *work = &dwork->work; | 246 | struct work_struct *work = &dwork->work; |
| 247 | 247 | ||
| 248 | timer_stats_timer_set_start_info(timer); | ||
| 248 | if (delay == 0) | 249 | if (delay == 0) |
| 249 | return queue_work(wq, work); | 250 | return queue_work(wq, work); |
| 250 | 251 | ||
| @@ -593,8 +594,10 @@ EXPORT_SYMBOL(schedule_work); | |||
| 593 | * After waiting for a given time this puts a job in the kernel-global | 594 | * After waiting for a given time this puts a job in the kernel-global |
| 594 | * workqueue. | 595 | * workqueue. |
| 595 | */ | 596 | */ |
| 596 | int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) | 597 | int fastcall schedule_delayed_work(struct delayed_work *dwork, |
| 598 | unsigned long delay) | ||
| 597 | { | 599 | { |
| 600 | timer_stats_timer_set_start_info(&dwork->timer); | ||
| 598 | return queue_delayed_work(keventd_wq, dwork, delay); | 601 | return queue_delayed_work(keventd_wq, dwork, delay); |
| 599 | } | 602 | } |
| 600 | EXPORT_SYMBOL(schedule_delayed_work); | 603 | EXPORT_SYMBOL(schedule_delayed_work); |
