Diffstat (limited to 'include/linux/sched.h')

-rw-r--r--  include/linux/sched.h | 251
1 file changed, 186 insertions(+), 65 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4d0754269884..75e6e60bf583 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -38,6 +38,8 @@
 #define SCHED_BATCH 3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE 5
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK 0x40000000
 
 #ifdef __KERNEL__
 
@@ -98,7 +100,7 @@ struct robust_list_head;
 struct bio;
 struct fs_struct;
 struct bts_context;
-struct perf_counter_context;
+struct perf_event_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -138,6 +140,10 @@ extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
+extern unsigned long nr_iowait_cpu(void);
+extern unsigned long this_cpu_load(void);
+
+
 extern void calc_global_load(void);
 extern u64 cpu_nr_migrations(int cpu);
 
@@ -188,6 +194,7 @@ extern unsigned long long time_sync_thresh;
 /* in tsk->state again */
 #define TASK_DEAD 64
 #define TASK_WAKEKILL 128
+#define TASK_WAKING 256
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -209,7 +216,7 @@ extern unsigned long long time_sync_thresh;
 ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task) \
 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-(task->flags & PF_FROZEN) == 0)
+(task->flags & PF_FREEZING) == 0)
 
 #define __set_task_state(tsk, state_value) \
 do { (tsk)->state = (state_value); } while (0)
@@ -254,7 +261,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
-extern int runqueue_is_locked(void);
+extern int runqueue_is_locked(int cpu);
 extern void task_rq_unlock_wait(struct task_struct *p);
 
 extern cpumask_var_t nohz_cpu_mask;
@@ -302,7 +309,7 @@ extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-struct file *filp, void __user *buffer,
+void __user *buffer,
 size_t *lenp, loff_t *ppos);
 extern unsigned int softlockup_panic;
 extern int softlockup_thresh;
@@ -324,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-struct file *filp, void __user *buffer,
+void __user *buffer,
 size_t *lenp, loff_t *ppos);
 #endif
 
@@ -349,8 +356,20 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
 struct nsproxy;
 struct user_namespace;
 
-/* Maximum number of active map areas.. This is a random (large) number */
-#define DEFAULT_MAX_MAP_COUNT 65536
+/*
+ * Default maximum number of active map areas, this limits the number of vmas
+ * per mm struct. Users can overwrite this number by sysctl but there is a
+ * problem.
+ *
+ * When a program's coredump is generated as ELF format, a section is created
+ * per a vma. In ELF, the number of sections is represented in unsigned short.
+ * This means the number of sections should be smaller than 65535 at coredump.
+ * Because the kernel adds some informative sections to a image of program at
+ * generating coredump, we need some margin. The number of extra sections is
+ * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ */
+#define MAPCOUNT_ELF_CORE_MARGIN (5)
+#define DEFAULT_MAX_MAP_COUNT (USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
 
 extern int sysctl_max_map_count;
 
@@ -407,6 +426,15 @@ static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
 return max(mm->hiwater_rss, get_mm_rss(mm));
 }
 
+static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
+struct mm_struct *mm)
+{
+unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
+
+if (*maxrss < hiwater_rss)
+*maxrss = hiwater_rss;
+}
+
 static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
 {
 return max(mm->hiwater_vm, mm->total_vm);
@@ -419,7 +447,9 @@ extern int get_dumpable(struct mm_struct *mm);
 /* dumpable bits */
 #define MMF_DUMPABLE 0 /* core dump is permitted */
 #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */
+
 #define MMF_DUMPABLE_BITS 2
+#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
 
 /* coredump filter bits */
 #define MMF_DUMP_ANON_PRIVATE 2
@@ -429,6 +459,7 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_ELF_HEADERS 6
 #define MMF_DUMP_HUGETLB_PRIVATE 7
 #define MMF_DUMP_HUGETLB_SHARED 8
+
 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS
 #define MMF_DUMP_FILTER_BITS 7
 #define MMF_DUMP_FILTER_MASK \
@@ -442,6 +473,10 @@ extern int get_dumpable(struct mm_struct *mm);
 #else
 # define MMF_DUMP_MASK_DEFAULT_ELF 0
 #endif
+/* leave room for more dump flags */
+#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
+
+#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
 struct sighand_struct {
 atomic_t count;
@@ -458,6 +493,13 @@ struct pacct_struct {
 unsigned long ac_minflt, ac_majflt;
 };
 
+struct cpu_itimer {
+cputime_t expires;
+cputime_t incr;
+u32 error;
+u32 incr_error;
+};
+
 /**
  * struct task_cputime - collected CPU time counts
  * @utime: time spent in user mode, in &cputime_t units
@@ -486,6 +528,15 @@ struct task_cputime {
 .sum_exec_runtime = 0, \
 }
 
+/*
+ * Disable preemption until the scheduler is running.
+ * Reset by start_kernel()->sched_init()->init_idle().
+ *
+ * We include PREEMPT_ACTIVE to avoid cond_resched() from working
+ * before the scheduler is active -- see should_resched().
+ */
+#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE)
+
 /**
  * struct thread_group_cputimer - thread group interval timer counts
  * @cputime: thread group interval timers.
@@ -543,9 +594,12 @@ struct signal_struct {
 struct pid *leader_pid;
 ktime_t it_real_incr;
 
-/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
-cputime_t it_prof_expires, it_virt_expires;
-cputime_t it_prof_incr, it_virt_incr;
+/*
+ * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
+ * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
+ * values are defined to 0 and 1 respectively
+ */
+struct cpu_itimer it[2];
 
 /*
  * Thread group totals for process CPU timers.
@@ -577,6 +631,7 @@ struct signal_struct {
 unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 unsigned long inblock, oublock, cinblock, coublock;
+unsigned long maxrss, cmaxrss;
 struct task_io_accounting ioac;
 
 /*
@@ -608,6 +663,8 @@ struct signal_struct {
 unsigned audit_tty;
 struct tty_audit_buf *tty_audit_buf;
 #endif
+
+int oom_adj; /* OOM kill score adjustment (bit shift) */
 };
 
 /* Context switch must be unlocked if interrupts are to be enabled */
@@ -677,7 +734,7 @@ struct user_struct {
 #endif
 #endif
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 atomic_long_t locked_vm;
 #endif
 };
@@ -775,18 +832,19 @@ enum cpu_idle_type {
 #define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE
 
 #ifdef CONFIG_SMP
-#define SD_LOAD_BALANCE 1 /* Do load balancing on this domain. */
-#define SD_BALANCE_NEWIDLE 2 /* Balance when about to become idle */
-#define SD_BALANCE_EXEC 4 /* Balance on exec */
-#define SD_BALANCE_FORK 8 /* Balance on fork, clone */
-#define SD_WAKE_IDLE 16 /* Wake to idle CPU on task wakeup */
-#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */
-#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */
-#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */
-#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */
-#define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */
-#define SD_SERIALIZE 1024 /* Only a single load balancing instance */
-#define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */
+#define SD_LOAD_BALANCE 0x0001 /* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */
+#define SD_BALANCE_EXEC 0x0004 /* Balance on exec */
+#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
+#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
+#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
+#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */
+#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
+#define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */
+#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
+#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
+
+#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
 
 enum powersavings_balance_level {
 POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
@@ -806,7 +864,7 @@ static inline int sd_balance_for_mc_power(void)
 if (sched_smt_power_savings)
 return SD_POWERSAVINGS_BALANCE;
 
-return 0;
+return SD_PREFER_SIBLING;
 }
 
 static inline int sd_balance_for_package_power(void)
@@ -814,7 +872,7 @@ static inline int sd_balance_for_package_power(void)
 if (sched_mc_power_savings | sched_smt_power_savings)
 return SD_POWERSAVINGS_BALANCE;
 
-return 0;
+return SD_PREFER_SIBLING;
 }
 
 /*
@@ -836,15 +894,9 @@ struct sched_group {
 
 /*
  * CPU power of this group, SCHED_LOAD_SCALE being max power for a
- * single CPU. This is read only (except for setup, hotplug CPU).
- * Note : Never change cpu_power without recompute its reciprocal
+ * single CPU.
  */
-unsigned int __cpu_power;
-/*
- * reciprocal value of cpu_power to avoid expensive divides
- * (see include/linux/reciprocal_div.h)
- */
-u32 reciprocal_cpu_power;
+unsigned int cpu_power;
 
 /*
  * The CPUs this group covers.
@@ -897,6 +949,7 @@ struct sched_domain {
 unsigned int newidle_idx;
 unsigned int wake_idx;
 unsigned int forkexec_idx;
+unsigned int smt_gain;
 int flags; /* See SD_* */
 enum sched_domain_level level;
 
@@ -972,6 +1025,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag)
 return 0;
 }
 
+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu);
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu);
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
@@ -983,6 +1039,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
 }
 #endif /* !CONFIG_SMP */
 
+
 struct io_context; /* See blkdev.h */
 
 
@@ -1000,6 +1057,12 @@ struct uts_namespace;
 struct rq;
 struct sched_domain;
 
+/*
+ * wake flags
+ */
+#define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+#define WF_FORK 0x02 /* child wakeup after fork */
+
 struct sched_class {
 const struct sched_class *next;
 
@@ -1007,13 +1070,13 @@ struct sched_class {
 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
 void (*yield_task) (struct rq *rq);
 
-void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
+void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
 
 struct task_struct * (*pick_next_task) (struct rq *rq);
 void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-int (*select_task_rq)(struct task_struct *p, int sync);
+int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
 
 unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
 struct rq *busiest, unsigned long max_load_move,
@@ -1024,7 +1087,6 @@ struct sched_class {
 struct rq *busiest, struct sched_domain *sd,
 enum cpu_idle_type idle);
 void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-int (*needs_post_schedule) (struct rq *this_rq);
 void (*post_schedule) (struct rq *this_rq);
 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
@@ -1046,6 +1108,8 @@ struct sched_class {
 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
 int oldprio, int running);
 
+unsigned int (*get_rr_interval) (struct task_struct *task);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 void (*moved_group) (struct task_struct *p);
 #endif
@@ -1084,11 +1148,15 @@ struct sched_entity {
 u64 start_runtime;
 u64 avg_wakeup;
 
+u64 avg_running;
+
 #ifdef CONFIG_SCHEDSTATS
 u64 wait_start;
 u64 wait_max;
 u64 wait_count;
 u64 wait_sum;
+u64 iowait_count;
+u64 iowait_sum;
 
 u64 sleep_start;
 u64 sleep_max;
@@ -1142,6 +1210,8 @@ struct sched_rt_entity {
 #endif
 };
 
+struct rcu_node;
+
 struct task_struct {
 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
 void *stack;
@@ -1184,10 +1254,12 @@ struct task_struct {
 unsigned int policy;
 cpumask_t cpus_allowed;
 
-#ifdef CONFIG_PREEMPT_RCU
+#ifdef CONFIG_TREE_PREEMPT_RCU
 int rcu_read_lock_nesting;
-int rcu_flipctr_idx;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
+char rcu_read_unlock_special;
+struct rcu_node *rcu_blocked_node;
+struct list_head rcu_node_entry;
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 struct sched_info sched_info;
@@ -1199,7 +1271,6 @@ struct task_struct {
 struct mm_struct *mm, *active_mm;
 
 /* task state */
-struct linux_binfmt *binfmt;
 int exit_state;
 int exit_code, exit_signal;
 int pdeath_signal; /* The signal sent when the parent dies */
@@ -1208,11 +1279,19 @@ struct task_struct {
 unsigned did_exec:1;
 unsigned in_execve:1; /* Tell the LSMs that the process is doing an
 * execve */
+unsigned in_iowait:1;
+
+
+/* Revert to default priority/policy when forking */
+unsigned sched_reset_on_fork:1;
+
 pid_t pid;
 pid_t tgid;
 
+#ifdef CONFIG_CC_STACKPROTECTOR
 /* Canary value for the -fstack-protector gcc feature */
 unsigned long stack_canary;
+#endif
 
 /*
  * pointers to (original) parent process, youngest child, younger sibling,
@@ -1270,6 +1349,7 @@ struct task_struct {
 struct mutex cred_guard_mutex; /* guard against foreign influences on
 * credential calculations
 * (notably. ptrace) */
+struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
 
 char comm[TASK_COMM_LEN]; /* executable name excluding path
 - access with [gs]et_task_comm (which lock
@@ -1402,10 +1482,10 @@ struct task_struct {
 struct list_head pi_state_list;
 struct futex_pi_state *pi_state_cache;
 #endif
-#ifdef CONFIG_PERF_COUNTERS
-struct perf_counter_context *perf_counter_ctxp;
-struct mutex perf_counter_mutex;
-struct list_head perf_counter_list;
+#ifdef CONFIG_PERF_EVENTS
+struct perf_event_context *perf_event_ctxp;
+struct mutex perf_event_mutex;
+struct list_head perf_event_list;
 #endif
 #ifdef CONFIG_NUMA
 struct mempolicy *mempolicy; /* Protected by alloc_lock */
@@ -1458,6 +1538,7 @@ struct task_struct {
 /* bitmask of trace recursion */
 unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
+unsigned long stack_start;
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -1653,17 +1734,19 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
 #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
 #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
 #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
 #define PF_DUMPCORE 0x00000200 /* dumped core */
 #define PF_SIGNALED 0x00000400 /* killed by a signal */
 #define PF_MEMALLOC 0x00000800 /* Allocating memory */
 #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */
 #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
+#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */
 #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
 #define PF_FROZEN 0x00010000 /* frozen for system suspend */
 #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
 #define PF_KSWAPD 0x00040000 /* I am kswapd */
-#define PF_SWAPOFF 0x00080000 /* I am in swapoff */
+#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */
 #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
 #define PF_KTHREAD 0x00200000 /* I am a kernel thread */
 #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
@@ -1671,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
 #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
+#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
 #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
@@ -1701,6 +1785,27 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
 #define used_math() tsk_used_math(current)
 
+#ifdef CONFIG_TREE_PREEMPT_RCU
+
+#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+
+static inline void rcu_copy_process(struct task_struct *p)
+{
+p->rcu_read_lock_nesting = 0;
+p->rcu_read_unlock_special = 0;
+p->rcu_blocked_node = NULL;
+INIT_LIST_HEAD(&p->rcu_node_entry);
+}
+
+#else
+
+static inline void rcu_copy_process(struct task_struct *p)
+{
+}
+
+#endif
+
 #ifdef CONFIG_SMP
 extern int set_cpus_allowed_ptr(struct task_struct *p,
 const struct cpumask *new_mask);
@@ -1713,10 +1818,13 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p,
 return 0;
 }
 #endif
+
+#ifndef CONFIG_CPUMASK_OFFSTACK
 static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
 return set_cpus_allowed_ptr(p, &new_mask);
 }
+#endif
 
 /*
  * Architectures can set this to 1 if they have specified
@@ -1790,15 +1898,16 @@ extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_shares_ratelimit;
 extern unsigned int sysctl_sched_shares_thresh;
-#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_child_runs_first;
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-struct file *file, void __user *buffer, size_t *length,
+void __user *buffer, size_t *length,
 loff_t *ppos);
 #endif
 #ifdef CONFIG_SCHED_DEBUG
@@ -1816,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
 int sched_rt_handler(struct ctl_table *table, int write,
-struct file *filp, void __user *buffer, size_t *lenp,
+void __user *buffer, size_t *lenp,
 loff_t *ppos);
 
 extern unsigned int sysctl_sched_compat_yield;
@@ -1951,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
 extern int kill_proc_info(int, struct siginfo *, pid_t);
 extern int do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
@@ -2054,7 +2164,7 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 #define for_each_process(p) \
 for (p = &init_task ; (p = next_task(p)) != &init_task ; )
 
-extern bool is_single_threaded(struct task_struct *);
+extern bool current_is_single_threaded(void);
 
 /*
  * Careful: do_each_thread/while_each_thread is a double loop so
@@ -2228,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p)
 return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
 
-extern int __fatal_signal_pending(struct task_struct *p);
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
 
 static inline int fatal_signal_pending(struct task_struct *p)
 {
@@ -2258,23 +2371,31 @@ static inline int need_resched(void)
  * cond_resched_softirq() will enable bhs before scheduling.
  */
 extern int _cond_resched(void);
-#ifdef CONFIG_PREEMPT_BKL
-static inline int cond_resched(void)
-{
-return 0;
-}
+
+#define cond_resched() ({ \
+__might_sleep(__FILE__, __LINE__, 0); \
+_cond_resched(); \
+})
+
+extern int __cond_resched_lock(spinlock_t *lock);
+
+#ifdef CONFIG_PREEMPT
+#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
 #else
-static inline int cond_resched(void)
-{
-return _cond_resched();
-}
+#define PREEMPT_LOCK_OFFSET 0
 #endif
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-static inline int cond_resched_bkl(void)
-{
-return _cond_resched();
-}
+
+#define cond_resched_lock(lock) ({ \
+__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+__cond_resched_lock(lock); \
+})
+
+extern int __cond_resched_softirq(void);
+
+#define cond_resched_softirq() ({ \
+__might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \
+__cond_resched_softirq(); \
+})
 
 /*
  * Does a critical section need to be broken due to another
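
Usage note: the SCHED_RESET_ON_FORK flag added in the first hunk is ORed into the policy passed to sched_setscheduler() from userspace, so that a realtime policy/priority is not inherited across fork(). A minimal userspace sketch, not part of this patch; the fallback #define and the priority value 10 are illustrative assumptions:

#include <sched.h>
#include <stdio.h>

#ifndef SCHED_RESET_ON_FORK
#define SCHED_RESET_ON_FORK 0x40000000  /* same value as the define added above */
#endif

int main(void)
{
        struct sched_param sp = { .sched_priority = 10 };

        /* Run this task as SCHED_FIFO, but have children fall back to
         * SCHED_NORMAL with default priority on fork(). */
        if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp) == -1) {
                perror("sched_setscheduler");
                return 1;
        }
        return 0;
}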