/*
 * Definition of the scheduler plugin interface.
 */
#ifndef _LINUX_RT_PARAM_H_
#define _LINUX_RT_PARAM_H_

/* Litmus time type. */
typedef unsigned long long lt_t;

static inline int lt_after(lt_t a, lt_t b)
{
	return ((long long) b) - ((long long) a) < 0;
}
#define lt_before(a, b) lt_after(b, a)

static inline int lt_after_eq(lt_t a, lt_t b)
{
	return ((long long) a) - ((long long) b) >= 0;
}
#define lt_before_eq(a, b) lt_after_eq(b, a)

/* different types of clients */
typedef enum {
	RT_CLASS_HARD,
	RT_CLASS_SOFT,
	RT_CLASS_BEST_EFFORT
} task_class_t;

typedef enum {
	NO_ENFORCEMENT,      /* job may overrun unhindered */
	QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
	PRECISE_ENFORCEMENT  /* budgets are enforced with hrtimers */
} budget_policy_t;

/* budget draining policy (ignored if neither budget enforcement
 * nor signalling is used) */
typedef enum {
	DRAIN_SIMPLE,    /* drains while the task is linked */
	DRAIN_SIMPLE_IO, /* drains while the task is linked or blocked
			  * (but not while waiting for a Litmus lock) */
	DRAIN_SAWARE,    /* drains according to suspension-aware analysis */
	DRAIN_SOBLIV     /* drains according to suspension-oblivious analysis */
} budget_drain_policy_t;

/* signal policy for budget exhaustion */
typedef enum {
	NO_SIGNALS,      /* job receives no signals when it exhausts its budget */
	QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */
	PRECISE_SIGNALS  /* budget signals are triggered with hrtimers */
} budget_signal_policy_t;

/* Release behaviors for jobs. PERIODIC and EARLY jobs must end by
 * calling sys_complete_job() (or equivalent) to set up their next
 * release and deadline. */
typedef enum {
	/* Jobs are released sporadically (provided job precedence
	 * constraints are met). */
	TASK_SPORADIC,

	/* Jobs are released periodically (provided job precedence
	 * constraints are met). */
	TASK_PERIODIC,

	/* Jobs are released immediately after meeting precedence
	 * constraints. Beware: this can peg your CPUs if used in the
	 * wrong applications. Only supported by EDF schedulers. */
	TASK_EARLY
} release_policy_t;

/* Real-time behaviors of forked threads that are not explicitly real-time. */
typedef enum {
	/* Priority of non-rt (aux) threads is inherited from the
	 * highest-priority suspended real-time thread within the process. */
	AUX_ENABLE = 0x1,
	/* All current non-rt threads become aux threads. */
	AUX_CURRENT = (AUX_ENABLE<<1),
	/* Any non-rt threads forked in the future automatically become
	 * aux threads. */
	AUX_FUTURE = (AUX_CURRENT<<1)
} auxiliary_thread_flags_t;

/* We use the common priority interpretation "lower index == higher priority",
 * which is commonly used in fixed-priority schedulability analysis papers.
 * So, a numerically lower priority value implies higher scheduling priority,
 * with priority 1 being the highest priority. Priority 0 is reserved for
 * priority boosting. LITMUS_MAX_PRIORITY denotes the size of the priority
 * value range.
 */
#define LITMUS_MAX_PRIORITY     512
#define LITMUS_HIGHEST_PRIORITY   1
#define LITMUS_LOWEST_PRIORITY    (LITMUS_MAX_PRIORITY - 1)
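/* Usage sketch (not part of the original header): the signed-difference
 * tests in the lt_after()/lt_before() helpers above remain correct even
 * if timestamps wrap around the top of the value range, which a naive
 * unsigned comparison does not. Disabled illustration: */
#if 0
static void lt_compare_example(void)
{
	lt_t a = (lt_t) -100;	/* near the top of the value range */
	lt_t b = a + 200;	/* wraps around to a small value */

	BUG_ON(!lt_after(b, a));	/* b is still "later" than a... */
	BUG_ON(b > a);			/* ...although a plain '>' disagrees */
}
#endif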
/* Provide generic comparison macros for userspace,
 * in case we change this later. */
#define litmus_higher_fixed_prio(a, b)	(a < b)
#define litmus_lower_fixed_prio(a, b)	(a > b)
#define litmus_is_valid_fixed_prio(p)		\
	((p) >= LITMUS_HIGHEST_PRIORITY &&	\
	 (p) <= LITMUS_LOWEST_PRIORITY)

struct rt_task {
	lt_t		exec_cost;
	lt_t		period;
	lt_t		relative_deadline;
	lt_t		phase;
	unsigned int	cpu;
	unsigned int	priority;
	task_class_t	cls;
	budget_policy_t		budget_policy;		/* ignored by pfair */
	budget_drain_policy_t	drain_policy;
	budget_signal_policy_t	budget_signal_policy;	/* ignored by pfair */
	release_policy_t	release_policy;
};

union np_flag {
	uint32_t raw;
	struct {
		/* Is the task currently in a non-preemptive section? */
		uint32_t flag:31;
		/* Should the task call into the scheduler? */
		uint32_t preempt:1;
	} np;
};

struct affinity_observer_args {
	int lock_od;
};

struct gpu_affinity_observer_args {
	struct affinity_observer_args obs;
	unsigned int replica_to_gpu_offset;
	unsigned int rho;
	int relaxed_rules;
};

#define R2DGLP_M_IN_FIFOS		(0u)
#define R2DGLP_UNLIMITED_IN_FIFOS	(~0u)
#define R2DGLP_OPTIMAL_FIFO_LEN		(0u)
#define R2DGLP_UNLIMITED_FIFO_LEN	(~0u)

struct r2dglp_args {
	unsigned int nr_replicas;
	unsigned int max_in_fifos;
	unsigned int max_fifo_len;
};

/* The definition of the data that is shared between the kernel and real-time
 * tasks via a shared page (see litmus/ctrldev.c).
 *
 * WARNING: User space can write to this, so don't trust
 * the correctness of the fields!
 *
 * This serves two purposes: to enable efficient signaling
 * of non-preemptive sections (user->kernel) and
 * delayed preemptions (kernel->user), and to export
 * some real-time relevant statistics such as preemption and
 * migration data to user space. (We can't use a device to export
 * statistics because we want to avoid system call overhead when
 * determining preemption/migration overheads.)
 */
struct control_page {
	/* This flag is used by userspace to communicate non-preemptive
	 * sections. */
	volatile __attribute__ ((aligned (8))) union np_flag sched;

	/* Incremented by the kernel each time an IRQ is handled. */
	volatile __attribute__ ((aligned (8))) uint64_t irq_count;

	/* Locking overhead tracing: userspace records here the time stamp
	 * and IRQ counter prior to starting the system call. */
	uint64_t ts_syscall_start;  /* Feather-Trace cycles */
	uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
				     * started. */

	/* to be extended */
};

/* Expected offsets within the control page. */
#define LITMUS_CP_OFFSET_SCHED		0
#define LITMUS_CP_OFFSET_IRQ_COUNT	8
#define LITMUS_CP_OFFSET_TS_SC_START	16
#define LITMUS_CP_OFFSET_IRQ_SC_START	24
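/* Usage sketch (not part of the original header): the np_flag protocol as
 * seen from user space, per the signaling purpose described above. 'cp' is
 * the mapped control page, and yield_to_scheduler() is a hypothetical
 * stand-in for however the task calls into the scheduler (e.g., a
 * sched_yield()-style system call). Disabled illustration: */
#if 0
static void np_section_example(volatile struct control_page* cp)
{
	cp->sched.np.flag = 1;		/* enter non-preemptive section */
	/* ... critical work that must not be preempted ... */
	cp->sched.np.flag = 0;		/* leave the section */
	if (cp->sched.np.preempt)	/* did the kernel defer a preemption? */
		yield_to_scheduler();	/* hypothetical: let it happen now */
}
#endif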
/* sched_trace event injection */

/* Mirror of st_event_record_type_t.
 * Assume all are unsupported unless otherwise stated. */
typedef enum {
	ST_INJECT_NAME = 1,        /* supported */
	ST_INJECT_PARAM,           /* supported */
	ST_INJECT_RELEASE,         /* supported */
	ST_INJECT_ASSIGNED,
	ST_INJECT_SWITCH_TO,
	ST_INJECT_SWITCH_AWAY,
	ST_INJECT_COMPLETION,      /* supported */
	ST_INJECT_BLOCK,
	ST_INJECT_RESUME,
	ST_INJECT_ACTION,          /* supported */
	ST_INJECT_SYS_RELEASE,     /* supported */
	ST_INJECT_MIGRATION = 21,  /* supported */
} sched_trace_injection_events_t;

struct st_inject_args {
	union {
		/* ST_INJECT_RELEASE, ST_INJECT_COMPLETION */
		struct {
			lt_t release;
			lt_t deadline;
			unsigned int job_no;
		};

		/* ST_INJECT_ACTION */
		unsigned int action;

		/* ST_INJECT_MIGRATION */
		struct {
			unsigned int from;
			unsigned int to;
		};
	};
};

/* don't export internal data structures to user space (liblitmus) */
#ifdef __KERNEL__

/* NOTE: the file names of the following includes were lost in extraction;
 * the headers given here are plausible reconstructions of the dependencies
 * this section needs (binheap, budget tracker, etc.), not verbatim
 * originals. */
#include <litmus/binheap.h>
#include <litmus/budget.h>
#include <linux/semaphore.h>

#ifdef CONFIG_LITMUS_SOFTIRQD
#include <linux/interrupt.h>
#endif

#if defined(CONFIG_LITMUS_NVIDIA) && defined(CONFIG_LITMUS_AFFINITY_LOCKING)
/*** GPU affinity tracking structures ***/

typedef enum gpu_migration_dist {
	MIG_LOCAL = 0,
	MIG_NEAR  = 1,
	MIG_MED   = 2,
	MIG_FAR   = 3, /* assumes 8 GPU binary tree hierarchy */
	MIG_NONE  = 4,

	MIG_LAST = MIG_NONE
} gpu_migration_dist_t;

#if 0
typedef struct feedback_est {
	fp_t est;
	fp_t accum_err;
} feedback_est_t;
#endif

#define AVG_EST_WINDOW_SIZE 20

typedef struct avg_est {
	lt_t history[AVG_EST_WINDOW_SIZE];
	uint16_t count;
	uint16_t idx;
	lt_t sum;
	lt_t avg;
	lt_t std;
} avg_est_t;
#endif /* end LITMUS_NVIDIA && LITMUS_AFFINITY_LOCKING */

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk);
#endif /* end LITMUS_AFFINITY_LOCKING */

#ifdef CONFIG_LITMUS_SOFTIRQD
/* klmirqd (real-time threaded interrupt) thread data */
struct klmirqd_info {
	struct task_struct*	klmirqd;
	unsigned int		terminating:1;

	raw_spinlock_t		lock;

	u32			pending;
	atomic_t		num_hi_pending;
	atomic_t		num_low_pending;
	atomic_t		num_work_pending;

	struct tasklet_head	pending_tasklets_hi;
	struct tasklet_head	pending_tasklets;
	struct list_head	worklist;

	struct list_head	klmirqd_reg;
	struct completion*	exited;
};
#endif

struct _rt_domain;
struct bheap_node;
struct release_heap;

struct rt_job {
	/* Time instant the job was or will be released. */
	lt_t	release;

	/* What is the current deadline? */
	lt_t	deadline;

	/* How much service has this job received so far? */
	lt_t	exec_time;

	/* By how much did the prior job miss its deadline?
	 * Differs from tardiness in that lateness may be negative
	 * (when a job finishes before its deadline). */
	long long	lateness;

	/* Which job is this? This is used to let user space
	 * specify which job to wait for, which is important if jobs
	 * overrun. If we just call sys_sleep_next_period() then we
	 * will unintentionally miss jobs after an overrun.
	 *
	 * Increased each time a job is released. */
	unsigned int	job_no;

	/* Incremented each time a job is forced to complete by budget
	 * exhaustion. If a job completes without remaining budget, the
	 * next job will be early-released __without__ pushing back its
	 * deadline. backlog is decremented once per early release. This
	 * behavior continues until backlog == 0. */
	unsigned int	backlog;

	/* Denotes if the current job is a backlogged job that was early
	 * released due to budget enforcement behaviors. */
	unsigned int	is_backlogged_job:1;
};
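/* Usage sketch (not part of the original header): "tardiness" as used in
 * the lateness comment above is simply lateness clamped at zero; a
 * hypothetical helper to make the distinction concrete. Disabled
 * illustration: */
#if 0
static inline lt_t job_tardiness(const struct rt_job* job)
{
	/* negative lateness (early completion) counts as zero tardiness */
	return job->lateness > 0 ? (lt_t) job->lateness : 0;
}
#endif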
struct pfair_param;

/* RT task parameters for scheduling extensions.
 * These parameters are inherited during clone and therefore must
 * be explicitly set up before the task set is launched. */
struct rt_param {
	/* Generic flags available for plugin-internal use. */
	unsigned int		flags:8;

	/* do we need to check for srp blocking? */
	unsigned int		srp_non_recurse:1;

	/* is the task present? (true if it can be scheduled) */
	unsigned int		present:1;

	/* has the task completed? */
	unsigned int		completed:1;

#ifdef CONFIG_LITMUS_NVIDIA
	long unsigned int	held_gpus;	/* bitmap of held GPUs */
	struct binheap_node	gpu_owner_node;	/* just one GPU for now... */
	unsigned int		hide_from_gpu:1;

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
	avg_est_t		gpu_migration_est[MIG_LAST+1];

	gpu_migration_dist_t	gpu_migration;
	int			last_gpu;

	lt_t			accum_gpu_time;
	lt_t			gpu_time_stamp;

	unsigned int		suspend_gpu_tracker_on_block:1;
#endif /* end LITMUS_AFFINITY_LOCKING */
#endif /* end LITMUS_NVIDIA */

#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
	notify_rsrc_exit_t	rsrc_exit_cb;
	void*			rsrc_exit_cb_args;
#endif

#ifdef CONFIG_LITMUS_LOCKING
	/* Is the task being priority-boosted by a locking protocol? */
	unsigned int		priority_boosted:1;
	/* If so, when did this start? */
	lt_t			boost_start_time;

	/* How many LITMUS^RT locks does the task currently hold/wait for? */
	unsigned int		num_locks_held;
	/* How many PCP/SRP locks does the task currently hold/wait for? */
	unsigned int		num_local_locks_held;
#endif

#ifdef CONFIG_LITMUS_NESTED_LOCKING
	raw_spinlock_t		hp_blocked_tasks_lock;
	struct binheap		hp_blocked_tasks;

	/* pointer to the lock upon which the task is currently blocked */
	struct litmus_lock*	blocked_lock;
	unsigned long		blocked_lock_data;

	struct litmus_lock*	outermost_lock;

	unsigned int		virtually_unlocked:1;

	/* wait-queue entry for pending wakeups */
	wait_queue_t		wait;
#endif

	/* user controlled parameters */
	struct rt_task		task_params;

	/* timing parameters */
	struct rt_job		job_params;

	/* Should the next job be released at some time other than
	 * just period time units after the last release? */
	unsigned int		sporadic_release:1;
	lt_t			sporadic_release_time;

	/* budget tracking/enforcement method and data assigned to this task */
	struct budget_tracker	budget;

	/* Task representing the current "inherited" task priority,
	 * assigned by inherit_priority and return_priority in the
	 * scheduler plugins. May point to self if PI does not result
	 * in an increased task priority. */
	struct task_struct*	inh_task;

	/* Budget enforcement methods may require knowledge of the tasks
	 * that inherit this task's priority. There may be more than one
	 * such task with priority inheritance chains. */
	int	inh_task_linkback_idx;	/* idx in inh_task's
					 * inh_task_linkbacks array */
	struct task_struct**	inh_task_linkbacks; /* array with
						     * BITS_PER_LONG elements */
	unsigned long	used_linkback_slots; /* nr of used slots in
					      * inh_task_linkbacks */

#ifdef CONFIG_NP_SECTION
	/* For the FMLP under PSN-EDF, it is required to make the task
	 * non-preemptive from kernel space. In order not to interfere with
	 * user space, this counter indicates the kernel-space np setting:
	 * kernel_np > 0 => task is non-preemptive */
	unsigned int	kernel_np;
#endif

	/* This field can be used by plugins to store where the task
	 * is currently scheduled. It is the responsibility of the
	 * plugin to avoid race conditions.
	 *
	 * This is used by GSN-EDF and PFAIR. */
	volatile int		scheduled_on;

	/* Is the stack of the task currently in use? This is updated by
	 * the LITMUS core.
	 *
	 * Be careful to avoid deadlocks! */
	volatile int		stack_in_use;

	/* This field can be used by plugins to store where the task
	 * is currently linked. It is the responsibility of the plugin
	 * to avoid race conditions.
	 *
	 * Used by GSN-EDF. */
	volatile int		linked_on;

	/* PFAIR/PD^2 state. Allocated on demand. */
	struct pfair_param*	pfair;

	/* Fields saved before BE->RT transition. */
	int old_policy;
	int old_prio;

	/* ready queue for this task */
	struct _rt_domain*	domain;

	/* heap element for this task
	 *
	 * Warning: Don't statically allocate this node. The heap
	 * implementation swaps these between tasks, thus after
	 * dequeuing from a heap you may end up with a different node
	 * than the one you had when enqueuing the task. For the same
	 * reason, don't obtain and store references to this node
	 * other than this pointer (which is updated by the heap
	 * implementation). */
	struct bheap_node*	heap_node;
	struct release_heap*	rel_heap;

	/* Used by rt_domain to queue the task in the release list. */
	struct list_head	list;

	/* Pointer to the page shared between userspace and kernel. */
	struct control_page*	ctrl_page;

#ifdef CONFIG_LITMUS_SOFTIRQD
	/* proxy threads have minimum priority by default */
	unsigned int	is_interrupt_thread:1;

	/* Pointer to data used by the klmirqd thread; only valid if
	 * is_interrupt_thread == 1. */
	struct klmirqd_info*	klmirqd_info;
#endif /* end LITMUS_SOFTIRQD */

#ifdef CONFIG_REALTIME_AUX_TASKS
	/* Real-time data for auxiliary tasks */
	struct list_head	aux_task_node;
	struct binheap_node	aux_task_owner_node;
	unsigned int	is_aux_task:1;
	unsigned int	aux_ready:1;
	unsigned int	has_aux_tasks:1;
	unsigned int	hide_from_aux_tasks:1;
#endif
};
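/* Usage sketch (not part of the original header): plugins typically
 * resolve a task's effective scheduling identity through inh_task,
 * falling back to the task itself when no priority is inherited. Assumes
 * the usual tsk_rt() accessor (tsk_rt(t) == &t->rt_param). Disabled
 * illustration: */
#if 0
static inline struct task_struct* effective_priority(struct task_struct* t)
{
	return tsk_rt(t)->inh_task ? tsk_rt(t)->inh_task : t;
}
#endif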
#ifdef CONFIG_REALTIME_AUX_TASKS
/* Auxiliary task data. Appears in task_struct, like rt_param. */
struct aux_data {
	struct list_head	aux_tasks;
	struct binheap		aux_task_owners;
	unsigned int		initialized:1;
	unsigned int		aux_future:1;
};
#endif

#endif /* __KERNEL__ */

#endif