#ifndef _LINUX_RT_PARAM_H_
#define _LINUX_RT_PARAM_H_
/*
* Definition of the scheduler plugin interface.
*
*/
#include <litmus/fpmath.h>
/* Litmus time type. */
/* Litmus time type: unsigned 64-bit time value; may wrap around. */
typedef unsigned long long lt_t;

/* Wrap-around safe "a comes after b" comparison.
 *
 * The subtraction is performed in the unsigned domain (well-defined
 * modulo arithmetic) and only the result is reinterpreted as signed.
 * The previous formulation subtracted two values already cast to
 * (long long), which can overflow -- undefined behavior in C. This is
 * the same technique as the kernel's time_after() helpers.
 */
static inline int lt_after(lt_t a, lt_t b)
{
	return ((long long)(b - a)) < 0;
}
#define lt_before(a, b) lt_after(b, a)

/* Wrap-around safe "a comes at or after b" comparison. */
static inline int lt_after_eq(lt_t a, lt_t b)
{
	return ((long long)(a - b)) >= 0;
}
#define lt_before_eq(a, b) lt_after_eq(b, a)
/* different types of clients: the criticality class a task declares in
 * its rt_task parameters (see struct rt_task::cls). */
typedef enum {
RT_CLASS_HARD, /* hard real-time */
RT_CLASS_SOFT, /* soft real-time */
RT_CLASS_BEST_EFFORT /* no real-time guarantees requested */
} task_class_t;
/* How the kernel enforces a job's execution budget
 * (see struct rt_task::budget_policy). */
typedef enum {
NO_ENFORCEMENT, /* job may overrun unhindered */
QUANTUM_ENFORCEMENT, /* budgets are only checked on quantum boundaries */
PRECISE_ENFORCEMENT, /* budgets are enforced with hrtimers */
} budget_policy_t;
/* Whether/when a task is signaled about budget exhaustion
 * (see struct rt_task::budget_signal_policy). */
typedef enum {
NO_SIGNALS, /* job receives no signals when it exhausts its budget */
QUANTUM_SIGNALS, /* budget signals are only sent on quantum boundaries */
PRECISE_SIGNALS, /* budget signals are triggered with hrtimers */
} budget_signal_policy_t;
/* Flag bits controlling auxiliary-task handling
 * (see CONFIG_REALTIME_AUX_TASKS and struct aux_data). */
typedef enum {
AUX_ENABLE = 0x1,
AUX_CURRENT = (AUX_ENABLE<<1), /* == 0x2 */
AUX_FUTURE = (AUX_CURRENT<<2) /* == 0x8 -- NOTE(review): the shift by 2
* leaves bit 0x4 unused; confirm the gap is intentional. */
} aux_flags_t;
/* mirror of st_event_record_type_t: sched_trace event types that can be
 * injected into the trace from user space.
 * Assume all are UNsupported, unless otherwise stated. */
typedef enum {
ST_INJECT_NAME = 1, /* supported */
ST_INJECT_PARAM, /* supported */
ST_INJECT_RELEASE, /* supported */
ST_INJECT_ASSIGNED,
ST_INJECT_SWITCH_TO,
ST_INJECT_SWITCH_AWAY,
ST_INJECT_COMPLETION, /* supported */
ST_INJECT_BLOCK,
ST_INJECT_RESUME,
ST_INJECT_ACTION,
ST_INJECT_SYS_RELEASE, /* supported */
} sched_trace_injection_events_t;
/* Arguments accompanying an injected sched_trace event. */
struct st_inject_args {
lt_t release; /* release time stamp */
lt_t deadline; /* deadline time stamp */
unsigned int job_no; /* job sequence number the event refers to */
};
/* We use the common priority interpretation "lower index == higher priority",
 * which is commonly used in fixed-priority schedulability analysis papers.
 * So, a numerically lower priority value implies higher scheduling priority,
 * with priority 1 being the highest priority. Priority 0 is reserved for
 * priority boosting. LITMUS_MAX_PRIORITY denotes the maximum priority value
 * range.
 */
#define LITMUS_MAX_PRIORITY 512
#define LITMUS_HIGHEST_PRIORITY 1
#define LITMUS_LOWEST_PRIORITY (LITMUS_MAX_PRIORITY - 1)

/* Provide generic comparison macros for userspace,
 * in case that we change this later.
 *
 * Macro arguments are fully parenthesized: the previous expansion
 * (a < b) mis-associated when callers passed compound expressions,
 * e.g. litmus_higher_fixed_prio(x, c ? p : q) parsed as
 * (x < c ? p : q) instead of (x < (c ? p : q)). */
#define litmus_higher_fixed_prio(a, b) ((a) < (b))
#define litmus_lower_fixed_prio(a, b) ((a) > (b))
#define litmus_is_valid_fixed_prio(p) \
((p) >= LITMUS_HIGHEST_PRIORITY && \
(p) <= LITMUS_LOWEST_PRIORITY)
/* User-provided per-task scheduling parameters; must be set up before
 * the task set is launched (see struct rt_param::task_params). */
struct rt_task {
lt_t exec_cost; /* per-job execution budget */
lt_t period; /* separation between consecutive releases */
lt_t relative_deadline; /* deadline relative to the job release */
lt_t phase; /* release offset -- presumably relative to the
* synchronous task-set release; TODO confirm */
unsigned int cpu; /* CPU/partition assignment (plugin-dependent) */
unsigned int priority; /* fixed priority; see LITMUS_MAX_PRIORITY range */
task_class_t cls;
budget_policy_t budget_policy; /* ignored by pfair */
budget_signal_policy_t budget_signal_policy; /* currently ignored by pfair */
};
/* Non-preemptive-section flag word shared between kernel and user space
 * via the control page (at LITMUS_CP_OFFSET_SCHED). Can be read/written
 * as one raw 64-bit word or via the individual bit-fields. */
union np_flag {
uint64_t raw;
struct {
/* Is the task currently in a non-preemptive section? */
uint64_t flag:31;
/* Should the task call into the scheduler? */
uint64_t preempt:1;
} np;
};
/* User-space arguments for creating an affinity observer. */
struct affinity_observer_args
{
int lock_od; /* object descriptor of the lock to observe -- TODO confirm */
};
/* Arguments for a GPU affinity observer; extends affinity_observer_args. */
struct gpu_affinity_observer_args
{
struct affinity_observer_args obs; /* common observer arguments */
int replica_to_gpu_offset; /* presumably maps lock replica index to GPU id; verify */
int nr_simult_users; /* presumably max simultaneous users per GPU; verify */
int relaxed_rules; /* non-zero presumably relaxes affinity rules; verify */
};
/* The definition of the data that is shared between the kernel and real-time
* tasks via a shared page (see litmus/ctrldev.c).
*
* WARNING: User space can write to this, so don't trust
* the correctness of the fields!
*
* This serves two purposes: to enable efficient signaling
* of non-preemptive sections (user->kernel) and
* delayed preemptions (kernel->user), and to export
* some real-time relevant statistics such as preemption and
* migration data to user space. We can't use a device to export
* statistics because we want to avoid system call overhead when
* determining preemption/migration overheads.
*/
struct control_page {
/* This flag is used by userspace to communicate non-preemptive
 * sections. */
volatile union np_flag sched;
volatile uint64_t irq_count; /* Incremented by the kernel each time an IRQ is
* handled. */
/* Locking overhead tracing: userspace records here the time stamp
 * and IRQ counter prior to starting the system call. */
uint64_t ts_syscall_start; /* Feather-Trace cycles */
uint64_t irq_syscall_start; /* Snapshot of irq_count when the syscall
* started. */
/* to be extended */
};
/* Expected offsets within the control page. These must stay in sync
 * with the field layout of struct control_page above; new fields may
 * only be appended (verify with offsetof()/BUILD_BUG_ON). */
#define LITMUS_CP_OFFSET_SCHED 0
#define LITMUS_CP_OFFSET_IRQ_COUNT 8
#define LITMUS_CP_OFFSET_TS_SC_START 16
#define LITMUS_CP_OFFSET_IRQ_SC_START 24
/* don't export internal data structures to user space (liblitmus) */
#ifdef __KERNEL__
#include <litmus/binheap.h>
#include <linux/semaphore.h>
#ifdef CONFIG_LITMUS_SOFTIRQD
#include <linux/interrupt.h>
#endif
struct _rt_domain;
struct bheap_node;
struct release_heap;
/* Per-job (as opposed to per-task) real-time state
 * (see struct rt_param::job_params). */
struct rt_job {
/* Time instant the job was or will be released. */
lt_t release;
/* What is the current deadline? */
lt_t deadline;
/* How much service has this job received so far? */
lt_t exec_time;
/* By how much did the prior job miss its deadline?
* Value differs from tardiness in that lateness may
* be negative (when job finishes before its deadline).
*/
long long lateness;
/* Which job is this. This is used to let user space
* specify which job to wait for, which is important if jobs
* overrun. If we just call sys_sleep_next_period() then we
* will unintentionally miss jobs after an overrun.
*
* Increase this sequence number when a job is released.
*/
unsigned int job_no;
/* bits:
* 0th: Set if a budget exhaustion signal has already been sent for
* the current job. */
unsigned long flags;
};
/* Bit index into rt_job::flags (see the flags comment above). */
#define RT_JOB_SIG_BUDGET_SENT 0
struct pfair_param;
/* Acquisition states of the klmirqd semaphore (CONFIG_LITMUS_SOFTIRQD).
 * NOTE(review): state semantics inferred from the names; confirm against
 * the klmirqd implementation. */
enum klmirqd_sem_status
{
NEED_TO_REACQUIRE,
REACQUIRING,
NOT_HELD,
HELD
};
/* Distance classes for a job migrating between GPUs; larger values mean
 * a more distant (more expensive) migration. MIG_NONE = no migration. */
typedef enum gpu_migration_dist
{
// TODO: Make this variable against NR_NVIDIA_GPUS
MIG_LOCAL = 0,
MIG_NEAR = 1,
MIG_MED = 2,
MIG_FAR = 3, // 8 GPUs in a binary tree hierarchy
MIG_NONE = 4,
MIG_LAST = MIG_NONE // sentinel: highest enumerator, used for array sizing
} gpu_migration_dist_t;
/* Feedback-based estimator state, kept in fixed-point arithmetic
 * (fp_t from litmus/fpmath.h). */
typedef struct feedback_est{
fp_t est; /* current estimate */
fp_t accum_err; /* accumulated estimation error */
} feedback_est_t;
/* Number of samples kept in the avg_est_t sliding window. */
#define AVG_EST_WINDOW_SIZE 20

/* Callback type -- presumably invoked when a task exits while holding a
 * resource (see rt_param::rsrc_exit_cb); confirm at the call sites. */
typedef int (*notify_rsrc_exit_t)(struct task_struct* tsk);

/* Sliding-window statistics over the last AVG_EST_WINDOW_SIZE samples.
 * NOTE(review): field roles below inferred from names; verify against
 * the update routine. */
typedef struct avg_est{
lt_t history[AVG_EST_WINDOW_SIZE]; /* sample window */
uint16_t count; /* number of valid samples */
uint16_t idx; /* next slot to fill */
lt_t sum; /* running sum over the window */
lt_t std; /* standard-deviation estimate */
lt_t avg; /* mean over the window */
} avg_est_t;
#ifdef CONFIG_LITMUS_SOFTIRQD
/* Per-daemon state of a klmirqd interrupt-handling thread
 * (CONFIG_LITMUS_SOFTIRQD). */
struct klmirqd_info
{
struct task_struct* klmirqd; /* the daemon thread itself */
struct task_struct* current_owner; /* presumably the task on whose behalf
* work is executed; verify */
unsigned int terminating:1; /* set while the daemon shuts down */
raw_spinlock_t lock; /* protects the pending state below -- TODO confirm scope */
u32 pending;
atomic_t num_hi_pending; /* pending counts per queue, see below */
atomic_t num_low_pending;
atomic_t num_work_pending;
/* in order of priority */
struct tasklet_head pending_tasklets_hi;
struct tasklet_head pending_tasklets;
struct list_head worklist;
struct list_head klmirqd_reg; /* registration list linkage */
};
#endif
/* RT task parameters for scheduling extensions
* These parameters are inherited during clone and therefore must
* be explicitly set up before the task set is launched.
*/
struct rt_param {
/* is the task sleeping? */
unsigned int flags:8;
/* do we need to check for srp blocking? */
unsigned int srp_non_recurse:1;
/* is the task present? (true if it can be scheduled) */
unsigned int present:1;
/* has the task completed? */
unsigned int completed:1;
#ifdef CONFIG_LITMUS_SOFTIRQD
/* proxy threads have minimum priority by default */
unsigned int is_interrupt_thread:1;
/* pointer to data used by klmirqd thread.
*
* ptr only valid if is_interrupt_thread == 1
*/
struct klmirqd_info* klmirqd_info;
#endif
#ifdef CONFIG_LITMUS_NVIDIA
/* number of top-half interrupts handled on behalf of current job */
atomic_t nv_int_count;
long unsigned int held_gpus; // bitmap of held GPUs.
struct binheap_node gpu_owner_node; // just one GPU for now...
unsigned int hide_from_gpu:1; /* hide this task from GPU tracking -- TODO confirm */
#ifdef CONFIG_LITMUS_AFFINITY_LOCKING
/* per-distance migration-cost estimates, indexed by gpu_migration_dist_t */
avg_est_t gpu_migration_est[MIG_LAST+1];
gpu_migration_dist_t gpu_migration; /* distance class of the current/last migration */
int last_gpu; /* GPU most recently used by this task */
notify_rsrc_exit_t rsrc_exit_cb; /* exit callback, see notify_rsrc_exit_t */
void* rsrc_exit_cb_args; /* opaque argument for rsrc_exit_cb */
lt_t accum_gpu_time; /* accumulated GPU usage time -- TODO confirm units */
lt_t gpu_time_stamp; /* time stamp for GPU-time accounting */
unsigned int suspend_gpu_tracker_on_block:1;
#endif
#endif
#ifdef CONFIG_LITMUS_LOCKING
/* Is the task being priority-boosted by a locking protocol? */
unsigned int priority_boosted:1;
/* If so, when did this start? */
lt_t boost_start_time;
#endif
/* user controlled parameters */
struct rt_task task_params;
/* timing parameters */
struct rt_job job_params;
/* task representing the current "inherited" task
* priority, assigned by inherit_priority and
* return priority in the scheduler plugins.
* could point to self if PI does not result in
* an increased task priority.
*/
struct task_struct* inh_task;
#ifdef CONFIG_LITMUS_NESTED_LOCKING
raw_spinlock_t hp_blocked_tasks_lock; /* protects hp_blocked_tasks */
struct binheap hp_blocked_tasks; /* heap of higher-prio tasks blocked on this task's locks -- TODO confirm */
/* pointer to lock upon which is currently blocked */
struct litmus_lock* blocked_lock;
#endif
#ifdef CONFIG_REALTIME_AUX_TASKS
unsigned int is_aux_task:1; /* this task is an auxiliary (helper) task */
unsigned int has_aux_tasks:1; /* this task owns auxiliary tasks */
unsigned int hide_from_aux_tasks:1;
struct list_head aux_task_node; /* linkage in owner's aux_data::aux_tasks */
struct binheap_node aux_task_owner_node;
#endif
#ifdef CONFIG_NP_SECTION
/* For the FMLP under PSN-EDF, it is required to make the task
* non-preemptive from kernel space. In order not to interfere with
* user space, this counter indicates the kernel space np setting.
* kernel_np > 0 => task is non-preemptive
*/
unsigned int kernel_np;
#endif
/* This field can be used by plugins to store where the task
* is currently scheduled. It is the responsibility of the
* plugin to avoid race conditions.
*
* This used by GSN-EDF and PFAIR.
*/
volatile int scheduled_on;
/* Is the stack of the task currently in use? This is updated by
* the LITMUS core.
*
* Be careful to avoid deadlocks!
*/
volatile int stack_in_use;
/* This field can be used by plugins to store where the task
* is currently linked. It is the responsibility of the plugin
* to avoid race conditions.
*
* Used by GSN-EDF.
*/
volatile int linked_on;
/* PFAIR/PD^2 state. Allocated on demand. */
struct pfair_param* pfair;
/* Fields saved before BE->RT transition.
*/
int old_policy;
int old_prio;
/* ready queue for this task */
struct _rt_domain* domain;
/* heap element for this task
*
* Warning: Don't statically allocate this node. The heap
* implementation swaps these between tasks, thus after
* dequeuing from a heap you may end up with a different node
* then the one you had when enqueuing the task. For the same
* reason, don't obtain and store references to this node
* other than this pointer (which is updated by the heap
* implementation).
*/
struct bheap_node* heap_node;
struct release_heap* rel_heap;
/* Used by rt_domain to queue task in release list.
*/
struct list_head list;
/* Pointer to the page shared between userspace and kernel. */
struct control_page * ctrl_page;
};
#ifdef CONFIG_REALTIME_AUX_TASKS
/* Per-owner bookkeeping for auxiliary tasks (CONFIG_REALTIME_AUX_TASKS). */
struct aux_data
{
struct list_head aux_tasks; /* list of associated aux tasks */
struct binheap aux_task_owners; /* heap of aux-task owners -- TODO confirm */
unsigned int initialized:1; /* set once this structure is set up */
unsigned int aux_future:1; /* see AUX_FUTURE flag */
};
#endif
#endif /* __KERNEL */
#endif