#include #include #include #include #include #define DEBUG_DGL #ifdef DEBUG_DGL #define TRACE(fmt, args...) STRACE(fmt, ## args) #define STRACE2(fmt, args...) STRACE(fmt, ## args) #define TRACE_GREQ(greq, fmt, args...) \ TRACE("(greq-%s/%llu) " fmt, (greq->task ? greq->task->comm : "greq"), \ (greq->task ? greq->task->pid : (unsigned long long)greq), ## args) #else #define STRACE2(fmt, args...) printk(KERN_ERR fmt, ## args) #define TRACE(fmt, args...) #define TRACE_GREQ(greq, fmt, args...) #endif #define MASK_SIZE (sizeof(unsigned long) * 8) /* Return number of MASK_SIZE fields needed to store a mask in d */ #define WP(num, word) (num / word + (num % word != 0)) #define MASK_WORDS(d) WP(d->num_resources, MASK_SIZE) /* Word, bit -> resource id */ #define ri(w, b) (w * MASK_SIZE + b) /* For loop, where @i iterates over each set bit in @bit_arr */ #define for_each_resource(bit_arr, d, w, b, i) \ for(w = 0; w < MASK_WORDS(d); ++w) \ for(b = find_first_bit(&bit_arr[w],MASK_SIZE), i = ri(w, b); \ b < MASK_SIZE; \ b = find_next_bit(&bit_arr[w],MASK_SIZE,b+1), i = ri(w, b)) /* Return resource id in dgl @d for resource @r */ #define resource_id(d, r) ((((void*)r) - (void*)((d)->resources))/ sizeof(*r)) /* Return request group of req @r for resource @i */ #define req_group(r, i) (container_of(((void*)r) - sizeof(*r)*(i), \ struct dgl_group_req, requests)) static inline void set_bit_value(int bit, unsigned long *word, int value) { if (value) { __set_bit(bit, word); } else { clear_bit(bit, word); } } /* * Resource id -> word, bit */ static inline void mask_idx(int resource, int *word, int *bit) { *word = resource / MASK_SIZE; *bit = resource % MASK_SIZE; } /* * Logical OR of words in @arr. */ static int arr_to_bool(struct dgl *dgl, unsigned long *arr) { int word, ret = 0; for (word = 0; word < MASK_WORDS(dgl) && !ret; word++) { ret |= arr[word]; } return (ret != 0); } static void print_queue(struct dgl *dgl, struct list_head *list) { struct dgl_req *pos; struct dgl_group_req *greq; list_for_each_entry(pos, list, list) { greq = pos->greq; sched_trace_log_message("(%s-%d:r%d-p%x-b%x)->", greq->task->comm, greq->task->pid, pos->replicas, greq->need_prio[0], greq->blocked[0]); } sched_trace_log_message("\n"); } static void print_resource(struct dgl *dgl, struct dgl_resource *resource) { STRACE2("\tResource %d, real_free: %d, goal_free: %d\n", resource_id(dgl, resource), resource->real_free, resource->goal_free); STRACE2("\t acquired: "); print_queue(dgl, &resource->acquired); STRACE2("\t will_acquire:"); print_queue(dgl, &resource->will_acquire); STRACE2("\t waiting:"); print_queue(dgl, &resource->waiting); STRACE2("\t will_wait:"); print_queue(dgl, &resource->will_wait); } /* * Print stats and queues of every resource to the trace log. */ static void print_state(struct dgl *dgl) { int i; struct dgl_resource *resource; sched_trace_log_message("\n"); STRACE2("\t\tDGL: requests: %d\n", dgl->requests); for (i = 0; i < dgl->num_resources; ++i) { resource = &dgl->resources[i]; if (!resource) { STRACE2("\tResource %d is null!\n", i); } if (!list_empty(&resource->waiting) || !list_empty(&resource->will_wait) || !list_empty(&resource->acquired) || !list_empty(&resource->will_acquire)) { print_resource(dgl, resource); } } STRACE2("Dump complete\n"); sched_trace_log_message("\n"); } #ifdef DEBUG_DGL #define BUG_DUMP(dgl, cond) \ do { \ if (cond) { \ STRACE2("BAD: %s", #cond); \ print_state(dgl); \ BUG(); \ }} while(0) #else #define BUG_DUMP(dgl, cond) #endif static int higher_prio(struct dgl_group_req *a, struct dgl_group_req *b) { return (a->priority < b->priority || (a->priority == b->priority && (a->task->pid < b->task->pid))); } static void release_cpu(struct dgl *dgl, struct dgl_group_req *greq) { int cpu = greq->cpu; BUG_DUMP(dgl, dgl->acquired[cpu] != greq); dgl->acquired[cpu] = NULL; } static void take_cpu(struct dgl *dgl, struct dgl_group_req *greq) { int cpu = greq->cpu; BUG_DUMP(dgl, dgl->acquired[cpu]); dgl->acquired[cpu] = greq; } /* * Count lower priority replicas of @greq for @list in @resource. * @start If specified, begin search at location at *@start */ static int __get_lp_replicas(struct dgl *dgl, struct dgl_group_req *greq, struct dgl_resource *resource, struct dgl_req **start, struct list_head *list) { int replicas = 0; struct list_head *pos_l; struct dgl_req *pos; pos_l = (*start) ? &(*start)->list : list; pos = list_entry(pos_l, struct dgl_req, list); list_for_each_entry(pos, list, list) { if (higher_prio(pos->greq, greq)) { break; } replicas += pos->replicas; BUG_DUMP(dgl, replicas > dgl->num_replicas); } if (start) { *start = pos; } return replicas; } /* * Count lower priority replicas of @greq in the acquired and will_acquire * queues of @resource. * @start_a Request to start counting replicas in acquired list * @start_wa "" "" for will_acquire */ static int get_lp_replicas(struct dgl *dgl, struct dgl_group_req *greq, struct dgl_resource *resource, struct dgl_req **start_a, struct dgl_req **start_wa) { int reps = 0; reps += __get_lp_replicas(dgl, greq, resource, start_a, &resource->acquired); reps += __get_lp_replicas(dgl, greq, resource, start_wa, &resource->will_acquire); return reps; } /* * Add @req to @list in priority order. * @reverse Reverse priority */ static void add_request(struct list_head *list, struct dgl_req *req, int reverse) { struct list_head *last; struct dgl_req *acquired; struct dgl_group_req *greqa, *greqb; BUG_ON(in_list(&req->list)); last = list; list_for_each_entry(acquired, list, list) { if (!reverse) { greqa = req->greq; greqb = acquired->greq; } else { greqa = acquired->greq; greqb = req->greq; } if (higher_prio(greqa, greqb)) { break; } last = &acquired->list; } list_add(&req->list, last); } /* * Add @req to a waiting or will_wait list. */ static void add_waiting(struct dgl *dgl, struct list_head *list, struct dgl_req *req) { BUG_DUMP(dgl, !arr_to_bool(dgl, req->greq->need_prio)); add_request(list, req, 0); } /* * Add @req to an acquired or will_acquire list. */ static void add_acquired(struct dgl *dgl, struct list_head *list, struct dgl_req *req) { BUG_DUMP(dgl, arr_to_bool(dgl, req->greq->need_prio)); add_request(list, req, 1); } /* * Cancel preemption of @greq's requests. */ static void will_wait_to_acquire(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, arr_to_bool(dgl, greq->blocked)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] != greq); TRACE_GREQ(greq, "(WILL_WAIT)->(ACQUIRE)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; resource->goal_free -= our_req->replicas; list_del_init(&our_req->list); add_acquired(dgl, &resource->acquired, our_req); BUG_DUMP(dgl, resource->goal_free < 0); } } /* * Preempt @greq's requests. */ static void acquired_to_will_wait(struct dgl *dgl, struct dgl_group_req *greq, struct dgl_group_req *blocked) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, !arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] != greq); TRACE_GREQ(greq, "(ACQUIRED)->(WILL_WAIT)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; resource->goal_free += our_req->replicas; list_del_init(&our_req->list); add_waiting(dgl, &resource->will_wait, our_req); BUG_DUMP(dgl, resource->goal_free > dgl->num_replicas); } } /* * Cancel @greq's resource acquisition. */ static void will_acquire_to_waiting(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, !arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] == greq); TRACE_GREQ(greq, "(WILL_ACQUIRE)->(WAITING)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; resource->goal_free += our_req->replicas; list_del_init(&our_req->list); add_waiting(dgl, &resource->waiting, our_req); BUG_DUMP(dgl, resource->goal_free < 0); } } #define next_list(list, reverse) ((reverse) ? (list)->prev : (list)->next) /* * @return Next request from list_a U list_b * @reverse_prio Return lower-priority requests * @revearse_read Read lists from end to beginning * @tmp_a and @tmp_b store the next item in the list so that the returned * item can be safely deleted during iteration */ static struct dgl_req *next_union(struct list_head *list_a, struct list_head *list_b, struct list_head **tmp_a, struct list_head **tmp_b, int reverse_prio, int reverse_read) { int higher; struct dgl_req *next_a, *next_b; next_a = next_b = NULL; *tmp_a = (*tmp_a) ? *tmp_a : next_list(list_a, reverse_read); *tmp_b = (*tmp_b) ? *tmp_b : next_list(list_b, reverse_read); if (*tmp_a != list_a) { next_a = list_entry(*tmp_a, struct dgl_req, list); } if (*tmp_b != list_b) { next_b = list_entry(*tmp_b, struct dgl_req, list); } if (next_a && next_b) { higher = higher_prio(next_a->greq, next_b->greq); if ((reverse_prio && higher)|| (!reverse_prio && !higher)) { *tmp_b = next_list(&next_b->list, reverse_read); return next_b; } else { *tmp_a = next_list(&next_a->list, reverse_read); return next_a; } } else if (next_a) { *tmp_a = next_list(&next_a->list, reverse_read); return next_a; } else if (next_b) { *tmp_b = next_list(&next_b->list, reverse_read); return next_b; } else { return NULL; } } /* * @return Next logically waiting request in @resource */ static struct dgl_req *next_waiting(struct dgl_resource *resource, struct list_head **pos_a, struct list_head **pos_b, int reverse) { int reverse_prio = (reverse) ? 1 : 0; return next_union(&resource->waiting, &resource->will_wait, pos_a, pos_b, reverse_prio, reverse); } /* * @return Next logically acquired request in @resource */ static struct dgl_req *next_acquired(struct dgl_resource *resource, struct list_head **pos_a, struct list_head **pos_b) { return next_union(&resource->acquired, &resource->will_acquire, pos_a, pos_b, 1, 0); } /* * Recalculate all need_prio bits for requests in @resource. */ static void update_prio(struct dgl *dgl, struct dgl_resource *resource) { int lp_replicas, w, b, i; struct dgl_req *req, *acq_a, *acq_b; struct dgl_group_req *greq; struct list_head *tmp_a, *tmp_b; lp_replicas = resource->goal_free; tmp_a = tmp_b = NULL; acq_a = acq_b = NULL; i = resource_id(dgl, resource); mask_idx(i, &w, &b); while ((req = next_waiting(resource, &tmp_a, &tmp_b, 1))) { greq = req->greq; lp_replicas += get_lp_replicas(dgl, greq, resource, &acq_a, &acq_b); set_bit_value(b, &greq->need_prio[w], lp_replicas < req->replicas); BUG_DUMP(dgl, lp_replicas > dgl->num_replicas); } } /* * Recalculate all blocked bits for requests in @resource. */ static void update_blocked(struct dgl *dgl, struct dgl_resource *resource) { int w, b, i; struct dgl_req *req; struct dgl_group_req *greq; i = resource_id(dgl, resource); mask_idx(i, &w, &b); list_for_each_entry(req, &resource->will_acquire, list) { greq = req->greq; set_bit_value(b, &greq->blocked[w], resource->real_free < req->replicas); } } /* * Move @greq from logically running to physically running. */ static void will_acquire_to_acquire(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, arr_to_bool(dgl, greq->blocked)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] != greq); TRACE_GREQ(greq, "(WILL_ACQUIRE)->(ACQUIRE)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; list_del_init(&our_req->list); add_acquired(dgl, &resource->acquired, our_req); resource->real_free -= our_req->replicas; BUG_DUMP(dgl, (resource->real_free < 0)); } } /* * If any requests in @resource are unblocked, schedule them. */ static void pull_blocked(struct dgl *dgl, struct dgl_resource *resource) { struct dgl_req *req, *tmp; struct dgl_group_req *greq; list_for_each_entry_safe(req, tmp, &resource->will_acquire, list) { greq = req->greq; if (!arr_to_bool(dgl, greq->blocked)) { take_cpu(dgl, greq); will_acquire_to_acquire(dgl, greq); dgl->cpu_acquired(greq); update_blocked(dgl, resource); } } } /* * Free up enough goal_free in @resource to run @our_req. */ static void bump_resource(struct dgl *dgl, struct dgl_resource *resource, struct dgl_group_req *greq, struct dgl_req *our_req) { int w; struct list_head *tmp_a, *tmp_b; struct dgl_req *next; struct dgl_group_req *next_greq; tmp_a = tmp_b = NULL; while ((next = next_acquired(resource, &tmp_b, &tmp_a))) { if (resource->goal_free >= our_req->replicas) { break; } next_greq = next->greq; BUG_DUMP(dgl, higher_prio(next_greq, greq)); for (w = 0; w < MASK_WORDS(dgl); w++) { next_greq->need_prio[w] |= next_greq->requested[w] & greq->requested[w]; } if (dgl->acquired[next_greq->cpu] == next_greq) { BUG_DUMP(dgl, dgl->acquired[next_greq->cpu] != next_greq); acquired_to_will_wait(dgl, next_greq, greq); } else { will_acquire_to_waiting(dgl, next_greq); } dgl->cpu_preempted(next_greq); } } /* * Prepare high-priority request for acquisition. */ static void waiting_to_will_acquire(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] == greq); TRACE_GREQ(greq, "(WAITING)->(WILL_ACQUIRE)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; list_del_init(&our_req->list); bump_resource(dgl, resource, greq, our_req); resource->goal_free -= our_req->replicas; BUG_DUMP(dgl, resource->goal_free < 0); add_acquired(dgl, &resource->will_acquire, our_req); set_bit_value(b, &greq->blocked[w], resource->real_free < our_req->replicas); update_prio(dgl, resource); } } /* * Logically waiting but physically running request moves to physically waiting. */ static void will_wait_to_waiting(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i; struct dgl_req *our_req; struct dgl_resource *resource; BUG_DUMP(dgl, !arr_to_bool(dgl, greq->need_prio)); BUG_DUMP(dgl, dgl->acquired[greq->cpu] == greq); TRACE_GREQ(greq, "(WILL_WAIT)->(WAITING)\n"); for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; list_del_init(&our_req->list); add_waiting(dgl, &resource->waiting, our_req); resource->real_free += our_req->replicas; update_blocked(dgl, resource); BUG_DUMP(dgl, resource->real_free > dgl->num_replicas); } for_each_resource(greq->requested, dgl, w, b, i) { pull_blocked(dgl, &dgl->resources[i]); } } /* * If any requests in @resource have priority to logically acquire, start * resource acquisitions. */ static void pull_prio(struct dgl *dgl, struct dgl_resource *resource) { struct dgl_req *next; struct dgl_group_req *greq; struct list_head *tmp_a, *tmp_b; tmp_a = tmp_b = NULL; while ((next = next_waiting(resource, &tmp_a, &tmp_b, 0))) { greq = next->greq; if (!arr_to_bool(dgl, greq->need_prio)) { if (dgl->acquired[greq->cpu] == greq) { will_wait_to_acquire(dgl, greq); } else { waiting_to_will_acquire(dgl, greq); } update_prio(dgl, resource); } } } /** * add_group_req - put in @greq for resources on @cpu. */ void add_group_req(struct dgl *dgl, struct dgl_group_req *greq, int cpu) { int w, b, i, lp_reps; struct dgl_req *our_req; struct dgl_resource *resource; greq->cpu = cpu; greq->priority = dgl->assign_priority(dgl, greq); ++dgl->requests; TRACE_GREQ(greq, "Group request added for CPU %d\n", cpu); BUG_DUMP(dgl, dgl->acquired[cpu] == greq); for (w = 0; w < MASK_WORDS(dgl); w++) { greq->need_prio[w] = greq->requested[w]; greq->blocked[w] = greq->requested[w]; } for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; add_waiting(dgl, &resource->waiting, our_req); lp_reps = get_lp_replicas(dgl, greq, resource, NULL, NULL) + resource->goal_free; set_bit_value(b, &greq->need_prio[w], lp_reps < our_req->replicas); } /* Take resources if it has priority */ if (!arr_to_bool(dgl, greq->need_prio)) { for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; resource = &dgl->resources[i]; pull_prio(dgl, resource); set_bit_value(b, &greq->blocked[w], resource->real_free < our_req->replicas); } /* Take CPUs if its not blocked */ if (!arr_to_bool(dgl, greq->blocked)) { for_each_resource(greq->requested, dgl, w, b, i) { pull_blocked(dgl, &dgl->resources[i]); } } } print_state(dgl); } /** * update_group_req - update state of @greq in @dgl. */ void update_group_req(struct dgl *dgl, struct dgl_group_req *greq) { if (arr_to_bool(dgl, greq->need_prio) && !arr_to_bool(dgl, greq->blocked) && dgl->acquired[greq->cpu] == greq) { TRACE_GREQ(greq, "Blocker has called update\n"); release_cpu(dgl, greq); will_wait_to_waiting(dgl, greq); print_state(dgl); } else { TRACE_GREQ(greq, "Unused update\n"); } BUG_DUMP(dgl, arr_to_bool(dgl, greq->blocked) && dgl->acquired[greq->cpu] == greq); } /** * remove_group_req - remove @greq from contention in @dgl. */ void remove_group_req(struct dgl *dgl, struct dgl_group_req *greq) { int w, b, i, running, has_prio; struct dgl_req *our_req; struct dgl_resource *resource; TRACE_GREQ(greq, "Removing group request\n"); running = dgl->acquired[greq->cpu] == greq; has_prio = !arr_to_bool(dgl, greq->need_prio); if (dgl->acquired[greq->cpu] == greq) { release_cpu(dgl, greq); } for_each_resource(greq->requested, dgl, w, b, i) { our_req = &greq->requests[i]; list_del_init(&our_req->list); resource = &dgl->resources[i]; if (running) { resource->real_free += our_req->replicas; BUG_DUMP(dgl, resource->real_free > dgl->num_replicas); } if (has_prio) { resource->goal_free += our_req->replicas; BUG_DUMP(dgl, resource->goal_free > dgl->num_replicas); } } --dgl->requests; /* TODO: these updates should be able to move up */ if (has_prio) { for_each_resource(greq->requested, dgl, w, b, i) { update_prio(dgl, &dgl->resources[i]); } for_each_resource(greq->requested, dgl, w, b, i) { pull_prio(dgl, &dgl->resources[i]); } } if (running || has_prio) { for_each_resource(greq->requested, dgl, w, b, i) { update_blocked(dgl, &dgl->resources[i]); } for_each_resource(greq->requested, dgl, w, b, i) { pull_blocked(dgl, &dgl->resources[i]); } } print_state(dgl); } /** * set_req - create request for @replicas of @resource. */ void set_req(struct dgl *dgl, struct dgl_group_req *greq, int resource, int replicas) { int word, bit; struct dgl_req *req; if (replicas > dgl->num_replicas) replicas = dgl->num_replicas; mask_idx(resource, &word, &bit); __set_bit(bit, &greq->requested[word]); TRACE_GREQ(greq, "requesting %d of %d\n", replicas, resource); req = &greq->requests[resource]; req->greq = greq; req->replicas = replicas; INIT_LIST_HEAD(&req->list); BUG_DUMP(dgl, in_list(&req->list)); } static void dummy_acquired(struct dgl_group_req *greq){} static unsigned long long timestamp_prio (struct dgl *dgl, struct dgl_group_req *greq) { return dgl->ts++; } void dgl_init(struct dgl *dgl, unsigned long num_resources, unsigned long num_replicas) { int i; struct dgl_resource *resource; dgl->num_replicas = num_replicas; dgl->num_resources = num_resources; dgl->resources = kmalloc(sizeof(*dgl->resources) * num_resources, GFP_ATOMIC); dgl->acquired = kmalloc(sizeof(*dgl->acquired) * num_online_cpus(), GFP_ATOMIC); for (i = 0; i < num_online_cpus(); ++i) dgl->acquired[i] = NULL; for (i = 0; i < num_resources; i++) { resource = &dgl->resources[i]; INIT_LIST_HEAD(&resource->waiting); INIT_LIST_HEAD(&resource->acquired); INIT_LIST_HEAD(&resource->will_wait); INIT_LIST_HEAD(&resource->will_acquire); resource->real_free = dgl->num_replicas; resource->goal_free = dgl->num_replicas; } dgl->requests = 0; dgl->ts = 0; dgl->cpu_acquired = dummy_acquired; dgl->cpu_preempted = NULL; dgl->assign_priority = timestamp_prio; } void dgl_free(struct dgl *dgl) { kfree(dgl->resources); kfree(dgl->acquired); } void dgl_group_req_init(struct dgl *dgl, struct dgl_group_req *greq) { int i; greq->requested = kmalloc(sizeof(*greq->requested) * MASK_WORDS(dgl), GFP_ATOMIC); greq->need_prio = kmalloc(sizeof(*greq->need_prio) * MASK_WORDS(dgl), GFP_ATOMIC); greq->blocked = kmalloc(sizeof(*greq->need_prio) * MASK_WORDS(dgl), GFP_ATOMIC); greq->requests = kmalloc(sizeof(*greq->requests) * dgl->num_resources, GFP_ATOMIC); BUG_DUMP(dgl, !greq->requested); BUG_DUMP(dgl, !greq->need_prio); BUG_DUMP(dgl, !greq->requests); greq->cpu = NO_CPU; for (i = 0; i < MASK_WORDS(dgl); ++i) { greq->requested[i] = 0; greq->need_prio[i] = 0; } } void dgl_group_req_free(struct dgl_group_req *greq) { kfree(greq->requested); kfree(greq->need_prio); kfree(greq->requests); }