aboutsummaryrefslogblamecommitdiffstats
path: root/litmus/gpu_affinity.c
blob: 9762be1a085e08af98fdf8f1b7a12064046b2124 (plain) (tree)
1
2
3
4
5
6
7
8
9






                                

                               

                             
                                                                              
 
                          
                      
                                               
 
                                    

                                                         

                                         

                                                 

                            




                                                                                       
 
                                                    
 






                                                                                     

                 





                                                                                       





                                                                                                                               

                                                                       

                                                                                               












                                                                                                       

         
                                                                              
                                                    
                                    
                                                   
                                            



                                                         
                                                                            
              
                 
 








                                                                             
         

                                
         
 


                                              
 
                    






      

#ifdef CONFIG_LITMUS_NVIDIA

#include <linux/sched.h>
#include <litmus/litmus.h>
#include <litmus/gpu_affinity.h>

#include <litmus/sched_trace.h>

/*
 * Upper bound applied to the very first observation fed to an estimator
 * (see update_gpu_estimate()).
 *
 * Kept as a fully parenthesized unsigned 64-bit integer constant so that:
 *   - it can be compared with / assigned to an integer observation without
 *     any floating-point arithmetic,
 *   - it matches the "%llu" conversion it is printed with, and
 *   - it expands safely inside larger expressions.
 * The numeric value is unchanged: 2 * 10^9.
 */
#define OBSERVATION_CAP (2ULL * 1000000000ULL)

/*
 * Advance the feedback estimator 'fb' by one observation.
 *
 *   delta         = observed - fb->est
 *   fb->est       = a * delta + b * fb->accum_err   (accum_err as it was on entry)
 *   fb->accum_err = fb->accum_err + delta
 *
 * Returns delta / observed, i.e. the error of the previous estimate relative
 * to the observed value.
 *
 * NOTE(review): when 'observed' is 0 the divisor handed to _div() is zero;
 * confirm how _div() behaves in that case or that callers never pass 0.
 */
static fp_t update_estimate(feedback_est_t* fb, fp_t a, fp_t b, lt_t observed)
{
	fp_t measured = _integer_to_fp(observed);
	fp_t delta = _sub(measured, fb->est);
	/* The new estimate is built from the accumulated error *before* it
	 * absorbs this round's delta. */
	fp_t next_est = _add(_mul(a, delta), _mul(b, fb->accum_err));
	fp_t rel_delta = _div(delta, measured);

	fb->accum_err = _add(fb->accum_err, delta);
	fb->est = next_est;

	return rel_delta;
}

void update_gpu_estimate(struct task_struct *t, lt_t observed)
{
	feedback_est_t *fb = &(tsk_rt(t)->gpu_migration_est[tsk_rt(t)->gpu_migration]);

	BUG_ON(tsk_rt(t)->gpu_migration > MIG_LAST);

	if(unlikely(fb->est.val == 0)) {
		// kludge-- cap observed values to prevent whacky estimations.
		// whacky stuff happens during the first few jobs.
		if(unlikely(observed > OBSERVATION_CAP)) {
			TRACE_TASK(t, "Crazy observation was capped: %llu -> %llu\n",
					   observed, OBSERVATION_CAP);
			observed = OBSERVATION_CAP;
		}

		// take the first observation as our estimate
		// (initial value of 0 was bogus anyhow)
		fb->est = _integer_to_fp(observed);
		fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
	}
	else {
		fp_t rel_err = update_estimate(fb,
									   tsk_rt(t)->gpu_fb_param_a[tsk_rt(t)->gpu_migration],
									   tsk_rt(t)->gpu_fb_param_b[tsk_rt(t)->gpu_migration],
									   observed);

		if(unlikely(_fp_to_integer(fb->est) <= 0)) {
			TRACE_TASK(t, "Invalid estimate. Patching.\n");
			fb->est = _integer_to_fp(observed);
			fb->accum_err = _div(fb->est, _integer_to_fp(2));  // ...seems to work.
		}
		else {
//			struct migration_info mig_info;

			sched_trace_prediction_err(t,
									   &(tsk_rt(t)->gpu_migration),
									   &rel_err);

//			mig_info.observed = observed;
//			mig_info.estimated = get_gpu_estimate(t, tsk_rt(t)->gpu_migration);
//			mig_info.distance = tsk_rt(t)->gpu_migration;
//
//			sched_trace_migration(t, &mig_info);
		}
	}

	TRACE_TASK(t, "GPU est update after (dist = %d, obs = %llu): %d.%d\n",
			   tsk_rt(t)->gpu_migration,
			   observed,
			   _fp_to_integer(fb->est),
			   _point(fb->est));
}

/*
 * Compute the migration distance between GPU indices a and b.
 *
 * The original author's model: GPUs are organized in a binary hierarchy with
 * no more than 2^MIG_FAR GPUs. The distance is the smallest shift count i in
 * [0, MIG_FAR] for which (a >> i) == (b >> i).
 *
 * Returns MIG_NONE if either index is negative, or if no shift count up to
 * MIG_FAR makes the indices equal (a warning is traced in the latter case).
 * The result is always traced.
 */
gpu_migration_dist_t gpu_migration_distance(int a, int b)
{
	int result = MIG_NONE;
	int level;

	if(likely(a >= 0 && b >= 0)) {
		for(level = 0; level <= MIG_FAR; ++level) {
			if((a >> level) == (b >> level)) {
				result = level;
				break;
			}
		}

		// loop ran to completion without a match -- hopefully never happens.
		if(level > MIG_FAR) {
			TRACE_CUR("WARNING: GPU distance too far! %d -> %d\n", a, b);
		}
	}

	TRACE_CUR("Distance %d -> %d is %d\n",
			  a, b, result);

	return result;
}




#endif