/*
 * preemption and migration overhead measurement
 *
 * common data structures and defines
 */
#ifndef PM_COMMON_H
#define PM_COMMON_H

#include
#include
#include
#include
#include
#include
#include
#include
#include

/* WSS, CACHESIZE, DATAPOINTS may be given as command-line defines
 * when recompiling this test for different WSS, CACHESIZE, and
 * (possibly) DATAPOINTS values.
 * At the moment only WSS can be passed through the scons build mechanism.
 */

/* Definitions and variables related to experimental measurement.
 * What I eventually want is a test script that will cycle through
 * different WSS and CACHESIZE, recompiling this program at each round
 * (easier for memory management), but running all tests without human
 * intervention.
 */

/*
 * default working set size, in KB
 * non-default WSS are taken from the test script (-DWSS=...)
 */
#ifndef WSS
#define WSS 3072
#endif

/* Cache size:
 * Niagara: L2: 3MB
 * Koruna:  L2: 6MB every 2 cores
 * Ludwig:  L2: 3MB every 2 cores, L3: 12MB
 * Pound:   L2: 256KB, L3: 8MB
 */
#define CACHESIZE (12 * 1024)

/* number of measurements that can be stored per single pm_task */
#define DATAPOINTS 100000

/* The following macros (hopefully) don't need any modification */

/* Cache alignment (cache line size)
 * Niagara, Koruna, Ludwig, Pound cache line size: 64B
 */
#define CACHEALIGNMENT 64

/* ints per WSS */
#define INTS_PER_WSS ((WSS * 1024) / sizeof(int))

/* reads vs. writes ratio */
#define READRATIO 75

/* random seed */
#define SEEDVAL 12345

/* number of "working sets" to cycle through */
#define NUMWS (((CACHESIZE * 2) / WSS) + 2)

/* runtime in milliseconds -- 60s */
#define SIMRUNTIME 60000

/* times to read warm memory to get accurate data */
/* preliminary experiments on Ludwig show that we can safely set
 * this to just 2 iterations (first and second 'H' accesses are ~ equal)
 * (it was 3)
 */
#define REFTOTAL 2

#define NS_PER_MS 1000000

struct data_entry {
	unsigned long long timestamp;
	/* cC cold cache access
	 * hH hot cache access
	 * pP preemption / migration
	 */
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long job_count;
	unsigned long sched_count;
	unsigned long last_rt_task;
	unsigned long long preemption_length;
};

/* serializable data entry */
struct saved_data_entry {
	char access_type;
	unsigned long long access_time;
	unsigned int cpu;
	unsigned long long preemption_length;
};

/* long long is a lot of time and should be enough for our needs.
 * However, we keep the saved data as unsigned long long and leave
 * dealing with the conversion to the analysis.
 */
struct full_ovd_plen {
	/* "current" cpu */
	unsigned int curr_cpu;
	/* last "seen" cpu (curr != last --> migration) */
	unsigned int last_cpu;
	/* overhead */
	long long ovd;
	/* preemption length */
	long long plen;
};

struct ovd_plen {
	long long ovd;
	long long plen;
};
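/*
 * Illustrative sketch (kept under #if 0, not part of the header API):
 * one way the macros and struct data_entry above could be used by a
 * measurement task.  The array ws[], the functions access_ws(),
 * measure_one_pass() and get_cycles(), and the use of srand()/rand()
 * and sched_getcpu() are assumptions of this sketch, not anything this
 * header declares.
 */
#if 0
/* NUMWS working sets of INTS_PER_WSS integers, cache-line aligned;
 * srand(SEEDVAL) is assumed to have been called once at startup */
static int ws[NUMWS][INTS_PER_WSS] __attribute__((aligned(CACHEALIGNMENT)));

/* walk one working set once: READRATIO percent of the accesses are
 * reads, the rest are writes */
static int access_ws(int *set)
{
	unsigned int i;
	int sum = 0;

	for (i = 0; i < INTS_PER_WSS; i++) {
		if (rand() % 100 < READRATIO)
			sum += set[i];
		else
			set[i] = i;
	}
	return sum;
}

/* time one pass over a working set and record it as a data_entry;
 * 'c'/'C' would mark the cold pass, 'h'/'H' the REFTOTAL hot passes
 * that follow it (get_cycles() stands in for the real timestamp source)
 */
static void measure_one_pass(int *set, struct data_entry *sample, char type)
{
	unsigned long long start, end;

	start = get_cycles();
	access_ws(set);
	end = get_cycles();

	sample->timestamp = start;
	sample->access_type = type;
	sample->access_time = end - start;
	sample->cpu = sched_getcpu();
}
#endif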
/* write data_entry -> saved_data_entry on disk */
int serialize_data_entry(char *filename, struct data_entry *samples, int num);

/* read saved_data_entry from disk */
int read_sdata_entry(const char *filename, struct saved_data_entry **samples);

/* get valid overhead from trace file */
int get_valid_ovd(const char *filename, struct full_ovd_plen **full_costs,
		int wss, int tss);

/* get ovd and pm length for different core configurations (on UMA Xeon) */
/* Watch out for different topologies:
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
 */
void get_ovd_plen_umaxeon(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int num_phys_cpu,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);

/* get ovd and pm length for different core configurations */
void get_ovd_plen(struct full_ovd_plen *full_costs, int num_samples,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount);

#endif
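/*
 * Illustrative sketch (excluded from compilation via #if 0): one
 * possible way get_ovd_plen() could bucket a sample into the preempt /
 * same-L2 / same-chip / off-chip arrays, assuming CPUs are numbered so
 * that consecutive ids share an L2 and a chip.  classify_sample() is a
 * hypothetical helper, not something this header declares; the real
 * topology should be checked against
 * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list.
 */
#if 0
static void classify_sample(const struct full_ovd_plen *s,
		unsigned int cores_per_l2, unsigned int cores_per_chip,
		struct ovd_plen *preempt, int *pcount,
		struct ovd_plen *samel2, int *l2count,
		struct ovd_plen *samechip, int *chipcount,
		struct ovd_plen *offchip, int *offcount)
{
	struct ovd_plen *dst;
	int *count;

	if (s->curr_cpu == s->last_cpu) {
		/* same cpu: plain preemption */
		dst = preempt;
		count = pcount;
	} else if (s->curr_cpu / cores_per_l2 == s->last_cpu / cores_per_l2) {
		/* different cpu, shared L2 */
		dst = samel2;
		count = l2count;
	} else if (s->curr_cpu / cores_per_chip ==
		   s->last_cpu / cores_per_chip) {
		/* different L2, same chip */
		dst = samechip;
		count = chipcount;
	} else {
		/* different chips */
		dst = offchip;
		count = offcount;
	}

	dst[*count].ovd = s->ovd;
	dst[*count].plen = s->plen;
	(*count)++;
}
#endif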