diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2014-05-19 21:06:08 -0400 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2014-05-19 21:06:08 -0400 |
commit | 5f2866d43d9a2e33bc2961edf9966cad5708cc4d (patch) | |
tree | 58a7660043dc264543cf2a4295566635e193a243 | |
parent | 68f124a16f1e0c75e6ce5c31a139da1c222b51d8 (diff) |
Migration: Support systems with more than 32 CPUs.
This patch improves the cpu bit map parsing used to determine
CPU/cluster mapping. Namely, it supports clusters with more
than 32 CPUs. Prior to this patch, mapping would fail due to
unexpected commas in bitfield data read from /proc (groups
of 32 CPUs are separated by commas).
NOTE: Bug could still be encountered if NR_CPUS > 32.
New CPU limit: 4096.
-rw-r--r-- | src/migration.c | 135 |
1 files changed, 109 insertions, 26 deletions
diff --git a/src/migration.c b/src/migration.c index 3bd6d0a..b01d87b 100644 --- a/src/migration.c +++ b/src/migration.c | |||
@@ -27,17 +27,26 @@ int num_online_cpus() | |||
27 | return sysconf(_SC_NPROCESSORS_ONLN); | 27 | return sysconf(_SC_NPROCESSORS_ONLN); |
28 | } | 28 | } |
29 | 29 | ||
30 | static int read_mapping(int idx, const char* which, unsigned long long int* mask) | 30 | static int read_mapping(int idx, const char* which, cpu_set_t** set, size_t *sz) |
31 | { | 31 | { |
32 | /* Max CPUs = 4096 */ | ||
33 | |||
32 | int ret = -1; | 34 | int ret = -1; |
33 | char buf[129] = {0}; | 35 | char buf[4096/4 /* enough chars for hex data (4 CPUs per char) */ |
36 | + 4096/(4*8) /* for commas (separate groups of 8 chars) */ | ||
37 | + 1] = {0}; /* for \0 */ | ||
34 | char fname[80] = {0}; | 38 | char fname[80] = {0}; |
35 | 39 | ||
36 | if (num_online_cpus() > 64) { | 40 | char* chunk_str; |
37 | /* XXX: Support more than 64 CPUs. | 41 | int len, nbits; |
38 | * User can still set appropriate values directly. */ | 42 | int i; |
43 | |||
44 | /* init vals returned to caller */ | ||
45 | *set = NULL; | ||
46 | *sz = 0; | ||
47 | |||
48 | if (num_online_cpus() > 4096) | ||
39 | goto out; | 49 | goto out; |
40 | } | ||
41 | 50 | ||
42 | snprintf(fname, sizeof(fname), "/proc/litmus/%s/%d", which, idx); | 51 | snprintf(fname, sizeof(fname), "/proc/litmus/%s/%d", which, idx); |
43 | 52 | ||
@@ -45,30 +54,115 @@ static int read_mapping(int idx, const char* which, unsigned long long int* mask | |||
45 | if (ret <= 0) | 54 | if (ret <= 0) |
46 | goto out; | 55 | goto out; |
47 | 56 | ||
48 | *mask = strtoull(buf, NULL, 16); | 57 | len = strnlen(buf, sizeof(buf)); |
58 | nbits = 32*(len/9 + 1); /* buf is a series of comma + 32-bits as hex */ | ||
59 | |||
60 | *set = CPU_ALLOC(nbits); | ||
61 | *sz = CPU_ALLOC_SIZE(nbits); | ||
62 | CPU_ZERO_S(*sz, *set); | ||
63 | |||
64 | /* process LSB chunks first (at the end of the str) and move backward */ | ||
65 | chunk_str = buf + len - 8; | ||
66 | i = 0; | ||
67 | while (chunk_str >= buf) { | ||
68 | unsigned long chunk = strtoul(chunk_str, NULL, 16); | ||
69 | while (chunk) { | ||
70 | int j = ffsl(chunk) - 1; | ||
71 | CPU_SET_S(i*32 + j, *sz, *set); | ||
72 | chunk &= ~(1ul << j); | ||
73 | } | ||
74 | |||
75 | chunk_str -= 9; | ||
76 | i += 1; | ||
77 | } | ||
78 | |||
49 | ret = 0; | 79 | ret = 0; |
50 | 80 | ||
51 | out: | 81 | out: |
52 | return ret; | 82 | return ret; |
53 | } | 83 | } |
54 | 84 | ||
/*
 * Pack the low 64 bits of a dynamically-sized CPU set into an
 * unsigned long long bitmask: bit i is set iff CPU i is in 'bits'.
 * CPUs with index >= 64 are silently ignored; callers guard against
 * that case by checking num_online_cpus() beforehand.
 */
static unsigned long long int cpusettoull(cpu_set_t* bits, size_t sz)
{
	unsigned long long mask = 0;
	size_t cpu;

	for (cpu = 0; cpu < sizeof(mask) * 8; ++cpu)
		if (CPU_ISSET_S(cpu, sz, bits))
			mask |= 1ull << cpu;

	return mask;
}
98 | |||
/*
 * Look up the bitmask of CPUs that belong to scheduling domain 'domain'.
 * On success, stores the mask in *mask and returns 0; returns a negative
 * value if the mapping cannot be read or if there are more online CPUs
 * than fit in an unsigned long long.
 */
int domain_to_cpus(int domain, unsigned long long int* mask)
{
	/* TODO: Support more than 64 CPUs. Instead of using 'ull' for 'mask',
	   consider using gcc's __uint128_t or some struct. */

	cpu_set_t *cpus;
	size_t setsz;
	int err;

	/* number of CPUs exceeds what we can pack in ull */
	if (num_online_cpus() > sizeof(unsigned long long int)*8)
		return -1;

	err = read_mapping(domain, "domains", &cpus, &setsz);
	if (err)
		return err;

	*mask = cpusettoull(cpus, setsz);
	CPU_FREE(cpus);
	return 0;
}
59 | 120 | ||
/*
 * Look up the bitmask of scheduling domains that contain CPU 'cpu'.
 * On success, stores the mask in *mask and returns 0; returns a negative
 * value if the mapping cannot be read or if there are more online CPUs
 * than fit in an unsigned long long.
 */
int cpu_to_domains(int cpu, unsigned long long int* mask)
{
	/* TODO: Support more than 64 domains. Instead of using 'ull' for 'mask',
	   consider using gcc's __uint128_t or some struct. */

	cpu_set_t *domains;
	size_t setsz;
	int err;

	/* number of CPUs exceeds what we can pack in ull */
	if (num_online_cpus() > sizeof(unsigned long long int)*8)
		return -1;

	err = read_mapping(cpu, "cpus", &domains, &setsz);
	if (err)
		return err;

	*mask = cpusettoull(domains, setsz);
	CPU_FREE(domains);
	return 0;
}
64 | 142 | ||
/*
 * Return the index of the lowest-numbered online CPU in scheduling
 * domain 'domain'. Returns a negative value if the mapping cannot be
 * read, or -1 if no online CPU belongs to the domain.
 */
int domain_to_first_cpu(int domain)
{
	cpu_set_t *cpus;
	size_t setsz;
	int status, cpu, limit;
	int lowest = -1; /* assume failure */

	status = read_mapping(domain, "domains", &cpus, &setsz);
	if (status)
		return status;

	/* scan upward; stop as soon as a member CPU is found */
	limit = num_online_cpus();
	for (cpu = 0; cpu < limit && lowest < 0; ++cpu)
		if (CPU_ISSET_S(cpu, setsz, cpus))
			lowest = cpu;

	CPU_FREE(cpus);
	return lowest;
}
73 | 167 | ||
74 | int be_migrate_thread_to_cpu(pid_t tid, int target_cpu) | 168 | int be_migrate_thread_to_cpu(pid_t tid, int target_cpu) |
@@ -111,9 +205,8 @@ int be_migrate_thread_to_domain(pid_t tid, int domain) | |||
111 | int ret, num_cpus; | 205 | int ret, num_cpus; |
112 | cpu_set_t *cpu_set; | 206 | cpu_set_t *cpu_set; |
113 | size_t sz; | 207 | size_t sz; |
114 | unsigned long long int mask; | ||
115 | 208 | ||
116 | ret = domain_to_cpus(domain, &mask); | 209 | ret = read_mapping(domain, "domains", &cpu_set, &sz); |
117 | if (ret != 0) | 210 | if (ret != 0) |
118 | return ret; | 211 | return ret; |
119 | 212 | ||
@@ -121,16 +214,6 @@ int be_migrate_thread_to_domain(pid_t tid, int domain) | |||
121 | if (num_cpus == -1) | 214 | if (num_cpus == -1) |
122 | return -1; | 215 | return -1; |
123 | 216 | ||
124 | cpu_set = CPU_ALLOC(num_cpus); | ||
125 | sz = CPU_ALLOC_SIZE(num_cpus); | ||
126 | CPU_ZERO_S(sz, cpu_set); | ||
127 | |||
128 | while(mask) { | ||
129 | int idx = ffsll(mask) - 1; | ||
130 | CPU_SET_S(idx, sz, cpu_set); | ||
131 | mask &= ~(1ull<<idx); | ||
132 | } | ||
133 | |||
134 | /* apply to caller */ | 217 | /* apply to caller */ |
135 | if (tid == 0) | 218 | if (tid == 0) |
136 | tid = gettid(); | 219 | tid = gettid(); |