diff options
| author | Suzuki K Poulose <suzuki.poulose@arm.com> | 2018-09-20 15:17:57 -0400 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2018-09-25 14:09:18 -0400 |
| commit | 22f429f19c4135d51e9dcaf360c0920e32aac7f9 (patch) | |
| tree | 4bb516c8eac07d97de19fd1825f2c2c16689ba2c /drivers/hwtracing/coresight | |
| parent | 3d6e8935758392179645e1b105789b3da329ad38 (diff) | |
coresight: etm-perf: Add support for ETR backend
Add support for using TMC-ETR as backend for ETM perf tracing.
We use software double buffering at the moment, i.e., the TMC-ETR
uses a buffer separate from the perf ring buffer. The data is
copied to the perf ring buffer once a session completes.
The TMC-ETR tries to match the larger of the perf ring buffer size
and the ETR buffer size configured via sysfs, scaling down to
a minimum limit of 1MB if the allocation fails.
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/hwtracing/coresight')
| -rw-r--r-- | drivers/hwtracing/coresight/coresight-tmc-etr.c | 248 | ||||
| -rw-r--r-- | drivers/hwtracing/coresight/coresight-tmc.h | 2 |
2 files changed, 248 insertions, 2 deletions
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 1aedfc3629c0..56fea4ff947e 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
| 11 | #include <linux/vmalloc.h> | 11 | #include <linux/vmalloc.h> |
| 12 | #include "coresight-catu.h" | 12 | #include "coresight-catu.h" |
| 13 | #include "coresight-etm-perf.h" | ||
| 13 | #include "coresight-priv.h" | 14 | #include "coresight-priv.h" |
| 14 | #include "coresight-tmc.h" | 15 | #include "coresight-tmc.h" |
| 15 | 16 | ||
| @@ -21,6 +22,28 @@ struct etr_flat_buf { | |||
| 21 | }; | 22 | }; |
| 22 | 23 | ||
| 23 | /* | 24 | /* |
| 25 | * etr_perf_buffer - Per-session state linking the perf ring buffer to the ETR buffer | ||
| 26 | * @etr_buf - Actual buffer used by the ETR; its contents are copied to the perf ring buffer | ||
| 27 | * @snapshot - True when the perf session runs in snapshot mode. | ||
| 28 | * @head - handle->head at the beginning of the session, as an offset within the perf ring buffer. | ||
| 29 | * @nr_pages - Number of pages in the perf ring buffer. | ||
| 30 | * @pages - Array of pages making up the perf ring buffer. | ||
| 31 | */ | ||
| 32 | struct etr_perf_buffer { | ||
| 33 | struct etr_buf *etr_buf; | ||
| 34 | bool snapshot; | ||
| 35 | unsigned long head; | ||
| 36 | int nr_pages; | ||
| 37 | void **pages; | ||
| 38 | }; | ||
| 39 | |||
/*
 * Convert a perf index (e.g. handle->head) to an offset within the perf
 * ring buffer described by @buf (a struct etr_perf_buffer).
 *
 * nr_pages is an int, so it is widened to unsigned long before the shift;
 * otherwise "nr_pages << PAGE_SHIFT" overflows for ring buffers of 2GB
 * and above and the modulo is taken against a bogus size.
 */
#define PERF_IDX2OFF(idx, buf)		\
	((idx) % ((unsigned long)(buf)->nr_pages << PAGE_SHIFT))

/* Lower limit for ETR hardware buffer */
#define TMC_ETR_PERF_MIN_BUF_SIZE	SZ_1M
| 45 | |||
| 46 | /* | ||
| 24 | * The TMC ETR SG has a page size of 4K. The SG table contains pointers | 47 | * The TMC ETR SG has a page size of 4K. The SG table contains pointers |
| 25 | * to 4KB buffers. However, the OS may use a PAGE_SIZE different from | 48 | * to 4KB buffers. However, the OS may use a PAGE_SIZE different from |
| 26 | * 4K (i.e, 16KB or 64KB). This implies that a single OS page could | 49 | * 4K (i.e, 16KB or 64KB). This implies that a single OS page could |
| @@ -1103,10 +1126,228 @@ out: | |||
| 1103 | return ret; | 1126 | return ret; |
| 1104 | } | 1127 | } |
| 1105 | 1128 | ||
| 1129 | /* | ||
| 1130 | * tmc_etr_setup_perf_buf: Allocate ETR buffer for use by perf. | ||
| 1131 | * The size of the hardware buffer is dependent on the size configured | ||
| 1132 | * via sysfs and the perf ring buffer size. We prefer to allocate the | ||
| 1133 | * largest possible size, scaling down the size by half until it | ||
| 1134 | * reaches a minimum limit (1M), beyond which we give up. | ||
| 1135 | */ | ||
| 1136 | static struct etr_perf_buffer * | ||
| 1137 | tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, int node, int nr_pages, | ||
| 1138 | void **pages, bool snapshot) | ||
| 1139 | { | ||
| 1140 | struct etr_buf *etr_buf; | ||
| 1141 | struct etr_perf_buffer *etr_perf; | ||
| 1142 | unsigned long size; | ||
| 1143 | |||
| 1144 | etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); | ||
| 1145 | if (!etr_perf) | ||
| 1146 | return ERR_PTR(-ENOMEM); | ||
| 1147 | |||
| 1148 | /* | ||
| 1149 | * Try to match the perf ring buffer size if it is larger | ||
| 1150 | * than the size requested via sysfs. | ||
| 1151 | */ | ||
| 1152 | if ((nr_pages << PAGE_SHIFT) > drvdata->size) { | ||
| 1153 | etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT), | ||
| 1154 | 0, node, NULL); | ||
| 1155 | if (!IS_ERR(etr_buf)) | ||
| 1156 | goto done; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | /* | ||
| 1160 | * Else switch to configured size for this ETR | ||
| 1161 | * and scale down until we hit the minimum limit. | ||
| 1162 | */ | ||
| 1163 | size = drvdata->size; | ||
| 1164 | do { | ||
| 1165 | etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL); | ||
| 1166 | if (!IS_ERR(etr_buf)) | ||
| 1167 | goto done; | ||
| 1168 | size /= 2; | ||
| 1169 | } while (size >= TMC_ETR_PERF_MIN_BUF_SIZE); | ||
| 1170 | |||
| 1171 | kfree(etr_perf); | ||
| 1172 | return ERR_PTR(-ENOMEM); | ||
| 1173 | |||
| 1174 | done: | ||
| 1175 | etr_perf->etr_buf = etr_buf; | ||
| 1176 | return etr_perf; | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | |||
| 1180 | static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, | ||
| 1181 | int cpu, void **pages, int nr_pages, | ||
| 1182 | bool snapshot) | ||
| 1183 | { | ||
| 1184 | struct etr_perf_buffer *etr_perf; | ||
| 1185 | struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); | ||
| 1186 | |||
| 1187 | if (cpu == -1) | ||
| 1188 | cpu = smp_processor_id(); | ||
| 1189 | |||
| 1190 | etr_perf = tmc_etr_setup_perf_buf(drvdata, cpu_to_node(cpu), | ||
| 1191 | nr_pages, pages, snapshot); | ||
| 1192 | if (IS_ERR(etr_perf)) { | ||
| 1193 | dev_dbg(drvdata->dev, "Unable to allocate ETR buffer\n"); | ||
| 1194 | return NULL; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | etr_perf->snapshot = snapshot; | ||
| 1198 | etr_perf->nr_pages = nr_pages; | ||
| 1199 | etr_perf->pages = pages; | ||
| 1200 | |||
| 1201 | return etr_perf; | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | static void tmc_free_etr_buffer(void *config) | ||
| 1205 | { | ||
| 1206 | struct etr_perf_buffer *etr_perf = config; | ||
| 1207 | |||
| 1208 | if (etr_perf->etr_buf) | ||
| 1209 | tmc_free_etr_buf(etr_perf->etr_buf); | ||
| 1210 | kfree(etr_perf); | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | /* | ||
| 1214 | * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware | ||
| 1215 | * buffer to the perf ring buffer. | ||
| 1216 | */ | ||
| 1217 | static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf) | ||
| 1218 | { | ||
| 1219 | long bytes, to_copy; | ||
| 1220 | long pg_idx, pg_offset, src_offset; | ||
| 1221 | unsigned long head = etr_perf->head; | ||
| 1222 | char **dst_pages, *src_buf; | ||
| 1223 | struct etr_buf *etr_buf = etr_perf->etr_buf; | ||
| 1224 | |||
| 1225 | head = etr_perf->head; | ||
| 1226 | pg_idx = head >> PAGE_SHIFT; | ||
| 1227 | pg_offset = head & (PAGE_SIZE - 1); | ||
| 1228 | dst_pages = (char **)etr_perf->pages; | ||
| 1229 | src_offset = etr_buf->offset; | ||
| 1230 | to_copy = etr_buf->len; | ||
| 1231 | |||
| 1232 | while (to_copy > 0) { | ||
| 1233 | /* | ||
| 1234 | * In one iteration, we can copy minimum of : | ||
| 1235 | * 1) what is available in the source buffer, | ||
| 1236 | * 2) what is available in the source buffer, before it | ||
| 1237 | * wraps around. | ||
| 1238 | * 3) what is available in the destination page. | ||
| 1239 | * in one iteration. | ||
| 1240 | */ | ||
| 1241 | bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy, | ||
| 1242 | &src_buf); | ||
| 1243 | if (WARN_ON_ONCE(bytes <= 0)) | ||
| 1244 | break; | ||
| 1245 | bytes = min(bytes, (long)(PAGE_SIZE - pg_offset)); | ||
| 1246 | |||
| 1247 | memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes); | ||
| 1248 | |||
| 1249 | to_copy -= bytes; | ||
| 1250 | |||
| 1251 | /* Move destination pointers */ | ||
| 1252 | pg_offset += bytes; | ||
| 1253 | if (pg_offset == PAGE_SIZE) { | ||
| 1254 | pg_offset = 0; | ||
| 1255 | if (++pg_idx == etr_perf->nr_pages) | ||
| 1256 | pg_idx = 0; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | /* Move source pointers */ | ||
| 1260 | src_offset += bytes; | ||
| 1261 | if (src_offset >= etr_buf->size) | ||
| 1262 | src_offset -= etr_buf->size; | ||
| 1263 | } | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | /* | ||
| 1267 | * tmc_update_etr_buffer : Update the perf ring buffer with the | ||
| 1268 | * available trace data. We use software double buffering at the moment. | ||
| 1269 | * | ||
| 1270 | * TODO: Add support for reusing the perf ring buffer. | ||
| 1271 | */ | ||
| 1272 | static unsigned long | ||
| 1273 | tmc_update_etr_buffer(struct coresight_device *csdev, | ||
| 1274 | struct perf_output_handle *handle, | ||
| 1275 | void *config) | ||
| 1276 | { | ||
| 1277 | bool lost = false; | ||
| 1278 | unsigned long flags, size = 0; | ||
| 1279 | struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); | ||
| 1280 | struct etr_perf_buffer *etr_perf = config; | ||
| 1281 | struct etr_buf *etr_buf = etr_perf->etr_buf; | ||
| 1282 | |||
| 1283 | spin_lock_irqsave(&drvdata->spinlock, flags); | ||
| 1284 | if (WARN_ON(drvdata->perf_data != etr_perf)) { | ||
| 1285 | lost = true; | ||
| 1286 | spin_unlock_irqrestore(&drvdata->spinlock, flags); | ||
| 1287 | goto out; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | CS_UNLOCK(drvdata->base); | ||
| 1291 | |||
| 1292 | tmc_flush_and_stop(drvdata); | ||
| 1293 | tmc_sync_etr_buf(drvdata); | ||
| 1294 | |||
| 1295 | CS_LOCK(drvdata->base); | ||
| 1296 | /* Reset perf specific data */ | ||
| 1297 | drvdata->perf_data = NULL; | ||
| 1298 | spin_unlock_irqrestore(&drvdata->spinlock, flags); | ||
| 1299 | |||
| 1300 | size = etr_buf->len; | ||
| 1301 | tmc_etr_sync_perf_buffer(etr_perf); | ||
| 1302 | |||
| 1303 | /* | ||
| 1304 | * Update handle->head in snapshot mode. Also update the size to the | ||
| 1305 | * hardware buffer size if there was an overflow. | ||
| 1306 | */ | ||
| 1307 | if (etr_perf->snapshot) { | ||
| 1308 | handle->head += size; | ||
| 1309 | if (etr_buf->full) | ||
| 1310 | size = etr_buf->size; | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | lost |= etr_buf->full; | ||
| 1314 | out: | ||
| 1315 | if (lost) | ||
| 1316 | perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); | ||
| 1317 | return size; | ||
| 1318 | } | ||
| 1319 | |||
| 1106 | static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) | 1320 | static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) |
| 1107 | { | 1321 | { |
| 1108 | /* We don't support perf mode yet ! */ | 1322 | int rc = 0; |
| 1109 | return -EINVAL; | 1323 | unsigned long flags; |
| 1324 | struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); | ||
| 1325 | struct perf_output_handle *handle = data; | ||
| 1326 | struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); | ||
| 1327 | |||
| 1328 | spin_lock_irqsave(&drvdata->spinlock, flags); | ||
| 1329 | /* | ||
| 1330 | * There can be only one writer per sink in perf mode. If the sink | ||
| 1331 | * is already open in SYSFS mode, we can't use it. | ||
| 1332 | */ | ||
| 1333 | if (drvdata->mode != CS_MODE_DISABLED || WARN_ON(drvdata->perf_data)) { | ||
| 1334 | rc = -EBUSY; | ||
| 1335 | goto unlock_out; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) { | ||
| 1339 | rc = -EINVAL; | ||
| 1340 | goto unlock_out; | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); | ||
| 1344 | drvdata->perf_data = etr_perf; | ||
| 1345 | drvdata->mode = CS_MODE_PERF; | ||
| 1346 | tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); | ||
| 1347 | |||
| 1348 | unlock_out: | ||
| 1349 | spin_unlock_irqrestore(&drvdata->spinlock, flags); | ||
| 1350 | return rc; | ||
| 1110 | } | 1351 | } |
| 1111 | 1352 | ||
| 1112 | static int tmc_enable_etr_sink(struct coresight_device *csdev, | 1353 | static int tmc_enable_etr_sink(struct coresight_device *csdev, |
| @@ -1148,6 +1389,9 @@ static void tmc_disable_etr_sink(struct coresight_device *csdev) | |||
| 1148 | static const struct coresight_ops_sink tmc_etr_sink_ops = { | 1389 | static const struct coresight_ops_sink tmc_etr_sink_ops = { |
| 1149 | .enable = tmc_enable_etr_sink, | 1390 | .enable = tmc_enable_etr_sink, |
| 1150 | .disable = tmc_disable_etr_sink, | 1391 | .disable = tmc_disable_etr_sink, |
| 1392 | .alloc_buffer = tmc_alloc_etr_buffer, | ||
| 1393 | .update_buffer = tmc_update_etr_buffer, | ||
| 1394 | .free_buffer = tmc_free_etr_buffer, | ||
| 1151 | }; | 1395 | }; |
| 1152 | 1396 | ||
| 1153 | const struct coresight_ops tmc_etr_cs_ops = { | 1397 | const struct coresight_ops tmc_etr_cs_ops = { |
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 872f63e3651b..487c53701e9c 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h | |||
| @@ -170,6 +170,7 @@ struct etr_buf { | |||
| 170 | * @trigger_cntr: amount of words to store after a trigger. | 170 | * @trigger_cntr: amount of words to store after a trigger. |
| 171 | * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the | 171 | * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the |
| 172 | * device configuration register (DEVID) | 172 | * device configuration register (DEVID) |
| 173 | * @perf_data: PERF buffer for ETR. | ||
| 173 | * @sysfs_data: SYSFS buffer for ETR. | 174 | * @sysfs_data: SYSFS buffer for ETR. |
| 174 | */ | 175 | */ |
| 175 | struct tmc_drvdata { | 176 | struct tmc_drvdata { |
| @@ -191,6 +192,7 @@ struct tmc_drvdata { | |||
| 191 | u32 trigger_cntr; | 192 | u32 trigger_cntr; |
| 192 | u32 etr_caps; | 193 | u32 etr_caps; |
| 193 | struct etr_buf *sysfs_buf; | 194 | struct etr_buf *sysfs_buf; |
| 195 | void *perf_data; | ||
| 194 | }; | 196 | }; |
| 195 | 197 | ||
| 196 | struct etr_buf_operations { | 198 | struct etr_buf_operations { |
