diff options
author | Kent Russell <kent.russell@amd.com> | 2019-01-03 08:12:39 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-01-14 15:04:54 -0500 |
commit | b45e18acd394954c24943762ada5d8dada75f2b9 (patch) | |
tree | 40ffc68e9c4e6c4449edf17d4401d4dcc5a54503 /drivers/gpu | |
parent | a0bb79e2559c9330c82080d6e4f8c762d72ed0f1 (diff) |
drm/amdgpu: Add sysfs file for PCIe usage v5
Add a sysfs file that reports the number of PCIe messages received and
sent in the last second, together with the maximum payload size (mps).
This can be used to approximate the PCIe bandwidth usage over the last second.
v2: Clarify use of mps as estimation of bandwidth
v3: Don't make the file on APUs
v4: Early exit for APUs in the read function, change output to
display "packets-received packets-sent mps"
v5: fix missing header for si (Alex)
Signed-off-by: Kent Russell <kent.russell@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 36 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/cik.c | 47 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/si.c | 48 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/soc15.c | 50 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vi.c | 47 |
6 files changed, 232 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bcef6ea4bcf9..3b30bb2cdd21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
@@ -542,6 +542,9 @@ struct amdgpu_asic_funcs { | |||
542 | bool (*need_full_reset)(struct amdgpu_device *adev); | 542 | bool (*need_full_reset)(struct amdgpu_device *adev); |
543 | /* initialize doorbell layout for specific asic*/ | 543 | /* initialize doorbell layout for specific asic*/ |
544 | void (*init_doorbell_index)(struct amdgpu_device *adev); | 544 | void (*init_doorbell_index)(struct amdgpu_device *adev); |
545 | /* PCIe bandwidth usage */ | ||
546 | void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, | ||
547 | uint64_t *count1); | ||
545 | }; | 548 | }; |
546 | 549 | ||
547 | /* | 550 | /* |
@@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); | |||
1042 | #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) | 1045 | #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) |
1043 | #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) | 1046 | #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) |
1044 | #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) | 1047 | #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) |
1048 | #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) | ||
1045 | 1049 | ||
1046 | /* Common functions */ | 1050 | /* Common functions */ |
1047 | bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); | 1051 | bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6896dec97fc7..b38c06f0196e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | |||
@@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev, | |||
990 | return snprintf(buf, PAGE_SIZE, "%d\n", value); | 990 | return snprintf(buf, PAGE_SIZE, "%d\n", value); |
991 | } | 991 | } |
992 | 992 | ||
993 | /** | ||
994 | * DOC: pcie_bw | ||
995 | * | ||
996 | * The amdgpu driver provides the sysfs file pcie_bw for estimating how much | ||
997 | * data the GPU has received and sent over PCIe. Reading the file samples the | ||
998 | * PCIe perf counters for one second and prints three values: the number of | ||
999 | * received messages, the number of sent messages, and the maximum payload | ||
1000 | * size of a PCIe packet (mps). | ||
1001 | * Note that it is not possible to easily and quickly obtain the size of each | ||
1002 | * packet transmitted, so the mps is reported to allow a quick upper-bound | ||
1003 | * estimate of the PCIe bandwidth usage (message count * mps). | ||
1004 | */ | ||
1005 | static ssize_t amdgpu_get_pcie_bw(struct device *dev, | ||
1006 | struct device_attribute *attr, | ||
1007 | char *buf) | ||
1008 | { | ||
1009 | struct drm_device *ddev = dev_get_drvdata(dev); | ||
1010 | struct amdgpu_device *adev = ddev->dev_private; | ||
1011 | uint64_t count0 = 0, count1 = 0; /* stay 0 if the ASIC hook returns early */ | ||
1012 | |||
1013 | amdgpu_asic_get_pcie_usage(adev, &count0, &count1); | ||
1014 | return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n", | ||
1015 | count0, count1, pcie_get_mps(adev->pdev)); | ||
1016 | } | ||
1017 | |||
993 | static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); | 1018 | static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); |
994 | static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, | 1019 | static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, |
995 | amdgpu_get_dpm_forced_performance_level, | 1020 | amdgpu_get_dpm_forced_performance_level, |
@@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, | |||
1025 | amdgpu_set_pp_od_clk_voltage); | 1050 | amdgpu_set_pp_od_clk_voltage); |
1026 | static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, | 1051 | static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, |
1027 | amdgpu_get_busy_percent, NULL); | 1052 | amdgpu_get_busy_percent, NULL); |
1053 | static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); | ||
1028 | 1054 | ||
1029 | static ssize_t amdgpu_hwmon_show_temp(struct device *dev, | 1055 | static ssize_t amdgpu_hwmon_show_temp(struct device *dev, |
1030 | struct device_attribute *attr, | 1056 | struct device_attribute *attr, |
@@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) | |||
2108 | "gpu_busy_level\n"); | 2134 | "gpu_busy_level\n"); |
2109 | return ret; | 2135 | return ret; |
2110 | } | 2136 | } |
2137 | /* PCIe Perf counters won't work on APU nodes, so only dGPUs get pcie_bw */ | ||
2138 | if (!(adev->flags & AMD_IS_APU)) { | ||
2139 | ret = device_create_file(adev->dev, &dev_attr_pcie_bw); | ||
2140 | if (ret) { | ||
2141 | DRM_ERROR("failed to create device file pcie_bw\n"); | ||
2142 | return ret; | ||
2143 | } | ||
2144 | } | ||
2111 | ret = amdgpu_debugfs_pm_init(adev); | 2145 | ret = amdgpu_debugfs_pm_init(adev); |
2112 | if (ret) { | 2146 | if (ret) { |
2113 | DRM_ERROR("Failed to register debugfs file for dpm!\n"); | 2147 | DRM_ERROR("Failed to register debugfs file for dpm!\n"); |
@@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) | |||
2147 | device_remove_file(adev->dev, | 2181 | device_remove_file(adev->dev, |
2148 | &dev_attr_pp_od_clk_voltage); | 2182 | &dev_attr_pp_od_clk_voltage); |
2149 | device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); | 2183 | device_remove_file(adev->dev, &dev_attr_gpu_busy_percent); |
2184 | if (!(adev->flags & AMD_IS_APU)) /* mirrors the creation check in init */ | ||
2185 | device_remove_file(adev->dev, &dev_attr_pcie_bw); | ||
2150 | } | 2186 | } |
2151 | 2187 | ||
2152 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) | 2188 | void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 71c50d8900e3..6277de51483f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c | |||
@@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev) | |||
1741 | return true; | 1741 | return true; |
1742 | } | 1742 | } |
1743 | 1743 | ||
1744 | /* | ||
1745 | * Sample the PCIe perf counters over a one-second window. | ||
1746 | * On return *count0 holds the number of received messages and *count1 the | ||
1747 | * number of posted requests sent, each widened with its overflow bits. | ||
1748 | * Both are set to 0 on APUs, where the counters are not touched. | ||
1749 | */ | ||
1750 | static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, | ||
1751 | uint64_t *count1) | ||
1752 | { | ||
1753 | uint32_t perfctr = 0; | ||
1754 | uint64_t cnt0_of, cnt1_of; | ||
1755 | uint32_t tmp; | ||
1756 | |||
1757 | /* Outputs stay 0 if the APU early return below is taken */ | ||
1758 | *count0 = 0; | ||
1759 | *count1 = 0; | ||
1760 | |||
1761 | /* This reports 0 on APUs, so return to avoid writing/reading registers | ||
1762 | * that may or may not be different from their GPU counterparts | ||
1763 | */ | ||
1764 | if (adev->flags & AMD_IS_APU) | ||
1765 | return; | ||
1766 | |||
1767 | /* Event 40 is # received msgs, event 104 is # of posted requests sent */ | ||
1768 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); | ||
1769 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); | ||
1770 | WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); | ||
1771 | |||
1772 | /* 0x5: start all counters (bit 0) with global counter reset (bit 2) */ | ||
1773 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); | ||
1774 | |||
1775 | msleep(1000); | ||
1776 | |||
1777 | /* 0x2: stop the counters (bit 0 clear), load the shadow counters (bit 1) */ | ||
1778 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); | ||
1779 | |||
1780 | /* Read register values to get any >32bit overflow */ | ||
1781 | tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); | ||
1782 | cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); | ||
1783 | cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); | ||
1784 | |||
1785 | /* Get the values and add the overflow */ | ||
1786 | *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); | ||
1787 | *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); | ||
1788 | } | ||
1789 | |||
1744 | static const struct amdgpu_asic_funcs cik_asic_funcs = | 1790 | static const struct amdgpu_asic_funcs cik_asic_funcs = |
1745 | { | 1791 | { |
1746 | .read_disabled_bios = &cik_read_disabled_bios, | 1792 | .read_disabled_bios = &cik_read_disabled_bios, |
@@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = | |||
1756 | .invalidate_hdp = &cik_invalidate_hdp, | 1802 | .invalidate_hdp = &cik_invalidate_hdp, |
1757 | .need_full_reset = &cik_need_full_reset, | 1803 | .need_full_reset = &cik_need_full_reset, |
1758 | .init_doorbell_index = &legacy_doorbell_index_init, | 1804 | .init_doorbell_index = &legacy_doorbell_index_init, |
1805 | .get_pcie_usage = &cik_get_pcie_usage, | ||
1759 | }; | 1806 | }; |
1760 | 1807 | ||
1761 | static int cik_common_early_init(void *handle) | 1808 | static int cik_common_early_init(void *handle) |
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f8408f88cd37..7d2a48727e76 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "dce/dce_6_0_d.h" | 47 | #include "dce/dce_6_0_d.h" |
48 | #include "uvd/uvd_4_0_d.h" | 48 | #include "uvd/uvd_4_0_d.h" |
49 | #include "bif/bif_3_0_d.h" | 49 | #include "bif/bif_3_0_d.h" |
50 | #include "bif/bif_3_0_sh_mask.h" | ||
50 | 51 | ||
51 | static const u32 tahiti_golden_registers[] = | 52 | static const u32 tahiti_golden_registers[] = |
52 | { | 53 | { |
@@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes) | |||
1323 | WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); | 1324 | WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); |
1324 | } | 1325 | } |
1325 | 1326 | ||
1327 | /* | ||
1328 | * Sample the PCIe perf counters over a one-second window. | ||
1329 | * On return *count0 holds the number of received messages and *count1 the | ||
1330 | * number of posted requests sent, each widened with its overflow bits. | ||
1331 | * Both are set to 0 on APUs, where the counters are not touched. | ||
1332 | */ | ||
1333 | static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, | ||
1334 | uint64_t *count1) | ||
1335 | { | ||
1336 | uint32_t perfctr = 0; | ||
1337 | uint64_t cnt0_of, cnt1_of; | ||
1338 | uint32_t tmp; | ||
1339 | |||
1340 | /* Outputs stay 0 if the APU early return below is taken */ | ||
1341 | *count0 = 0; | ||
1342 | *count1 = 0; | ||
1343 | |||
1344 | /* This reports 0 on APUs, so return to avoid writing/reading registers | ||
1345 | * that may or may not be different from their GPU counterparts | ||
1346 | */ | ||
1347 | if (adev->flags & AMD_IS_APU) | ||
1348 | return; | ||
1349 | |||
1350 | /* Event 40 is # received msgs, event 104 is # of posted requests sent */ | ||
1351 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); | ||
1352 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); | ||
1353 | WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); | ||
1354 | |||
1355 | /* 0x5: start all counters (bit 0) with global counter reset (bit 2) */ | ||
1356 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); | ||
1357 | |||
1358 | msleep(1000); | ||
1359 | |||
1360 | /* 0x2: stop the counters (bit 0 clear), load the shadow counters (bit 1) */ | ||
1361 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); | ||
1362 | |||
1363 | /* Read register values to get any >32bit overflow */ | ||
1364 | tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); | ||
1365 | cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); | ||
1366 | cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); | ||
1367 | |||
1368 | /* Get the values and add the overflow */ | ||
1369 | *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); | ||
1370 | *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); | ||
1371 | } | ||
1372 | |||
1326 | static const struct amdgpu_asic_funcs si_asic_funcs = | 1373 | static const struct amdgpu_asic_funcs si_asic_funcs = |
1327 | { | 1374 | { |
1328 | .read_disabled_bios = &si_read_disabled_bios, | 1375 | .read_disabled_bios = &si_read_disabled_bios, |
@@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = | |||
1339 | .flush_hdp = &si_flush_hdp, | 1386 | .flush_hdp = &si_flush_hdp, |
1340 | .invalidate_hdp = &si_invalidate_hdp, | 1387 | .invalidate_hdp = &si_invalidate_hdp, |
1341 | .need_full_reset = &si_need_full_reset, | 1388 | .need_full_reset = &si_need_full_reset, |
1389 | .get_pcie_usage = &si_get_pcie_usage, | ||
1342 | }; | 1390 | }; |
1343 | 1391 | ||
1344 | static uint32_t si_get_rev_id(struct amdgpu_device *adev) | 1392 | static uint32_t si_get_rev_id(struct amdgpu_device *adev) |
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8849b74078d6..bb89833ed3e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c | |||
@@ -43,6 +43,9 @@ | |||
43 | #include "hdp/hdp_4_0_sh_mask.h" | 43 | #include "hdp/hdp_4_0_sh_mask.h" |
44 | #include "smuio/smuio_9_0_offset.h" | 44 | #include "smuio/smuio_9_0_offset.h" |
45 | #include "smuio/smuio_9_0_sh_mask.h" | 45 | #include "smuio/smuio_9_0_sh_mask.h" |
46 | #include "nbio/nbio_7_0_default.h" | ||
47 | #include "nbio/nbio_7_0_sh_mask.h" | ||
48 | #include "nbio/nbio_7_0_smn.h" | ||
46 | 49 | ||
47 | #include "soc15.h" | 50 | #include "soc15.h" |
48 | #include "soc15_common.h" | 51 | #include "soc15_common.h" |
@@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev) | |||
601 | /* change this when we implement soft reset */ | 604 | /* change this when we implement soft reset */ |
602 | return true; | 605 | return true; |
603 | } | 606 | } |
607 | /* | ||
608 | * Sample the PCIe perf counters over a one-second window. | ||
609 | * On return *count0 holds the number of received messages and *count1 the | ||
610 | * number of posted requests sent, each widened with its overflow bits. | ||
611 | * Both are set to 0 on APUs, where the counters are not touched. | ||
612 | */ | ||
613 | static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, | ||
614 | uint64_t *count1) | ||
615 | { | ||
616 | uint32_t perfctr = 0; | ||
617 | uint64_t cnt0_of, cnt1_of; | ||
618 | uint32_t tmp; | ||
619 | |||
620 | /* Outputs stay 0 if the APU early return below is taken */ | ||
621 | *count0 = 0; | ||
622 | *count1 = 0; | ||
623 | |||
624 | /* This reports 0 on APUs, so return to avoid writing/reading registers | ||
625 | * that may or may not be different from their GPU counterparts | ||
626 | */ | ||
627 | if (adev->flags & AMD_IS_APU) | ||
628 | return; | ||
629 | |||
630 | /* Event 40 is # received msgs, event 104 is # of posted requests sent */ | ||
631 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); | ||
632 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); | ||
633 | WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); | ||
634 | |||
635 | /* 0x5: start all counters (bit 0) with global counter reset (bit 2) */ | ||
636 | WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); | ||
637 | |||
638 | msleep(1000); | ||
639 | |||
640 | /* 0x2: stop the counters (bit 0 clear), load the shadow counters (bit 1) */ | ||
641 | WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); | ||
642 | |||
643 | /* Read register values to get any >32bit overflow */ | ||
644 | tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK); | ||
645 | cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); | ||
646 | cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); | ||
647 | |||
648 | /* Get the values and add the overflow */ | ||
649 | *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); | ||
650 | *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); | ||
651 | } | ||
604 | 652 | ||
605 | static const struct amdgpu_asic_funcs soc15_asic_funcs = | 653 | static const struct amdgpu_asic_funcs soc15_asic_funcs = |
606 | { | 654 | { |
@@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = | |||
617 | .invalidate_hdp = &soc15_invalidate_hdp, | 665 | .invalidate_hdp = &soc15_invalidate_hdp, |
618 | .need_full_reset = &soc15_need_full_reset, | 666 | .need_full_reset = &soc15_need_full_reset, |
619 | .init_doorbell_index = &vega10_doorbell_index_init, | 667 | .init_doorbell_index = &vega10_doorbell_index_init, |
668 | .get_pcie_usage = &soc15_get_pcie_usage, | ||
620 | }; | 669 | }; |
621 | 670 | ||
622 | static const struct amdgpu_asic_funcs vega20_asic_funcs = | 671 | static const struct amdgpu_asic_funcs vega20_asic_funcs = |
@@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = | |||
634 | .invalidate_hdp = &soc15_invalidate_hdp, | 683 | .invalidate_hdp = &soc15_invalidate_hdp, |
635 | .need_full_reset = &soc15_need_full_reset, | 684 | .need_full_reset = &soc15_need_full_reset, |
636 | .init_doorbell_index = &vega20_doorbell_index_init, | 685 | .init_doorbell_index = &vega20_doorbell_index_init, |
686 | .get_pcie_usage = &soc15_get_pcie_usage, | ||
637 | }; | 687 | }; |
638 | 688 | ||
639 | static int soc15_common_early_init(void *handle) | 689 | static int soc15_common_early_init(void *handle) |
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 03e7be595a0d..cdc8ab8d79d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c | |||
@@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev) | |||
941 | } | 941 | } |
942 | } | 942 | } |
943 | 943 | ||
944 | /* | ||
945 | * Sample the PCIe perf counters over a one-second window. | ||
946 | * On return *count0 holds the number of received messages and *count1 the | ||
947 | * number of posted requests sent, each widened with its overflow bits. | ||
948 | * Both are set to 0 on APUs, where the counters are not touched. | ||
949 | */ | ||
950 | static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, | ||
951 | uint64_t *count1) | ||
952 | { | ||
953 | uint32_t perfctr = 0; | ||
954 | uint64_t cnt0_of, cnt1_of; | ||
955 | uint32_t tmp; | ||
956 | |||
957 | /* Outputs stay 0 if the APU early return below is taken */ | ||
958 | *count0 = 0; | ||
959 | *count1 = 0; | ||
960 | |||
961 | /* This reports 0 on APUs, so return to avoid writing/reading registers | ||
962 | * that may or may not be different from their GPU counterparts | ||
963 | */ | ||
964 | if (adev->flags & AMD_IS_APU) | ||
965 | return; | ||
966 | |||
967 | /* Event 40 is # received msgs, event 104 is # of posted requests sent */ | ||
968 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); | ||
969 | perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); | ||
970 | WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr); | ||
971 | |||
972 | /* 0x5: start all counters (bit 0) with global counter reset (bit 2) */ | ||
973 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005); | ||
974 | |||
975 | msleep(1000); | ||
976 | |||
977 | /* 0x2: stop the counters (bit 0 clear), load the shadow counters (bit 1) */ | ||
978 | WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002); | ||
979 | |||
980 | /* Read register values to get any >32bit overflow */ | ||
981 | tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK); | ||
982 | cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER); | ||
983 | cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER); | ||
984 | |||
985 | /* Get the values and add the overflow */ | ||
986 | *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32); | ||
987 | *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); | ||
988 | } | ||
989 | |||
944 | static const struct amdgpu_asic_funcs vi_asic_funcs = | 990 | static const struct amdgpu_asic_funcs vi_asic_funcs = |
945 | { | 991 | { |
946 | .read_disabled_bios = &vi_read_disabled_bios, | 992 | .read_disabled_bios = &vi_read_disabled_bios, |
@@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = | |||
956 | .invalidate_hdp = &vi_invalidate_hdp, | 1002 | .invalidate_hdp = &vi_invalidate_hdp, |
957 | .need_full_reset = &vi_need_full_reset, | 1003 | .need_full_reset = &vi_need_full_reset, |
958 | .init_doorbell_index = &legacy_doorbell_index_init, | 1004 | .init_doorbell_index = &legacy_doorbell_index_init, |
1005 | .get_pcie_usage = &vi_get_pcie_usage, | ||
959 | }; | 1006 | }; |
960 | 1007 | ||
961 | #define CZ_REV_BRISTOL(rev) \ | 1008 | #define CZ_REV_BRISTOL(rev) \ |