summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorKent Russell <kent.russell@amd.com>2019-01-03 08:12:39 -0500
committerAlex Deucher <alexander.deucher@amd.com>2019-01-14 15:04:54 -0500
commitb45e18acd394954c24943762ada5d8dada75f2b9 (patch)
tree40ffc68e9c4e6c4449edf17d4401d4dcc5a54503 /drivers
parenta0bb79e2559c9330c82080d6e4f8c762d72ed0f1 (diff)
drm/amdgpu: Add sysfs file for PCIe usage v5
Add a sysfs file that reports the number of bytes transmitted and received in the last second. This can be used to approximate the PCIe bandwidth usage over the last second. v2: Clarify use of mps as estimation of bandwidth v3: Don't make the file on APUs v4: Early exit for APUs in the read function, change output to display "packets-received packets-sent mps" v5: fix missing header for si (Alex) Signed-off-by: Kent Russell <kent.russell@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si.c48
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c50
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c47
6 files changed, 232 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bcef6ea4bcf9..3b30bb2cdd21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -542,6 +542,9 @@ struct amdgpu_asic_funcs {
bool (*need_full_reset)(struct amdgpu_device *adev);
/* initialize doorbell layout for specific asic*/
void (*init_doorbell_index)(struct amdgpu_device *adev);
+ /* PCIe bandwidth usage */
+ void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1);
};
/*
@@ -1042,6 +1045,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
+#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 6896dec97fc7..b38c06f0196e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -990,6 +990,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
+/**
+ * DOC: pcie_bw
+ *
+ * The amdgpu driver provides a sysfs API for estimating how much data
+ * has been received and sent by the GPU in the last second through PCIe.
+ * The file pcie_bw is used for this.
+ * The Perf counters count the number of received and sent messages and return
+ * those values, as well as the maximum payload size of a PCIe packet (mps).
+ * Note that it is not possible to easily and quickly obtain the size of each
+ * packet transmitted, so we output the max payload size (mps) to allow for
+ * quick estimation of the PCIe bandwidth usage
+ */
+static ssize_t amdgpu_get_pcie_bw(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint64_t count0, count1;
+
+ amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
+ return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
+ count0, count1, pcie_get_mps(adev->pdev));
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -1025,6 +1050,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
+static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -2108,6 +2134,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
+ /* PCIe Perf counters won't work on APU nodes */
+ if (adev->flags & !AMD_IS_APU) {
+ ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
+ if (ret) {
+ DRM_ERROR("failed to create device file pcie_bw\n");
+ return ret;
+ }
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2147,6 +2181,8 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
+ if (adev->flags & !AMD_IS_APU)
+ device_remove_file(adev->dev, &dev_attr_pcie_bw);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 71c50d8900e3..6277de51483f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1741,6 +1741,52 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
return true;
}
+static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1756,6 +1802,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.invalidate_hdp = &cik_invalidate_hdp,
.need_full_reset = &cik_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init,
+ .get_pcie_usage = &cik_get_pcie_usage,
};
static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index f8408f88cd37..7d2a48727e76 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -47,6 +47,7 @@
#include "dce/dce_6_0_d.h"
#include "uvd/uvd_4_0_d.h"
#include "bif/bif_3_0_d.h"
+#include "bif/bif_3_0_sh_mask.h"
static const u32 tahiti_golden_registers[] =
{
@@ -1323,6 +1324,52 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
}
+static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@@ -1339,6 +1386,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.flush_hdp = &si_flush_hdp,
.invalidate_hdp = &si_invalidate_hdp,
.need_full_reset = &si_need_full_reset,
+ .get_pcie_usage = &si_get_pcie_usage,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8849b74078d6..bb89833ed3e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -43,6 +43,9 @@
#include "hdp/hdp_4_0_sh_mask.h"
#include "smuio/smuio_9_0_offset.h"
#include "smuio/smuio_9_0_sh_mask.h"
+#include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_sh_mask.h"
+#include "nbio/nbio_7_0_smn.h"
#include "soc15.h"
#include "soc15_common.h"
@@ -601,6 +604,51 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev)
/* change this when we implement soft reset */
return true;
}
+static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
@@ -617,6 +665,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega10_doorbell_index_init,
+ .get_pcie_usage = &soc15_get_pcie_usage,
};
static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -634,6 +683,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
.invalidate_hdp = &soc15_invalidate_hdp,
.need_full_reset = &soc15_need_full_reset,
.init_doorbell_index = &vega20_doorbell_index_init,
+ .get_pcie_usage = &soc15_get_pcie_usage,
};
static int soc15_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 03e7be595a0d..cdc8ab8d79d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -941,6 +941,52 @@ static bool vi_need_full_reset(struct amdgpu_device *adev)
}
}
+static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
+ uint64_t *count1)
+{
+ uint32_t perfctr = 0;
+ uint64_t cnt0_of, cnt1_of;
+ int tmp;
+
+ /* This reports 0 on APUs, so return to avoid writing/reading registers
+ * that may or may not be different from their GPU counterparts
+ */
+ if (adev->flags & AMD_IS_APU)
+ return;
+
+ /* Set the 2 events that we wish to watch, defined above */
+ /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
+ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
+
+ /* Write to enable desired perf counters */
+ WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
+ /* Zero out and enable the perf counters
+ * Write 0x5:
+ * Bit 0 = Start all counters(1)
+ * Bit 2 = Global counter reset enable(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
+
+ msleep(1000);
+
+ /* Load the shadow and disable the perf counters
+ * Write 0x2:
+ * Bit 0 = Stop counters(0)
+ * Bit 1 = Load the shadow counters(1)
+ */
+ WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
+
+ /* Read register values to get any >32bit overflow */
+ tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
+ cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
+ cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
+
+ /* Get the values and add the overflow */
+ *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
+ *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
+}
+
static const struct amdgpu_asic_funcs vi_asic_funcs =
{
.read_disabled_bios = &vi_read_disabled_bios,
@@ -956,6 +1002,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.invalidate_hdp = &vi_invalidate_hdp,
.need_full_reset = &vi_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init,
+ .get_pcie_usage = &vi_get_pcie_usage,
};
#define CZ_REV_BRISTOL(rev) \