1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
*/
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/gpu_scheduler.h>
#include <linux/sizes.h>
#include "aie2_pci.h"
#include "amdxdna_mailbox.h"
#include "amdxdna_pci_drv.h"
/*
 * NPU public registers on MpNPUAxiXbar (refer to Diag npu_registers.h).
 * The scratch registers are laid out 4 bytes apart.
 */
#define MPNPU_PUB_SEC_INTR              0x3010060
#define MPNPU_PUB_PWRMGMT_INTR          0x3010064
#define MPNPU_PUB_SCRATCH0              0x301006C
#define MPNPU_PUB_SCRATCH1              0x3010070
#define MPNPU_PUB_SCRATCH2              0x3010074
#define MPNPU_PUB_SCRATCH3              0x3010078
#define MPNPU_PUB_SCRATCH4              0x301007C
#define MPNPU_PUB_SCRATCH5              0x3010080
#define MPNPU_PUB_SCRATCH6              0x3010084
#define MPNPU_PUB_SCRATCH7              0x3010088
#define MPNPU_PUB_SCRATCH8              0x301008C
#define MPNPU_PUB_SCRATCH9              0x3010090
#define MPNPU_PUB_SCRATCH10             0x3010094
#define MPNPU_PUB_SCRATCH11             0x3010098
#define MPNPU_PUB_SCRATCH12             0x301009C
#define MPNPU_PUB_SCRATCH13             0x30100A0
#define MPNPU_PUB_SCRATCH14             0x30100A4
#define MPNPU_PUB_SCRATCH15             0x30100A8

/* MP0 C2PMSG registers; referenced by the PSP entries of npu4_dev_priv. */
#define MP0_C2PMSG_73                   0x3810A24
#define MP0_C2PMSG_123                  0x3810AEC

/* MP1 C2PMSG registers; referenced by the SMU entries of npu4_dev_priv. */
#define MP1_C2PMSG_0                    0x3B10900
#define MP1_C2PMSG_60                   0x3B109F0
#define MP1_C2PMSG_61                   0x3B109F4

/* SRAM mailbox addresses; referenced by the SRAM entries of npu4_dev_priv. */
#define MPNPU_SRAM_X2I_MAILBOX_0        0x3600000
#define MPNPU_SRAM_X2I_MAILBOX_15       0x361E000
#define MPNPU_SRAM_X2I_MAILBOX_31       0x363E000
#define MPNPU_SRAM_I2X_MAILBOX_31       0x363F000

/* Aperture base addresses; the NPU4_*_BAR_BASE macros alias these. */
#define MMNPU_APERTURE0_BASE            0x3000000
#define MMNPU_APERTURE1_BASE            0x3600000
#define MMNPU_APERTURE3_BASE            0x3810000
#define MMNPU_APERTURE4_BASE            0x3B10000
/*
 * NPU4 PCIe BAR layout.
 *
 * Every region has a NPU4_<region>_BAR_INDEX (the PCIe BAR index) and a
 * matching NPU4_<region>_BAR_BASE (the aperture associated with that BAR).
 * The register and mailbox regions share BAR 0 and aperture 0.
 */

/* Register region */
#define NPU4_REG_BAR_INDEX      0
#define NPU4_REG_BAR_BASE       MMNPU_APERTURE0_BASE

/* Mailbox region */
#define NPU4_MBOX_BAR_INDEX     0
#define NPU4_MBOX_BAR_BASE      MMNPU_APERTURE0_BASE

/* SRAM region */
#define NPU4_SRAM_BAR_INDEX     2
#define NPU4_SRAM_BAR_BASE      MMNPU_APERTURE1_BASE

/* PSP region */
#define NPU4_PSP_BAR_INDEX      4
#define NPU4_PSP_BAR_BASE       MMNPU_APERTURE3_BASE

/* SMU region */
#define NPU4_SMU_BAR_INDEX      5
#define NPU4_SMU_BAR_BASE       MMNPU_APERTURE4_BASE
/*
 * Default runtime configuration entries for NPU4.
 *
 * Entries are positional struct rt_config initializers (the struct is
 * declared outside this file); the last member of each is an AIE2_RT_CFG_*
 * constant.  The list closes with an all-zero entry, presumably the
 * terminator that consumers of the table look for - confirm against users.
 */
const struct rt_config npu4_default_rt_cfg[] = {
	/* PDI APP LOAD MODE */
	{ 5, 1, AIE2_RT_CFG_INIT },

	/* Clock gating on */
	{ 1, 1, AIE2_RT_CFG_CLK_GATING },
	{ 2, 1, AIE2_RT_CFG_CLK_GATING },
	{ 3, 1, AIE2_RT_CFG_CLK_GATING },
	{ 4, 1, AIE2_RT_CFG_CLK_GATING },

	{ 0 },
};
/*
 * DPM clock table for NPU4.
 *
 * Each row holds two values of struct dpm_clk_freq (declared outside this
 * file; presumably a pair of clock frequencies - confirm units and member
 * names against the struct definition).  Rows 3 and 4 are intentionally
 * kept identical to the original table.  The final row is all zeros.
 */
const struct dpm_clk_freq npu4_dpm_clk_table[] = {
	[0] = {  396,  792 },
	[1] = {  600, 1056 },
	[2] = {  792, 1152 },
	[3] = {  975, 1267 },
	[4] = {  975, 1267 },
	[5] = { 1056, 1408 },
	[6] = { 1152, 1584 },
	[7] = { 1267, 1800 },
	[8] = { 0 }
};
/*
 * NPU4 private device description: firmware image path, protocol version,
 * default runtime config / DPM tables, and the SRAM, PSP and SMU register
 * tables.  DEFINE_BAR_OFFSET is defined outside this file; each use passes
 * a table slot name, an NPU4_<region> prefix and a register address.
 */
static const struct amdxdna_dev_priv npu4_dev_priv = {
	/* Firmware and protocol version */
	.fw_path        = "amdnpu/17f0_10/npu.sbin",
	.protocol_major = 0x6,
	.protocol_minor = 12,

	/* Tables defined earlier in this file */
	.rt_config      = npu4_default_rt_cfg,
	.dpm_clk_tbl    = npu4_dpm_clk_table,

	.col_align      = COL_ALIGN_NATURE,

	/* Mailbox and SRAM device addresses */
	.mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
	.mbox_size      = 0, /* Use BAR size */
	.sram_dev_addr  = NPU4_SRAM_BAR_BASE,

	.sram_offs = {
		DEFINE_BAR_OFFSET(MBOX_CHANN_OFF, NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_0),
		DEFINE_BAR_OFFSET(FW_ALIVE_OFF,   NPU4_SRAM, MPNPU_SRAM_X2I_MAILBOX_15),
	},

	/*
	 * PSP registers.  CMD and STATUS both use MP0_C2PMSG_123, and ARG0 and
	 * RESP both use MPNPU_PUB_SCRATCH3.
	 */
	.psp_regs_off = {
		DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU4_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
		DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU4_REG, MPNPU_PUB_SCRATCH4),
		DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU4_REG, MPNPU_PUB_SCRATCH9),
		DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU4_PSP, MP0_C2PMSG_73),
		DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
		DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU4_REG, MPNPU_PUB_SCRATCH3),
	},

	/* SMU registers.  ARG and OUT both use MP1_C2PMSG_60. */
	.smu_regs_off = {
		DEFINE_BAR_OFFSET(SMU_CMD_REG,  NPU4_SMU, MP1_C2PMSG_0),
		DEFINE_BAR_OFFSET(SMU_ARG_REG,  NPU4_SMU, MP1_C2PMSG_60),
		DEFINE_BAR_OFFSET(SMU_INTR_REG, NPU4_SMU, MMNPU_APERTURE4_BASE),
		DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
		DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
	},

	/* Hardware-specific callbacks */
	.hw_ops = {
		.set_dpm = npu4_set_dpm,
	},
};
/*
 * Exported device info for NPU4: BAR indices, device memory window,
 * identification string, device type, and pointers to the private data
 * and the shared aie2 operations.
 */
const struct amdxdna_dev_info dev_npu4_info = {
	/* PCIe BAR indices */
	.reg_bar           = NPU4_REG_BAR_INDEX,
	.mbox_bar          = NPU4_MBOX_BAR_INDEX,
	.sram_bar          = NPU4_SRAM_BAR_INDEX,
	.psp_bar           = NPU4_PSP_BAR_INDEX,
	.smu_bar           = NPU4_SMU_BAR_INDEX,

	.first_col         = 0,

	/* Device memory */
	.dev_mem_buf_shift = 15, /* 32 KiB aligned */
	.dev_mem_base      = AIE2_DEVM_BASE,
	.dev_mem_size      = AIE2_DEVM_SIZE,

	/* Identification */
	.vbnv              = "RyzenAI-npu4",
	.device_type       = AMDXDNA_DEV_TYPE_KMQ,

	.dev_priv          = &npu4_dev_priv,
	.ops               = &aie2_ops, /* NPU4 can share NPU1's callback */
};
|