diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/evergreen_cs.c')
| -rw-r--r-- | drivers/gpu/drm/radeon/evergreen_cs.c | 1143 |
1 files changed, 693 insertions, 450 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index eb8ac315f92f..1162cb5d75ed 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -25,17 +25,21 @@ * Alex Deucher * Jerome Glisse */ -#include <drm/drmP.h> + #include "radeon.h" +#include "radeon_asic.h" +#include "r600.h" #include "evergreend.h" #include "evergreen_reg_safe.h" #include "cayman_reg_safe.h" -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define MIN(a,b) (((a)<(b))?(a):(b)) +#ifndef MIN +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm) -int r600_dma_cs_next_reloc(struct radeon_cs_parser *p, - struct radeon_cs_reloc **cs_reloc); struct evergreen_cs_track { u32 group_size; u32 nbanks; @@ -83,6 +87,8 @@ struct evergreen_cs_track { u32 htile_offset; u32 htile_surface; struct radeon_bo *htile_bo; + unsigned long indirect_draw_buffer_size; + const unsigned *reg_safe_bm; }; static u32 evergreen_cs_get_aray_mode(u32 tiling_flags) @@ -205,7 +211,7 @@ static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p, surf->base_align = track->group_size; surf->palign = palign; surf->halign = 1; - if (surf->nbx & (palign - 1)) { + if ((surf->nbx & (palign - 1)) && !(palign == 64 && surf->nbx == 32)) { if (prefix) { dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n", __func__, __LINE__, prefix, surf->nbx, palign); @@ -391,7 +397,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1; @@ -429,29 +435,29 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i return r; } - offset = track->cb_color_bo_offset[id] << 8; + offset = (u64)track->cb_color_bo_offset[id] << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d cb[%d] bo base %llu not aligned with %ld\n", __func__, __LINE__, id, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { /* old ddx are broken they allocate bo with w*h*bpp but * program slice with ALIGN(h, 8), catch this and patch * command stream. */ if (!surf.mode) { - volatile u32 *ib = p->ib.ptr; - unsigned long tmp, nby, bsize, size, min = 0; + uint32_t *ib = p->ib.ptr; + u64 tmp, nby, bsize, size, min = 0; /* find the height the ddx wants */ if (surf.nby > 8) { min = surf.nby - 8; } bsize = radeon_bo_size(track->cb_color_bo[id]); - tmp = track->cb_color_bo_offset[id] << 8; + tmp = (u64)track->cb_color_bo_offset[id] << 8; for (nby = surf.nby; nby > min; nby--) { size = nby * surf.nbx * surf.bpe * surf.nsamples; if ((tmp + size * mslice) <= bsize) { @@ -463,7 +469,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i slice = ((nby * surf.nbx) / 64) - 1; if (!evergreen_surface_check(p, &surf, "cb")) { /* check if this one works */ - tmp += surf.layer_size * mslice; + tmp += (u64)surf.layer_size * mslice; if (tmp <= bsize) { ib[track->cb_color_slice_idx[id]] = slice; goto old_ddx_ok; @@ -472,9 +478,9 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i } } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " - "offset %d, max layer %d, bo size %ld, slice %d)\n", + "offset %llu, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, - track->cb_color_bo_offset[id] << 8, mslice, + (u64)track->cb_color_bo_offset[id] << 8, mslice, radeon_bo_size(track->cb_color_bo[id]), slice); dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n", __func__, __LINE__, surf.nbx, surf.nby, @@ -558,7 +564,7 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -604,18 +610,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return r; } - offset = track->db_s_read_offset << 8; + offset = (u64)track->db_s_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_read_bo)) { dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_read_offset << 8, mslice, + (u64)track->db_s_read_offset << 8, mslice, radeon_bo_size(track->db_s_read_bo)); dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n", __func__, __LINE__, track->db_depth_size, @@ -623,18 +629,18 @@ static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p) return -EINVAL; } - offset = track->db_s_write_offset << 8; + offset = (u64)track->db_s_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_s_write_bo)) { dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_s_write_offset << 8, mslice, + (u64)track->db_s_write_offset << 8, mslice, radeon_bo_size(track->db_s_write_bo)); return -EINVAL; } @@ -655,7 +661,7 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) struct evergreen_cs_track *track = p->track; struct eg_surface surf; unsigned pitch, slice, mslice; - unsigned long offset; + u64 offset; int r; mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1; @@ -702,34 +708,34 @@ static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p) return r; } - offset = track->db_z_read_offset << 8; + offset = (u64)track->db_z_read_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil read bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_read_bo)) { dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_read_offset << 8, mslice, + (u64)track->db_z_read_offset << 8, mslice, radeon_bo_size(track->db_z_read_bo)); return -EINVAL; } - offset = track->db_z_write_offset << 8; + offset = (u64)track->db_z_write_offset << 8; if (offset & (surf.base_align - 1)) { - dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n", + dev_warn(p->dev, "%s:%d stencil write bo base %llu not aligned with %ld\n", __func__, __LINE__, offset, surf.base_align); return -EINVAL; } - offset += surf.layer_size * mslice; + offset += (u64)surf.layer_size * mslice; if (offset > radeon_bo_size(track->db_z_write_bo)) { dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, " - "offset %ld, max layer %d, bo size %ld)\n", + "offset %llu, max layer %d, bo size %ld)\n", __func__, __LINE__, surf.layer_size, - (unsigned long)track->db_z_write_offset << 8, mslice, + (u64)track->db_z_write_offset << 8, mslice, radeon_bo_size(track->db_z_write_bo)); return -EINVAL; } @@ -945,13 +951,13 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) u64 offset = (u64)track->vgt_strmout_bo_offset[i] + (u64)track->vgt_strmout_size[i]; if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) { - DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n", - i, offset, - radeon_bo_size(track->vgt_strmout_bo[i])); + dev_warn_once(p->dev, "streamout %d bo too small: 0x%llx, 0x%lx\n", + i, offset, + radeon_bo_size(track->vgt_strmout_bo[i])); return -EINVAL; } } else { - dev_warn(p->dev, "No buffer for streamout %d\n", i); + dev_warn_once(p->dev, "No buffer for streamout %d\n", i); return -EINVAL; } } @@ -967,11 +973,14 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) if (track->cb_dirty) { tmp = track->cb_target_mask; for (i = 0; i < 8; i++) { - if ((tmp >> (i * 4)) & 0xF) { + u32 format = G_028C70_FORMAT(track->cb_color_info[i]); + + if (format != V_028C70_COLOR_INVALID && + (tmp >> (i * 4)) & 0xF) { /* at least one component is enabled */ if (track->cb_color_bo[i] == NULL) { - dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", - __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); + dev_warn_once(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n", + __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i); return -EINVAL; } /* check cb */ @@ -1007,7 +1016,7 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) /** * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet - * @parser: parser structure holding parsing context. + * @p: parser structure holding parsing context. * * This is an Evergreen(+)-specific function for parsing VLINE packets. * Real work is done by r600_cs_common_vline_parse function. @@ -1047,14 +1056,13 @@ static int evergreen_packet0_check(struct radeon_cs_parser *p, case EVERGREEN_VLINE_START_END: r = evergreen_cs_packet_parse_vline(p); if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); + dev_warn_once(p->dev, "No reloc for ib[%d]=0x%04X\n", + idx, reg); return r; } break; default: - printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n", - reg, idx); + pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx); return -EINVAL; } return 0; @@ -1079,41 +1087,18 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p, } /** - * evergreen_cs_check_reg() - check if register is authorized or not - * @parser: parser structure holding parsing context + * evergreen_cs_handle_reg() - process registers that need special handling. + * @p: parser structure holding parsing context * @reg: register we are testing * @idx: index into the cs buffer - * - * This function will test against evergreen_reg_safe_bm and return 0 - * if register is safe. If register is not flag as safe this function - * will test it against a list of register needind special handling. */ -static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track; - struct radeon_cs_reloc *reloc; - u32 last_reg; - u32 m, i, tmp, *ib; + struct radeon_bo_list *reloc; + u32 tmp, *ib; int r; - if (p->rdev->family >= CHIP_CAYMAN) - last_reg = ARRAY_SIZE(cayman_reg_safe_bm); - else - last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); - - i = (reg >> 7); - if (i >= last_reg) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); - return -EINVAL; - } - m = 1 << ((reg >> 2) & 31); - if (p->rdev->family >= CHIP_CAYMAN) { - if (!(cayman_reg_safe_bm[i] & m)) - return 0; - } else { - if (!(evergreen_reg_safe_bm[i] & m)) - return 0; - } ib = p->ib.ptr; switch (reg) { /* force following reg to 0 in an attempt to disable out buffer @@ -1158,11 +1143,11 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_VSTMP_RING_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case DB_DEPTH_CONTROL: track->db_depth_control = radeon_get_ib_value(p, idx); @@ -1170,15 +1155,15 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_DB_EQAA: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; case CAYMAN_DB_DEPTH_INFO: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } break; @@ -1187,18 +1172,18 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } ib[idx] &= ~Z_ARRAY_MODE(0xf); track->db_z_info &= ~Z_ARRAY_MODE(0xf); - ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1229,48 +1214,48 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case DB_Z_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_read_bo = reloc->robj; track->db_dirty = true; break; case DB_Z_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_z_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_z_write_bo = reloc->robj; track->db_dirty = true; break; case DB_STENCIL_READ_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_read_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_read_bo = reloc->robj; track->db_dirty = true; break; case DB_STENCIL_WRITE_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->db_s_write_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->db_s_write_bo = reloc->robj; track->db_dirty = true; break; @@ -1288,13 +1273,13 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case VGT_STRMOUT_BUFFER_BASE_3: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16; track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8; - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->vgt_strmout_bo[tmp] = reloc->robj; track->streamout_dirty = true; break; @@ -1310,11 +1295,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CP_COHER_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "missing reloc for CP_COHER_BASE " - "0x%04X\n", reg); + dev_warn_once(p->dev, "missing reloc for CP_COHER_BASE " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); + break; case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; @@ -1325,8 +1311,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case PA_SC_AA_CONFIG: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK; @@ -1334,8 +1320,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case CAYMAN_PA_SC_AA_CONFIG: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK; @@ -1374,12 +1360,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1392,12 +1378,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); + ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); } track->cb_dirty = true; break; @@ -1453,15 +1439,15 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1481,15 +1467,15 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_ATTRIB: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks)); @@ -1517,7 +1503,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_fmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_CMASK: @@ -1534,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_cmask_bo[tmp] = reloc->robj; break; case CB_COLOR0_FMASK_SLICE: @@ -1569,13 +1555,13 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = (reg - CB_COLOR0_BASE) / 0x3c; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; @@ -1585,25 +1571,25 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8; track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->cb_color_bo[tmp] = reloc->robj; track->cb_dirty = true; break; case DB_HTILE_DATA_BASE: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } track->htile_offset = radeon_get_ib_value(p, idx); - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); track->htile_bo = reloc->robj; track->db_dirty = true; break; @@ -1716,82 +1702,80 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case SQ_ALU_CONST_CACHE_LS_15: r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MEMORY_EXPORT_BASE: if (p->rdev->family >= CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONFIG_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONFIG_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case CAYMAN_SX_SCATTER_EXPORT_BASE: if (p->rdev->family < CHIP_CAYMAN) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - dev_warn(p->dev, "bad SET_CONTEXT_REG " - "0x%04X\n", reg); + dev_warn_once(p->dev, "bad SET_CONTEXT_REG " + "0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); break; case SX_MISC: track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0; break; default: - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); return -EINVAL; } return 0; } -static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +/** + * evergreen_is_safe_reg() - check if register is authorized or not + * @p: parser structure holding parsing context + * @reg: register we are testing + * + * This function will test against reg_safe_bm and return true + * if register is safe or false otherwise. + */ +static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg) { - u32 last_reg, m, i; - - if (p->rdev->family >= CHIP_CAYMAN) - last_reg = ARRAY_SIZE(cayman_reg_safe_bm); - else - last_reg = ARRAY_SIZE(evergreen_reg_safe_bm); + struct evergreen_cs_track *track = p->track; + u32 m, i; i = (reg >> 7); - if (i >= last_reg) { - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + if (unlikely(i >= REG_SAFE_BM_SIZE)) { return false; } m = 1 << ((reg >> 2) & 31); - if (p->rdev->family >= CHIP_CAYMAN) { - if (!(cayman_reg_safe_bm[i] & m)) - return true; - } else { - if (!(evergreen_reg_safe_bm[i] & m)) - return true; - } - dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx); + if (!(track->reg_safe_bm[i] & m)) + return true; + return false; } static int evergreen_packet3_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt) { - struct radeon_cs_reloc *reloc; + struct radeon_bo_list *reloc; struct evergreen_cs_track *track; - volatile u32 *ib; + uint32_t *ib; unsigned idx; unsigned i; unsigned start_reg, end_reg, reg; @@ -1811,7 +1795,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } @@ -1823,19 +1807,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, return 0; if (pred_op > 2) { - DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op); + dev_warn_once(p->dev, "bad SET PREDICATION operation %d\n", pred_op); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET PREDICATION\n"); + dev_warn_once(p->dev, "bad SET PREDICATION\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - (idx_value & 0xfffffff0) + - ((u64)(tmp & 0xff) << 32); + offset = reloc->gpu_offset + + (idx_value & 0xfffffff0) + + ((u64)(tmp & 0xff) << 32); ib[idx + 0] = offset; ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff); @@ -1843,7 +1827,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, break; case PACKET3_CONTEXT_CONTROL: if (pkt->count != 1) { - DRM_ERROR("bad CONTEXT_CONTROL\n"); + dev_warn_once(p->dev, "bad CONTEXT_CONTROL\n"); return -EINVAL; } break; @@ -1851,17 +1835,17 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case PACKET3_NUM_INSTANCES: case PACKET3_CLEAR_STATE: if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; case CAYMAN_PACKET3_DEALLOC_STATE: if (p->rdev->family < CHIP_CAYMAN) { - DRM_ERROR("bad PACKET3_DEALLOC_STATE\n"); + dev_warn_once(p->dev, "bad PACKET3_DEALLOC_STATE\n"); return -EINVAL; } if (pkt->count) { - DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); + dev_warn_once(p->dev, "bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n"); return -EINVAL; } break; @@ -1870,52 +1854,60 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 1) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad INDEX_BASE\n"); + dev_warn_once(p->dev, "bad INDEX_BASE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - idx_value + - ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + offset = reloc->gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } + case PACKET3_INDEX_BUFFER_SIZE: + { + if (pkt->count != 0) { + dev_warn_once(p->dev, "bad INDEX_BUFFER_SIZE\n"); + return -EINVAL; + } + break; + } case PACKET3_DRAW_INDEX: { uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - idx_value + - ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); + offset = reloc->gpu_offset + + idx_value + + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; @@ -1925,115 +1917,176 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_2\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DRAW_INDEX_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_2\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - radeon_get_ib_value(p, idx+1) + - ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + offset = reloc->gpu_offset + + radeon_get_ib_value(p, idx+1) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; } case PACKET3_DRAW_INDEX_AUTO: if (pkt->count != 1) { - DRM_ERROR("bad DRAW_INDEX_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_MULTI_AUTO: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_MULTI_AUTO\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DRAW_INDEX_IMMD: if (pkt->count < 2) { - DRM_ERROR("bad DRAW_INDEX_IMMD\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_IMMD\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET: if (pkt->count != 2) { - DRM_ERROR("bad DRAW_INDEX_OFFSET\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_DRAW_INDEX_OFFSET_2: if (pkt->count != 3) { - DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n"); + dev_warn_once(p->dev, "bad DRAW_INDEX_OFFSET_2\n"); + return -EINVAL; + } + r = evergreen_cs_track_check(p); + if (r) { + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + return r; + } + break; + case PACKET3_SET_BASE: + { + /* + DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet. + 2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs. + 0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data. + 3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved + 4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32] + */ + if (pkt->count != 2) { + dev_warn_once(p->dev, "bad SET_BASE\n"); + return -EINVAL; + } + + /* currently only supporting setting indirect draw buffer base address */ + if (idx_value != 1) { + dev_warn_once(p->dev, "bad SET_BASE\n"); + return -EINVAL; + } + + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad SET_BASE\n"); + return -EINVAL; + } + + track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj); + + ib[idx+1] = reloc->gpu_offset; + ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff; + + break; + } + case PACKET3_DRAW_INDIRECT: + case PACKET3_DRAW_INDEX_INDIRECT: + { + u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20; + + /* + DW 1 HEADER + 2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero + 3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context + */ + if (pkt->count != 1) { + dev_warn_once(p->dev, "bad DRAW_INDIRECT\n"); return -EINVAL; } + + if (idx_value + size > track->indirect_draw_buffer_size) { + dev_warn_once(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n", + idx_value, size, track->indirect_draw_buffer_size); + return -EINVAL; + } + r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; + } case PACKET3_DISPATCH_DIRECT: if (pkt->count != 3) { - DRM_ERROR("bad DISPATCH_DIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_DIRECT\n"); return -EINVAL; } r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); + dev_warn_once(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx); return r; } break; case PACKET3_DISPATCH_INDIRECT: if (pkt->count != 1) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad DISPATCH_INDIRECT\n"); + dev_warn_once(p->dev, "bad DISPATCH_INDIRECT\n"); return -EINVAL; } - ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff); + ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff); r = evergreen_cs_track_check(p); if (r) { - dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); + dev_warn_once(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__); return r; } break; case PACKET3_WAIT_REG_MEM: if (pkt->count != 5) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } /* bit 4 is reg (0) or mem (1) */ @@ -2042,18 +2095,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad WAIT_REG_MEM\n"); + dev_warn_once(p->dev, "bad WAIT_REG_MEM\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + - ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + offset = reloc->gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc); ib[idx+2] = upper_32_bits(offset) & 0xff; } else if (idx_value & 0x100) { - DRM_ERROR("cannot use PFP on REG wait\n"); + dev_warn_once(p->dev, "cannot use PFP on REG wait\n"); return -EINVAL; } break; @@ -2062,7 +2115,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u32 command, size, info; u64 offset, tmp; if (pkt->count != 4) { - DRM_ERROR("bad CP DMA\n"); + dev_warn_once(p->dev, "bad CP DMA\n"); return -EINVAL; } command = radeon_get_ib_value(p, idx+4); @@ -2076,7 +2129,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if (size % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(p->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ -2084,37 +2137,37 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* src address space is register */ /* GDS is ok */ if (((info & 0x60000000) >> 29) != 1) { - DRM_ERROR("CP DMA SAS not supported\n"); + dev_warn_once(p->dev, "CP DMA SAS not supported\n"); return -EINVAL; } } else { if (command & PACKET3_CP_DMA_CMD_SAIC) { - DRM_ERROR("CP DMA SAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA SAIC only supported for registers\n"); return -EINVAL; } /* src address space is memory */ if (((info & 0x60000000) >> 29) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA SRC\n"); + dev_warn_once(p->dev, "bad CP DMA SRC\n"); return -EINVAL; } tmp = radeon_get_ib_value(p, idx) + ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA src buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx] = offset; ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff); } else if (((info & 0x60000000) >> 29) != 2) { - DRM_ERROR("bad CP DMA SRC_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA SRC_SEL\n"); return -EINVAL; } } @@ -2122,45 +2175,51 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* dst address space is register */ /* GDS is ok */ if (((info & 0x00300000) >> 20) != 1) { - DRM_ERROR("CP DMA DAS not supported\n"); + dev_warn_once(p->dev, "CP DMA DAS not supported\n"); return -EINVAL; } } else { /* dst address space is memory */ if (command & PACKET3_CP_DMA_CMD_DAIC) { - DRM_ERROR("CP DMA DAIC only supported for registers\n"); + dev_warn_once(p->dev, "CP DMA DAIC only supported for registers\n"); return -EINVAL; } if (((info & 0x00300000) >> 20) == 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad CP DMA DST\n"); + dev_warn_once(p->dev, "bad CP DMA DST\n"); return -EINVAL; } tmp = radeon_get_ib_value(p, idx+2) + ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32); - offset = reloc->lobj.gpu_offset + tmp; + offset = reloc->gpu_offset + tmp; if ((tmp + size) > radeon_bo_size(reloc->robj)) { - dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", - tmp + size, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "CP DMA dst buffer too small (%llu %lu)\n", + tmp + size, radeon_bo_size(reloc->robj)); return -EINVAL; } ib[idx+2] = offset; ib[idx+3] = upper_32_bits(offset) & 0xff; } else { - DRM_ERROR("bad CP DMA DST_SEL\n"); + dev_warn_once(p->dev, "bad CP DMA DST_SEL\n"); return -EINVAL; } } break; } + case PACKET3_PFP_SYNC_ME: + if (pkt->count) { + dev_warn_once(p->dev, "bad PFP_SYNC_ME\n"); + return -EINVAL; + } + break; case PACKET3_SURFACE_SYNC: if (pkt->count != 3) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } /* 0xffffffff/0x0 is flush all cache flag */ @@ -2168,15 +2227,15 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, radeon_get_ib_value(p, idx + 2) != 0) { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SURFACE_SYNC\n"); + dev_warn_once(p->dev, "bad SURFACE_SYNC\n"); return -EINVAL; } - ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); } break; case PACKET3_EVENT_WRITE: if (pkt->count != 2 && pkt->count != 0) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } if (pkt->count) { @@ -2184,12 +2243,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + - ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + offset = reloc->gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffff8) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ib[idx+1] = offset & 0xfffffff8; ib[idx+2] = upper_32_bits(offset) & 0xff; @@ -2200,18 +2259,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 4) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOP\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOP\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + - ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + offset = reloc->gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ib[idx+1] = offset & 0xfffffffc; ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); @@ -2222,18 +2281,18 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, uint64_t offset; if (pkt->count != 3) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad EVENT_WRITE_EOS\n"); + dev_warn_once(p->dev, "bad EVENT_WRITE_EOS\n"); return -EINVAL; } - offset = reloc->lobj.gpu_offset + - (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + - ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); + offset = reloc->gpu_offset + + (radeon_get_ib_value(p, idx+1) & 0xfffffffc) + + ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32); ib[idx+1] = offset & 0xfffffffc; ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff); @@ -2245,12 +2304,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } - for (i = 0; i < pkt->count; i++) { - reg = start_reg + (4 * i); - r = evergreen_cs_check_reg(p, reg, idx+1+i); + for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { + if (evergreen_is_safe_reg(p, reg)) + continue; + r = evergreen_cs_handle_reg(p, reg, idx); if (r) return r; } @@ -2261,19 +2321,20 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CONTEXT_REG_START) || (start_reg >= PACKET3_SET_CONTEXT_REG_END) || (end_reg >= PACKET3_SET_CONTEXT_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n"); + dev_warn_once(p->dev, "bad PACKET3_SET_CONTEXT_REG\n"); return -EINVAL; } - for (i = 0; i < pkt->count; i++) { - reg = start_reg + (4 * i); - r = evergreen_cs_check_reg(p, reg, idx+1+i); + for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) { + if (evergreen_is_safe_reg(p, reg)) + continue; + r = evergreen_cs_handle_reg(p, reg, idx); if (r) return r; } break; case PACKET3_SET_RESOURCE: if (pkt->count % 8) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START; @@ -2281,7 +2342,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_RESOURCE_START) || (start_reg >= PACKET3_SET_RESOURCE_END) || (end_reg >= PACKET3_SET_RESOURCE_END)) { - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } for (i = 0; i < (pkt->count / 8); i++) { @@ -2294,16 +2355,16 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* tex base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { ib[idx+1+(i*8)+1] |= - TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { + TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags)); + if (reloc->tiling_flags & RADEON_TILING_MACRO) { unsigned bankw, bankh, mtaspect, tile_split; - evergreen_tiling_fields(reloc->lobj.tiling_flags, + evergreen_tiling_fields(reloc->tiling_flags, &bankw, &bankh, &mtaspect, &tile_split); ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split); @@ -2315,7 +2376,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } } texture = reloc->robj; - toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); /* tex mip base */ tex_dim = ib[idx+1+(i*8)+0] & 0x7; @@ -2331,10 +2392,10 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, } else { r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (tex)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (tex)\n"); return -EINVAL; } - moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff); mipmap = reloc->robj; } @@ -2350,18 +2411,19 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* vtx base */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad SET_RESOURCE (vtx)\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE (vtx)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1+(i*8)+0); size = radeon_get_ib_value(p, idx+1+(i*8)+1); if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) { /* force size to size of the buffer */ - dev_warn(p->dev, "vbo resource seems too big for the bo\n"); + dev_warn_once(p->dev, "vbo resource seems too big (%d) for the bo (%ld)\n", + size + offset, radeon_bo_size(reloc->robj)); ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset; } - offset64 = reloc->lobj.gpu_offset + offset; + offset64 = reloc->gpu_offset + offset; ib[idx+1+(i*8)+0] = offset64; ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) | (upper_32_bits(offset64) & 0xff); @@ -2370,7 +2432,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, case SQ_TEX_VTX_INVALID_TEXTURE: case SQ_TEX_VTX_INVALID_BUFFER: default: - DRM_ERROR("bad SET_RESOURCE\n"); + dev_warn_once(p->dev, "bad SET_RESOURCE\n"); return -EINVAL; } } @@ -2384,7 +2446,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_BOOL_CONST_START) || (start_reg >= PACKET3_SET_BOOL_CONST_END) || (end_reg >= PACKET3_SET_BOOL_CONST_END)) { - DRM_ERROR("bad SET_BOOL_CONST\n"); + dev_warn_once(p->dev, "bad SET_BOOL_CONST\n"); return -EINVAL; } break; @@ -2394,7 +2456,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_LOOP_CONST_START) || (start_reg >= PACKET3_SET_LOOP_CONST_END) || (end_reg >= PACKET3_SET_LOOP_CONST_END)) { - DRM_ERROR("bad SET_LOOP_CONST\n"); + dev_warn_once(p->dev, "bad SET_LOOP_CONST\n"); return -EINVAL; } break; @@ -2404,13 +2466,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_CTL_CONST_START) || (start_reg >= PACKET3_SET_CTL_CONST_END) || (end_reg >= PACKET3_SET_CTL_CONST_END)) { - DRM_ERROR("bad SET_CTL_CONST\n"); + dev_warn_once(p->dev, "bad SET_CTL_CONST\n"); return -EINVAL; } break; case PACKET3_SET_SAMPLER: if (pkt->count % 3) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START; @@ -2418,13 +2480,13 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, if ((start_reg < PACKET3_SET_SAMPLER_START) || (start_reg >= PACKET3_SET_SAMPLER_END) || (end_reg >= PACKET3_SET_SAMPLER_END)) { - DRM_ERROR("bad SET_SAMPLER\n"); + dev_warn_once(p->dev, "bad SET_SAMPLER\n"); return -EINVAL; } break; case PACKET3_STRMOUT_BUFFER_UPDATE: if (pkt->count != 4) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (invalid count)\n"); return -EINVAL; } /* Updating memory at DST_ADDRESS. */ @@ -2432,17 +2494,17 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } @@ -2451,17 +2513,17 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } @@ -2471,33 +2533,33 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, u64 offset; if (pkt->count != 3) { - DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (invalid count)\n"); return -EINVAL; } r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (missing reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+0); offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; if (offset & 0x7) { - DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + dev_warn_once(p->dev, "bad MEM_WRITE (address not qwords aligned)\n"); return -EINVAL; } if ((offset + 8) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", - offset + 8, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+0] = offset; ib[idx+1] = upper_32_bits(offset) & 0xff; break; } case PACKET3_COPY_DW: if (pkt->count != 4) { - DRM_ERROR("bad COPY_DW (invalid count)\n"); + dev_warn_once(p->dev, "bad COPY_DW (invalid count)\n"); return -EINVAL; } if (idx_value & 0x1) { @@ -2505,54 +2567,194 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, /* SRC is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing src reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing src reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+1); offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW src bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+1] = offset; ib[idx+2] = upper_32_bits(offset) & 0xff; } else { /* SRC is a reg. */ reg = radeon_get_ib_value(p, idx+1) << 2; - if (!evergreen_is_safe_reg(p, reg, idx+1)) + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); return -EINVAL; + } } if (idx_value & 0x2) { u64 offset; /* DST is memory. */ r = radeon_cs_packet_next_reloc(p, &reloc, 0); if (r) { - DRM_ERROR("bad COPY_DW (missing dst reloc)\n"); + dev_warn_once(p->dev, "bad COPY_DW (missing dst reloc)\n"); return -EINVAL; } offset = radeon_get_ib_value(p, idx+3); offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; if ((offset + 4) > radeon_bo_size(reloc->robj)) { - DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", - offset + 4, radeon_bo_size(reloc->robj)); + dev_warn_once(p->dev, "bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n", + offset + 4, radeon_bo_size(reloc->robj)); return -EINVAL; } - offset += reloc->lobj.gpu_offset; + offset += reloc->gpu_offset; ib[idx+3] = offset; ib[idx+4] = upper_32_bits(offset) & 0xff; } else { /* DST is a reg. */ reg = radeon_get_ib_value(p, idx+3) << 2; - if (!evergreen_is_safe_reg(p, reg, idx+3)) + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 3); return -EINVAL; + } + } + break; + case PACKET3_SET_APPEND_CNT: + { + uint32_t areg; + uint32_t allowed_reg_base; + uint32_t source_sel; + if (pkt->count != 2) { + dev_warn_once(p->dev, "bad SET_APPEND_CNT (invalid count)\n"); + return -EINVAL; + } + + allowed_reg_base = GDS_APPEND_COUNT_0; + allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START; + allowed_reg_base >>= 2; + + areg = idx_value >> 16; + if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { + dev_warn_once(p->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); + return -EINVAL; + } + + source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value); + if (source_sel == PACKET3_SAC_SRC_SEL_MEM) { + uint64_t offset; + uint32_t swap; + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad SET_APPEND_CNT (missing reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 1); + swap = offset & 0x3; + offset &= ~0x3; + + offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32; + + offset += reloc->gpu_offset; + ib[idx+1] = (offset & 0xfffffffc) | swap; + ib[idx+2] = upper_32_bits(offset) & 0xff; + } else { + dev_warn_once(p->dev, "bad SET_APPEND_CNT (unsupported operation)\n"); + return -EINVAL; + } + break; + } + case PACKET3_COND_EXEC: + { + u64 offset; + + if (pkt->count != 2) { + dev_warn_once(p->dev, "bad COND_EXEC (invalid count)\n"); + return -EINVAL; + } + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_EXEC (missing reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 0); + offset += ((u64)(radeon_get_ib_value(p, idx + 1) & 0xff)) << 32UL; + if (offset & 0x7) { + dev_warn_once(p->dev, "bad COND_EXEC (address not qwords aligned)\n"); + return -EINVAL; + } + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_EXEC bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 0] = offset; + ib[idx + 1] = upper_32_bits(offset) & 0xff; + break; + } + case PACKET3_COND_WRITE: + if (pkt->count != 7) { + dev_warn_once(p->dev, "bad COND_WRITE (invalid count)\n"); + return -EINVAL; + } + if (idx_value & 0x10) { + u64 offset; + /* POLL is memory. */ + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_WRITE (missing src reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 1); + offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32; + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_WRITE src bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 1] = offset; + ib[idx + 2] = upper_32_bits(offset) & 0xff; + } else { + /* POLL is a reg. */ + reg = radeon_get_ib_value(p, idx + 1) << 2; + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 1); + return -EINVAL; + } + } + if (idx_value & 0x100) { + u64 offset; + /* WRITE is memory. */ + r = radeon_cs_packet_next_reloc(p, &reloc, 0); + if (r) { + dev_warn_once(p->dev, "bad COND_WRITE (missing dst reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx + 5); + offset += ((u64)(radeon_get_ib_value(p, idx + 6) & 0xff)) << 32; + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + dev_warn_once(p->dev, "bad COND_WRITE dst bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->gpu_offset; + ib[idx + 5] = offset; + ib[idx + 6] = upper_32_bits(offset) & 0xff; + } else { + /* WRITE is a reg. */ + reg = radeon_get_ib_value(p, idx + 5) << 2; + if (!evergreen_is_safe_reg(p, reg)) { + dev_warn_once(p->dev, "forbidden register 0x%08x at %d\n", + reg, idx + 5); + return -EINVAL; + } } break; case PACKET3_NOP: break; default: - DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode); + dev_warn_once(p->dev, "Packet3 opcode %x not supported\n", pkt->opcode); return -EINVAL; } return 0; @@ -2571,11 +2773,15 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) if (track == NULL) return -ENOMEM; evergreen_cs_track_init(track); - if (p->rdev->family >= CHIP_CAYMAN) + if (p->rdev->family >= CHIP_CAYMAN) { tmp = p->rdev->config.cayman.tile_config; - else + track->reg_safe_bm = cayman_reg_safe_bm; + } else { tmp = p->rdev->config.evergreen.tile_config; - + track->reg_safe_bm = evergreen_reg_safe_bm; + } + BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE); + BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE); switch (tmp & 0xf) { case 0: track->npipes = 1; @@ -2648,7 +2854,7 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) r = evergreen_packet3_check(p, &pkt); break; default: - DRM_ERROR("Unknown packet type %d !\n", pkt.type); + dev_warn_once(p->dev, "Unknown packet type %d !\n", pkt.type); kfree(p->track); p->track = NULL; return -EINVAL; @@ -2658,10 +2864,10 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) p->track = NULL; return r; } - } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); + } while (p->idx < p->chunk_ib->length_dw); #if 0 for (r = 0; r < p->ib.length_dw; r++) { - printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); + pr_info("%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif @@ -2681,18 +2887,18 @@ int evergreen_cs_parse(struct radeon_cs_parser *p) **/ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) { - struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx]; - struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc; + struct radeon_cs_chunk *ib_chunk = p->chunk_ib; + struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc; u32 header, cmd, count, sub_cmd; - volatile u32 *ib = p->ib.ptr; + uint32_t *ib = p->ib.ptr; u32 idx; u64 src_offset, dst_offset, dst2_offset; int r; do { if (p->idx >= ib_chunk->length_dw) { - DRM_ERROR("Can not parse packet at %d after CS end %d !\n", - p->idx, ib_chunk->length_dw); + dev_warn_once(p->dev, "Can not parse packet at %d after CS end %d !\n", + p->idx, ib_chunk->length_dw); return -EINVAL; } idx = p->idx; @@ -2705,7 +2911,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case DMA_PACKET_WRITE: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_WRITE\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE\n"); return -EINVAL; } switch (sub_cmd) { @@ -2714,7 +2920,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += count + 7; break; /* linear */ @@ -2722,29 +2928,29 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA write buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } break; case DMA_PACKET_COPY: r = r600_dma_cs_next_reloc(p, &src_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY\n"); return -EINVAL; } switch (sub_cmd) { @@ -2756,19 +2962,19 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2T/T2L */ @@ -2778,31 +2984,31 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx + 7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -2815,32 +3021,32 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", - src_offset + count, radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", + src_offset + count, radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", - dst_offset + count, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n", + dst_offset + count, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 5; break; /* Copy L2L, partial */ case 0x41: /* L2L, partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff); - ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff); + ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff; + ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff); + ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; p->idx += 9; break; @@ -2849,7 +3055,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* L2L, dw, broadcast */ r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2L, dw, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -2859,37 +3065,37 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+3); src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; - ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff; - ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; + ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff; + ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 7; break; /* Copy L2T Frame to Field */ case 0x48: if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -2899,46 +3105,46 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L, partial */ case 0x49: /* L2T, T2L partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } /* detile bit */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { /* tiled src, linear dst */ - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } p->idx += 12; break; @@ -2946,12 +3152,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4b: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -2961,24 +3167,24 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; /* Copy L2T/T2L (tile units) */ @@ -2989,31 +3195,31 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* tiled src, linear dst */ src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); dst_offset = radeon_get_ib_value(p, idx+7); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff; } else { /* linear src, tiled dst */ src_offset = radeon_get_ib_value(p, idx+7); src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; - ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff; dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); } if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } p->idx += 9; @@ -3022,23 +3228,23 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) case 0x4d: /* T2T partial */ if (p->family < CHIP_CAYMAN) { - DRM_ERROR("L2T, T2L Partial is cayman only !\n"); + dev_warn_once(p->dev, "L2T, T2L Partial is cayman only !\n"); return -EINVAL; } - ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8); + ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8); + ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8); p->idx += 13; break; /* Copy L2T broadcast (tile units) */ case 0x4f: /* L2T, broadcast */ if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } r = r600_dma_cs_next_reloc(p, &dst2_reloc); if (r) { - DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); + dev_warn_once(p->dev, "bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); @@ -3048,59 +3254,59 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) src_offset = radeon_get_ib_value(p, idx+8); src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", - src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", + src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); return -EINVAL; } if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", - dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n", + dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); return -EINVAL; } if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) { - dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", - dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); + dev_warn_once(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n", + dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); - ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8); - ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; + ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8); + ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8); + ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc); + ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff; p->idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); + dev_warn_once(p->dev, "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header); return -EINVAL; } break; case DMA_PACKET_CONSTANT_FILL: r = r600_dma_cs_next_reloc(p, &dst_reloc); if (r) { - DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); + dev_warn_once(p->dev, "bad DMA_PACKET_CONSTANT_FILL\n"); return -EINVAL; } dst_offset = radeon_get_ib_value(p, idx+1); dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { - dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", - dst_offset, radeon_bo_size(dst_reloc->robj)); + dev_warn_once(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", + dst_offset, radeon_bo_size(dst_reloc->robj)); return -EINVAL; } - ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); - ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000; + ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc); + ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000; p->idx += 4; break; case DMA_PACKET_NOP: p->idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(p->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } - } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw); + } while (p->idx < p->chunk_ib->length_dw); #if 0 for (r = 0; r < p->ib->length_dw; r++) { - printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]); + pr_info("%05d 0x%08X\n", r, p->ib.ptr[r]); mdelay(1); } #endif @@ -3225,7 +3431,7 @@ static bool evergreen_vm_reg_valid(u32 reg) case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS: return true; default: - DRM_ERROR("Invalid register 0x%x in CS\n", reg); + DRM_DEBUG("Invalid register 0x%x in CS\n", reg); return false; } } @@ -3240,7 +3446,13 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, switch (pkt->opcode) { case PACKET3_NOP: + break; case PACKET3_SET_BASE: + if (idx_value != 1) { + dev_warn_once(rdev->dev, "bad SET_BASE"); + return -EINVAL; + } + break; case PACKET3_CLEAR_STATE: case PACKET3_INDEX_BUFFER_SIZE: case PACKET3_DISPATCH_DIRECT: @@ -3267,6 +3479,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, case PACKET3_MPEG_INDEX: case PACKET3_WAIT_REG_MEM: case PACKET3_MEM_WRITE: + case PACKET3_PFP_SYNC_ME: case PACKET3_SURFACE_SYNC: case PACKET3_EVENT_WRITE: case PACKET3_EVENT_WRITE_EOP: @@ -3283,7 +3496,12 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, case CAYMAN_PACKET3_DEALLOC_STATE: break; case PACKET3_COND_WRITE: - if (idx_value & 0x100) { + if (!(idx_value & 0x10)) { + reg = ib[idx + 1] * 4; + if (!evergreen_vm_reg_valid(reg)) + return -EINVAL; + } + if (!(idx_value & 0x100)) { reg = ib[idx + 5] * 4; if (!evergreen_vm_reg_valid(reg)) return -EINVAL; @@ -3302,7 +3520,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if ((start_reg < PACKET3_SET_CONFIG_REG_START) || (start_reg >= PACKET3_SET_CONFIG_REG_END) || (end_reg >= PACKET3_SET_CONFIG_REG_END)) { - DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n"); + dev_warn_once(rdev->dev, "bad PACKET3_SET_CONFIG_REG\n"); return -EINVAL; } for (i = 0; i < pkt->count; i++) { @@ -3322,7 +3540,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */ /* non mem to mem copies requires dw aligned count */ if ((command & 0x1fffff) % 4) { - DRM_ERROR("CP DMA command requires dw count alignment\n"); + dev_warn_once(rdev->dev, "CP DMA command requires dw count alignment\n"); return -EINVAL; } } @@ -3333,14 +3551,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_SAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad SRC register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad SRC register\n"); return -EINVAL; } } @@ -3354,14 +3572,14 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, if (command & PACKET3_CP_DMA_CMD_DAIC) { reg = start_reg; if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } else { for (i = 0; i < (command & 0x1fffff); i++) { reg = start_reg + (4 * i); if (!evergreen_vm_reg_valid(reg)) { - DRM_ERROR("CP DMA Bad DST register\n"); + dev_warn_once(rdev->dev, "CP DMA Bad DST register\n"); return -EINVAL; } } @@ -3369,6 +3587,27 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev, } } break; + case PACKET3_SET_APPEND_CNT: { + uint32_t areg; + uint32_t allowed_reg_base; + + if (pkt->count != 2) { + dev_warn_once(rdev->dev, "bad SET_APPEND_CNT (invalid count)\n"); + return -EINVAL; + } + + allowed_reg_base = GDS_APPEND_COUNT_0; + allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START; + allowed_reg_base >>= 2; + + areg = idx_value >> 16; + if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) { + dev_warn_once(rdev->dev, "forbidden register for append cnt 0x%08x at %d\n", + areg, idx); + return -EINVAL; + } + break; + } default: return -EINVAL; } @@ -3443,7 +3682,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += count + 3; break; default: - DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3494,7 +3735,9 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 10; break; default: - DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]); + dev_warn_once(rdev->dev, + "bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", + idx, ib->ptr[idx]); return -EINVAL; } break; @@ -3505,7 +3748,7 @@ int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) idx += 1; break; default: - DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx); + dev_warn_once(rdev->dev, "Unknown packet type %d at %d !\n", cmd, idx); return -EINVAL; } } while (idx < ib->length_dw); |
