summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-23 11:48:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-23 11:48:48 -0700
commit1d6da87a3241deb13d073c4125d19ed0e5a0c62c (patch)
tree42b7a9842618dad2afe7db9709cc6217ced03120 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent1f40c49570eb01436786a9b5845c4469a9a1f362 (diff)
parenta39ed680bddb1ead592e22ed812c7e47286bfc03 (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "Here's the main drm pull request for 4.7, it's been a busy one, and I've been a bit more distracted in real life this merge window. Lots more ARM drivers, not sure if it'll ever end. I think I've at least one more coming the next merge window. But changes are all over the place, support for AMD Polaris GPUs is in here, some missing GM108 support for nouveau (found in some Lenovos), a bunch of MST and skylake fixes. I've also noticed a few fixes from Arnd in my inbox, that I'll try and get in asap, but I didn't think they should hold this up. New drivers: - Hisilicon kirin display driver - Mediatek MT8173 display driver - ARC PGU - bitstreamer on Synopsys ARC SDP boards - Allwinner A13 initial RGB output driver - Analogix driver for DisplayPort IP found in exynos and rockchip DRM Core: - UAPI headers fixes and C++ safety - DRM connector reference counting - DisplayID mode parsing for Dell 5K monitors - Removal of struct_mutex from drivers - Connector registration cleanups - MST robustness fixes - MAINTAINERS updates - Lockless GEM object freeing - Generic fbdev deferred IO support panel: - Support for a bunch of new panels i915: - VBT refactoring - PLL computation cleanups - DSI support for BXT - Color manager support - More atomic patches - GEM improvements - GuC fw loading fixes - DP detection fixes - SKL GPU hang fixes - Lots of BXT fixes radeon/amdgpu: - Initial Polaris support - GPUVM/Scheduler/Clock/Power improvements - ASYNC pageflip support - New mesa feature support nouveau: - GM108 support - Power sensor support improvements - GR init + ucode fixes. - Use GPU provided topology information vmwgfx: - Add host messaging support gma500: - Some cleanups and fixes atmel: - Bridge support - Async atomic commit support fsl-dcu: - Timing controller for LCD support - Pixel clock polarity support rcar-du: - Misc fixes exynos: - Pipeline clock support - Exynoss4533 SoC support - HW trigger mode support - export HDMI_PHY clock - DECON5433 fixes - Use generic prime functions - use DMA mapping APIs rockchip: - Lots of little fixes vc4: - Render node support - Gamma ramp support - DPI output support msm: - Mostly cleanups and fixes - Conversion to generic struct fence etnaviv: - Fix for prime buffer handling - Allow hangcheck to be coalesced with other wakeups tegra: - Gamme table size fix" * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1050 commits) drm/edid: add displayid detailed 1 timings to the modelist. (v1.1) drm/edid: move displayid validation to it's own function. drm/displayid: Iterate over all DisplayID blocks drm/edid: move displayid tiled block parsing into separate function. drm: Nuke ->vblank_disable_allowed drm/vmwgfx: Report vmwgfx version to vmware.log drm/vmwgfx: Add VMWare host messaging capability drm/vmwgfx: Kill some lockdep warnings drm/nouveau/gr/gf100-: fix race condition in fecs/gpccs ucode drm/nouveau/core: recognise GM108 chipsets drm/nouveau/gr/gm107-: fix touching non-existent ppcs in attrib cb setup drm/nouveau/gr/gk104-: share implementation of ppc exception init drm/nouveau/gr/gk104-: move rop_active_fbps init to nonctx drm/nouveau/bios/pll: check BIT table version before trying to parse it drm/nouveau/bios/pll: prevent oops when limits table can't be parsed drm/nouveau/volt/gk104: round up in gk104_volt_set drm/nouveau/fb/gm200: setup mmu debug buffer registers at init() drm/nouveau/fb/gk20a,gm20b: setup mmu debug buffer registers at init() drm/nouveau/fb/gf100-: allocate mmu debug buffers drm/nouveau/fb: allow chipset-specific actions for oneinit() ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c145
1 files changed, 56 insertions, 89 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9392e50a7ba4..9bc8f1d99733 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,7 +24,6 @@
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
-#include <linux/list_sort.h>
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
@@ -88,44 +87,41 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
}
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct amdgpu_user_fence *uf,
- struct drm_amdgpu_cs_chunk_fence *fence_data)
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
{
struct drm_gem_object *gobj;
- uint32_t handle;
- handle = fence_data->handle;
- gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
- fence_data->handle);
+ gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
- uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
- uf->offset = fence_data->offset;
-
- if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
- drm_gem_object_unreference_unlocked(gobj);
- return -EINVAL;
- }
-
- p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
+ p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
+ *offset = data->offset;
drm_gem_object_unreference_unlocked(gobj);
+
+ if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+ amdgpu_bo_unref(&p->uf_entry.robj);
+ return -EINVAL;
+ }
+
return 0;
}
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
- struct amdgpu_user_fence uf = {};
unsigned size, num_ibs = 0;
+ uint32_t uf_offset = 0;
int i;
int ret;
@@ -200,7 +196,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_partial_kdata;
}
- ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
+ ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
+ &uf_offset);
if (ret)
goto free_partial_kdata;
@@ -215,11 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
}
- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+ ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
if (ret)
goto free_all_kdata;
- p->job->uf = uf;
+ if (p->uf_entry.robj) {
+ p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
+ p->job->uf_offset = uf_offset;
+ }
kfree(chunk_array);
return 0;
@@ -377,7 +377,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
- if (p->job->uf.bo)
+ if (p->uf_entry.robj)
list_add(&p->uf_entry.tv.head, &p->validated);
if (need_mmap_lock)
@@ -473,6 +473,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_validate;
if (p->bo_list) {
+ struct amdgpu_bo *gds = p->bo_list->gds_obj;
+ struct amdgpu_bo *gws = p->bo_list->gws_obj;
+ struct amdgpu_bo *oa = p->bo_list->oa_obj;
struct amdgpu_vm *vm = &fpriv->vm;
unsigned i;
@@ -481,6 +484,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
}
+
+ if (gds) {
+ p->job->gds_base = amdgpu_bo_gpu_offset(gds);
+ p->job->gds_size = amdgpu_bo_size(gds);
+ }
+ if (gws) {
+ p->job->gws_base = amdgpu_bo_gpu_offset(gws);
+ p->job->gws_size = amdgpu_bo_size(gws);
+ }
+ if (oa) {
+ p->job->oa_base = amdgpu_bo_gpu_offset(oa);
+ p->job->oa_size = amdgpu_bo_size(oa);
+ }
}
error_validate:
@@ -527,16 +543,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return 0;
}
-static int cmp_size_smaller_first(void *priv, struct list_head *a,
- struct list_head *b)
-{
- struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
- struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);
-
- /* Sort A before B if A is smaller. */
- return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
-}
-
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
@@ -553,18 +559,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
if (!error) {
amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
- /* Sort the buffer list from the smallest to largest buffer,
- * which affects the order of buffers in the LRU list.
- * This assures that the smallest buffers are added first
- * to the LRU list, so they are likely to be later evicted
- * first, instead of large buffers whose eviction is more
- * expensive.
- *
- * This slightly lowers the number of bytes moved by TTM
- * per frame under memory pressure.
- */
- list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-
ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated,
parser->fence);
@@ -763,41 +757,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
ib->length_dw = chunk_ib->ib_bytes / 4;
ib->flags = chunk_ib->flags;
- ib->ctx = parser->ctx;
j++;
}
- /* add GDS resources to first IB */
- if (parser->bo_list) {
- struct amdgpu_bo *gds = parser->bo_list->gds_obj;
- struct amdgpu_bo *gws = parser->bo_list->gws_obj;
- struct amdgpu_bo *oa = parser->bo_list->oa_obj;
- struct amdgpu_ib *ib = &parser->job->ibs[0];
-
- if (gds) {
- ib->gds_base = amdgpu_bo_gpu_offset(gds);
- ib->gds_size = amdgpu_bo_size(gds);
- }
- if (gws) {
- ib->gws_base = amdgpu_bo_gpu_offset(gws);
- ib->gws_size = amdgpu_bo_size(gws);
- }
- if (oa) {
- ib->oa_base = amdgpu_bo_gpu_offset(oa);
- ib->oa_size = amdgpu_bo_size(oa);
- }
- }
- /* wrap the last IB with user fence */
- if (parser->job->uf.bo) {
- struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
-
- /* UVD & VCE fw doesn't support user fences */
- if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
- return -EINVAL;
-
- ib->user = &parser->job->uf;
- }
+ /* UVD & VCE fw doesn't support user fences */
+ if (parser->job->uf_bo && (
+ parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+ parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
+ return -EINVAL;
return 0;
}
@@ -862,28 +829,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_ring *ring = p->job->ring;
- struct amd_sched_fence *fence;
+ struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ struct fence *fence;
struct amdgpu_job *job;
+ int r;
job = p->job;
p->job = NULL;
- job->base.sched = &ring->sched;
- job->base.s_entity = &p->ctx->rings[ring->idx].entity;
- job->owner = p->filp;
-
- fence = amd_sched_fence_create(job->base.s_entity, p->filp);
- if (!fence) {
+ r = amd_sched_job_init(&job->base, &ring->sched,
+ entity, amdgpu_job_timeout_func,
+ amdgpu_job_free_func,
+ p->filp, &fence);
+ if (r) {
amdgpu_job_free(job);
- return -ENOMEM;
+ return r;
}
- job->base.s_fence = fence;
- p->fence = fence_get(&fence->base);
-
- cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
- &fence->base);
- job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
+ job->owner = p->filp;
+ job->ctx = entity->fence_context;
+ p->fence = fence_get(fence);
+ cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+ job->uf_sequence = cs->out.handle;
trace_amdgpu_cs_ioctl(job);
amd_sched_entity_push_job(&job->base);