drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2021 MediaTek Inc.
 * Author: Yunfei Dong <yunfei.dong@mediatek.com>
 */

#include <linux/freezer.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>

#include "mtk_vcodec_dec_pm.h"
#include "mtk_vcodec_drv.h"
#include "vdec_msg_queue.h"

#define VDEC_MSG_QUEUE_TIMEOUT_MS 1500

/* the size used to store lat slice header information */
#define VDEC_LAT_SLICE_HEADER_SZ    (640 * SZ_1K)

/* the size used to store avc error information */
#define VDEC_ERR_MAP_SZ_AVC         (17 * SZ_1K)

#define VDEC_RD_MV_BUFFER_SZ        (((SZ_4K * 2304 >> 4) + SZ_1K) << 1)
#define VDEC_LAT_TILE_SZ            (64 * V4L2_AV1_MAX_TILE_COUNT)

/* core will read the trans buffer which decoded by lat to decode again.
 * The trans buffer size of FHD and 4K bitstreams are different.
 */
static int vde_msg_queue_get_trans_size(int width, int height)
{
	if (width > 1920 || height > 1088)
		return 30 * SZ_1M;
	else
		return 6 * SZ_1M;
}

void vdec_msg_queue_init_ctx(struct vdec_msg_queue_ctx *ctx, int hardware_index)
{
	init_waitqueue_head(&ctx->ready_to_use);
	INIT_LIST_HEAD(&ctx->ready_queue);
	spin_lock_init(&ctx->ready_lock);
	ctx->ready_num = 0;
	ctx->hardware_index = hardware_index;
}

static struct list_head *vdec_get_buf_list(int hardware_index, struct vdec_lat_buf *buf)
{
	switch (hardware_index) {
	case MTK_VDEC_CORE:
		return &buf->core_list;
	case MTK_VDEC_LAT0:
		return &buf->lat_list;
	default:
		return NULL;
	}
}

static void vdec_msg_queue_inc(struct vdec_msg_queue *msg_queue, int hardware_index)
{
	if (hardware_index == MTK_VDEC_CORE)
		atomic_inc(&msg_queue->core_list_cnt);
	else
		atomic_inc(&msg_queue->lat_list_cnt);
}

static void vdec_msg_queue_dec(struct vdec_msg_queue *msg_queue, int hardware_index)
{
	if (hardware_index == MTK_VDEC_CORE)
		atomic_dec(&msg_queue->core_list_cnt);
	else
		atomic_dec(&msg_queue->lat_list_cnt);
}

int vdec_msg_queue_qbuf(struct vdec_msg_queue_ctx *msg_ctx, struct vdec_lat_buf *buf)
{
	struct list_head *head;

	head = vdec_get_buf_list(msg_ctx->hardware_index, buf);
	if (!head) {
		mtk_v4l2_err("fail to qbuf: %d", msg_ctx->hardware_index);
		return -EINVAL;
	}

	spin_lock(&msg_ctx->ready_lock);
	list_add_tail(head, &msg_ctx->ready_queue);
	msg_ctx->ready_num++;

	vdec_msg_queue_inc(&buf->ctx->msg_queue, msg_ctx->hardware_index);
	if (msg_ctx->hardware_index != MTK_VDEC_CORE) {
		wake_up_all(&msg_ctx->ready_to_use);
	} else {
		if (!(buf->ctx->msg_queue.status & CONTEXT_LIST_QUEUED)) {
			queue_work(buf->ctx->dev->core_workqueue, &buf->ctx->msg_queue.core_work);
			buf->ctx->msg_queue.status |= CONTEXT_LIST_QUEUED;
		}
	}

	mtk_v4l2_debug(3, "enqueue buf type: %d addr: 0x%p num: %d",
		       msg_ctx->hardware_index, buf, msg_ctx->ready_num);
	spin_unlock(&msg_ctx->ready_lock);

	return 0;
}

static bool vdec_msg_queue_wait_event(struct vdec_msg_queue_ctx *msg_ctx)
{
	int ret;

	ret = wait_event_timeout(msg_ctx->ready_to_use,
				 !list_empty(&msg_ctx->ready_queue),
				 msecs_to_jiffies(VDEC_MSG_QUEUE_TIMEOUT_MS));
	if (!ret)
		return false;

	return true;
}

struct vdec_lat_buf *vdec_msg_queue_dqbuf(struct vdec_msg_queue_ctx *msg_ctx)
{
	struct vdec_lat_buf *buf;
	struct list_head *head;
	int ret;

	spin_lock(&msg_ctx->ready_lock);
	if (list_empty(&msg_ctx->ready_queue)) {
		mtk_v4l2_debug(3, "queue is NULL, type:%d num: %d",
			       msg_ctx->hardware_index, msg_ctx->ready_num);
		spin_unlock(&msg_ctx->ready_lock);

		if (msg_ctx->hardware_index == MTK_VDEC_CORE)
			return NULL;

		ret = vdec_msg_queue_wait_event(msg_ctx);
		if (!ret)
			return NULL;
		spin_lock(&msg_ctx->ready_lock);
	}

	if (msg_ctx->hardware_index == MTK_VDEC_CORE)
		buf = list_first_entry(&msg_ctx->ready_queue,
				       struct vdec_lat_buf, core_list);
	else
		buf = list_first_entry(&msg_ctx->ready_queue,
				       struct vdec_lat_buf, lat_list);

	head = vdec_get_buf_list(msg_ctx->hardware_index, buf);
	if (!head) {
		spin_unlock(&msg_ctx->ready_lock);
		mtk_v4l2_err("fail to dqbuf: %d", msg_ctx->hardware_index);
		return NULL;
	}
	list_del(head);
	vdec_msg_queue_dec(&buf->ctx->msg_queue, msg_ctx->hardware_index);

	msg_ctx->ready_num--;
	mtk_v4l2_debug(3, "dqueue buf type:%d addr: 0x%p num: %d",
		       msg_ctx->hardware_index, buf, msg_ctx->ready_num);
	spin_unlock(&msg_ctx->ready_lock);

	return buf;
}

void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue, uint64_t ube_rptr)
{
	spin_lock(&msg_queue->lat_ctx.ready_lock);
	msg_queue->wdma_rptr_addr = ube_rptr;
	mtk_v4l2_debug(3, "update ube rprt (0x%llx)", ube_rptr);
	spin_unlock(&msg_queue->lat_ctx.ready_lock);
}

void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue, uint64_t ube_wptr)
{
	spin_lock(&msg_queue->lat_ctx.ready_lock);
	msg_queue->wdma_wptr_addr = ube_wptr;
	mtk_v4l2_debug(3, "update ube wprt: (0x%llx 0x%llx) offset: 0x%llx",
		       msg_queue->wdma_rptr_addr, msg_queue->wdma_wptr_addr,
		       ube_wptr);
	spin_unlock(&msg_queue->lat_ctx.ready_lock);
}

bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)
{
	if (atomic_read(&msg_queue->lat_list_cnt) == NUM_BUFFER_COUNT) {
		mtk_v4l2_debug(3, "wait buf full: list(%d %d) ready_num:%d status:%d",
			       atomic_read(&msg_queue->lat_list_cnt),
			       atomic_read(&msg_queue->core_list_cnt),
			       msg_queue->lat_ctx.ready_num,
			       msg_queue->status);
		return true;
	}

	msg_queue->flush_done = false;
	vdec_msg_queue_qbuf(&msg_queue->core_ctx, &msg_queue->empty_lat_buf);
	wait_event(msg_queue->core_dec_done, msg_queue->flush_done);

	mtk_v4l2_debug(3, "flush done => ready_num:%d status:%d list(%d %d)",
		       msg_queue->lat_ctx.ready_num, msg_queue->status,
		       atomic_read(&msg_queue->lat_list_cnt),
		       atomic_read(&msg_queue->core_list_cnt));

	return false;
}

void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue,
			   struct mtk_vcodec_ctx *ctx)
{
	struct vdec_lat_buf *lat_buf;
	struct mtk_vcodec_mem *mem;
	int i;

	mem = &msg_queue->wdma_addr;
	if (mem->va)
		mtk_vcodec_mem_free(ctx, mem);
	for (i = 0; i < NUM_BUFFER_COUNT; i++) {
		lat_buf = &msg_queue->lat_buf[i];

		mem = &lat_buf->wdma_err_addr;
		if (mem->va)
			mtk_vcodec_mem_free(ctx, mem);

		mem = &lat_buf->slice_bc_addr;
		if (mem->va)
			mtk_vcodec_mem_free(ctx, mem);

		mem = &lat_buf->rd_mv_addr;
		if (mem->va)
			mtk_vcodec_mem_free(ctx, mem);

		mem = &lat_buf->tile_addr;
		if (mem->va)
			mtk_vcodec_mem_free(ctx, mem);

		kfree(lat_buf->private_data);
	}

	if (msg_queue->wdma_addr.size)
		cancel_work_sync(&msg_queue->core_work);
}

static void vdec_msg_queue_core_work(struct work_struct *work)
{
	struct vdec_msg_queue *msg_queue =
		container_of(work, struct vdec_msg_queue, core_work);
	struct mtk_vcodec_ctx *ctx =
		container_of(msg_queue, struct mtk_vcodec_ctx, msg_queue);
	struct mtk_vcodec_dev *dev = ctx->dev;
	struct vdec_lat_buf *lat_buf;

	spin_lock(&msg_queue->core_ctx.ready_lock);
	ctx->msg_queue.status &= ~CONTEXT_LIST_QUEUED;
	spin_unlock(&msg_queue->core_ctx.ready_lock);

	lat_buf = vdec_msg_queue_dqbuf(&msg_queue->core_ctx);
	if (!lat_buf)
		return;

	if (lat_buf->is_last_frame) {
		ctx->msg_queue.status = CONTEXT_LIST_DEC_DONE;
		msg_queue->flush_done = true;
		wake_up(&ctx->msg_queue.core_dec_done);

		return;
	}

	ctx = lat_buf->ctx;
	mtk_vcodec_dec_enable_hardware(ctx, MTK_VDEC_CORE);
	mtk_vcodec_set_curr_ctx(dev, ctx, MTK_VDEC_CORE);

	lat_buf->core_decode(lat_buf);

	mtk_vcodec_set_curr_ctx(dev, NULL, MTK_VDEC_CORE);
	mtk_vcodec_dec_disable_hardware(ctx, MTK_VDEC_CORE);
	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);

	if (!(ctx->msg_queue.status & CONTEXT_LIST_QUEUED) &&
	    atomic_read(&msg_queue->core_list_cnt)) {
		spin_lock(&msg_queue->core_ctx.ready_lock);
		ctx->msg_queue.status |= CONTEXT_LIST_QUEUED;
		spin_unlock(&msg_queue->core_ctx.ready_lock);
		queue_work(ctx->dev->core_workqueue, &msg_queue->core_work);
	}
}

int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue,
			struct mtk_vcodec_ctx *ctx, core_decode_cb_t core_decode,
			int private_size)
{
	struct vdec_lat_buf *lat_buf;
	int i, err;

	/* already init msg queue */
	if (msg_queue->wdma_addr.size)
		return 0;

	vdec_msg_queue_init_ctx(&msg_queue->lat_ctx, MTK_VDEC_LAT0);
	vdec_msg_queue_init_ctx(&msg_queue->core_ctx, MTK_VDEC_CORE);
	INIT_WORK(&msg_queue->core_work, vdec_msg_queue_core_work);

	atomic_set(&msg_queue->lat_list_cnt, 0);
	atomic_set(&msg_queue->core_list_cnt, 0);
	init_waitqueue_head(&msg_queue->core_dec_done);
	msg_queue->status = CONTEXT_LIST_EMPTY;

	msg_queue->wdma_addr.size =
		vde_msg_queue_get_trans_size(ctx->picinfo.buf_w,
					     ctx->picinfo.buf_h);
	err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr);
	if (err) {
		mtk_v4l2_err("failed to allocate wdma_addr buf");
		return -ENOMEM;
	}
	msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr;
	msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr;

	msg_queue->empty_lat_buf.ctx = ctx;
	msg_queue->empty_lat_buf.core_decode = NULL;
	msg_queue->empty_lat_buf.is_last_frame = true;

	for (i = 0; i < NUM_BUFFER_COUNT; i++) {
		lat_buf = &msg_queue->lat_buf[i];

		lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC;
		err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr);
		if (err) {
			mtk_v4l2_err("failed to allocate wdma_err_addr buf[%d]", i);
			goto mem_alloc_err;
		}

		lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ;
		err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr);
		if (err) {
			mtk_v4l2_err("failed to allocate wdma_addr buf[%d]", i);
			goto mem_alloc_err;
		}

		if (ctx->current_codec == V4L2_PIX_FMT_AV1_FRAME) {
			lat_buf->rd_mv_addr.size = VDEC_RD_MV_BUFFER_SZ;
			err = mtk_vcodec_mem_alloc(ctx, &lat_buf->rd_mv_addr);
			if (err) {
				mtk_v4l2_err("failed to allocate rd_mv_addr buf[%d]", i);
				return -ENOMEM;
			}

			lat_buf->tile_addr.size = VDEC_LAT_TILE_SZ;
			err = mtk_vcodec_mem_alloc(ctx, &lat_buf->tile_addr);
			if (err) {
				mtk_v4l2_err("failed to allocate tile_addr buf[%d]", i);
				return -ENOMEM;
			}
		}

		lat_buf->private_data = kzalloc(private_size, GFP_KERNEL);
		if (!lat_buf->private_data) {
			err = -ENOMEM;
			goto mem_alloc_err;
		}

		lat_buf->ctx = ctx;
		lat_buf->core_decode = core_decode;
		lat_buf->is_last_frame = false;
		err = vdec_msg_queue_qbuf(&msg_queue->lat_ctx, lat_buf);
		if (err) {
			mtk_v4l2_err("failed to qbuf buf[%d]", i);
			goto mem_alloc_err;
		}
	}
	return 0;

mem_alloc_err:
	vdec_msg_queue_deinit(msg_queue, ctx);
	return err;
}