You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
openwrt/target/linux/bcm27xx/patches-5.4/950-0513-staging-media-Add-...

4342 lines
118 KiB
Diff

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

From 82bbd353e2dc364bf37e6f0b91890cb432b1a72f Mon Sep 17 00:00:00 2001
From: John Cox <jc@kynesim.co.uk>
Date: Thu, 5 Mar 2020 18:30:41 +0000
Subject: [PATCH] staging: media: Add Raspberry Pi V4L2 H265 decoder
This driver is for the HEVC/H265 decoder block on the Raspberry
Pi 4, and conforms to the V4L2 stateless decoder API.
Signed-off-by: John Cox <jc@kynesim.co.uk>
---
drivers/staging/media/Kconfig | 2 +
drivers/staging/media/Makefile | 1 +
drivers/staging/media/rpivid/Kconfig | 16 +
drivers/staging/media/rpivid/Makefile | 5 +
drivers/staging/media/rpivid/rpivid.c | 432 ++++
drivers/staging/media/rpivid/rpivid.h | 181 ++
drivers/staging/media/rpivid/rpivid_dec.c | 79 +
drivers/staging/media/rpivid/rpivid_dec.h | 19 +
drivers/staging/media/rpivid/rpivid_h265.c | 2275 +++++++++++++++++++
drivers/staging/media/rpivid/rpivid_hw.c | 321 +++
drivers/staging/media/rpivid/rpivid_hw.h | 300 +++
drivers/staging/media/rpivid/rpivid_video.c | 593 +++++
drivers/staging/media/rpivid/rpivid_video.h | 30 +
14 files changed, 4256 insertions(+)
create mode 100644 drivers/staging/media/rpivid/Kconfig
create mode 100644 drivers/staging/media/rpivid/Makefile
create mode 100644 drivers/staging/media/rpivid/rpivid.c
create mode 100644 drivers/staging/media/rpivid/rpivid.h
create mode 100644 drivers/staging/media/rpivid/rpivid_dec.c
create mode 100644 drivers/staging/media/rpivid/rpivid_dec.h
create mode 100644 drivers/staging/media/rpivid/rpivid_h265.c
create mode 100644 drivers/staging/media/rpivid/rpivid_hw.c
create mode 100644 drivers/staging/media/rpivid/rpivid_hw.h
create mode 100644 drivers/staging/media/rpivid/rpivid_video.c
create mode 100644 drivers/staging/media/rpivid/rpivid_video.h
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -30,6 +30,8 @@ source "drivers/staging/media/meson/vdec
source "drivers/staging/media/omap4iss/Kconfig"
+source "drivers/staging/media/rpivid/Kconfig"
+
source "drivers/staging/media/sunxi/Kconfig"
source "drivers/staging/media/tegra-vde/Kconfig"
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_VIDEO_ALLEGRO_DVT) += alleg
obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx/
obj-$(CONFIG_VIDEO_MESON_VDEC) += meson/vdec/
obj-$(CONFIG_VIDEO_OMAP4) += omap4iss/
+obj-$(CONFIG_VIDEO_RPIVID) += rpivid/
obj-$(CONFIG_VIDEO_SUNXI) += sunxi/
obj-$(CONFIG_TEGRA_VDE) += tegra-vde/
obj-$(CONFIG_VIDEO_HANTRO) += hantro/
--- /dev/null
+++ b/drivers/staging/media/rpivid/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config VIDEO_RPIVID
+ tristate "Rpi H265 driver"
+ depends on VIDEO_DEV && VIDEO_V4L2
+ depends on MEDIA_CONTROLLER
+ depends on OF
+ depends on MEDIA_CONTROLLER_REQUEST_API
+ select VIDEOBUF2_DMA_CONTIG
+ select V4L2_MEM2MEM_DEV
+ help
+ Support for the Rpi H265 h/w decoder.
+
+ To compile this driver as a module, choose M here: the module
+ will be called rpivid-hevc.
+
--- /dev/null
+++ b/drivers/staging/media/rpivid/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VIDEO_RPIVID) += rpivid-hevc.o
+
+rpivid-hevc-y = rpivid.o rpivid_video.o rpivid_dec.o \
+ rpivid_hw.o rpivid_h265.o
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "rpivid.h"
+#include "rpivid_video.h"
+#include "rpivid_hw.h"
+#include "rpivid_dec.h"
+
+/*
+ * Default /dev/videoN node number.
+ * Deliberately avoid the very low numbers as these are often taken by webcams
+ * etc, and simple apps tend to only go for /dev/video0.
+ */
+static int video_nr = 19;
+module_param(video_nr, int, 0644);
+MODULE_PARM_DESC(video_nr, "decoder video device number");
+
+static const struct rpivid_control rpivid_ctrls[] = {
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
+ },
+ .required = true,
+ },
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
+ },
+ .required = true,
+ },
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
+ },
+ .required = false,
+ },
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
+ },
+ .required = true,
+ },
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE,
+ .max = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
+ .def = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
+ },
+ .required = false,
+ },
+ {
+ .cfg = {
+ .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE,
+ .max = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
+ .def = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
+ },
+ .required = false,
+ },
+};
+
+#define rpivid_ctrls_COUNT ARRAY_SIZE(rpivid_ctrls)
+
+/*
+ * Look up the current payload of one of a context's controls.
+ *
+ * ctx->ctrls is walked until its terminating NULL entry; the first control
+ * whose id equals @id is used.
+ *
+ * Returns that control's p_cur.p pointer, or NULL when no control in the
+ * array carries the requested id.
+ */
+void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id)
+{
+	struct v4l2_ctrl **slot = ctx->ctrls;
+
+	while (*slot) {
+		struct v4l2_ctrl *const candidate = *slot++;
+
+		if (candidate->id == id)
+			return candidate->p_cur.p;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create the controls listed in rpivid_ctrls[] on the context's control
+ * handler and record them in ctx->ctrls.
+ *
+ * ctx->ctrls is a NULL-terminated array (rpivid_find_control_data() walks
+ * it until it meets a NULL entry), so it is allocated with one spare,
+ * zero-filled pointer slot.
+ *
+ * Returns 0 on success or a negative errno. On failure the control handler
+ * has been freed and ctx->ctrls is NULL.
+ */
+static int rpivid_init_ctrls(struct rpivid_dev *dev, struct rpivid_ctx *ctx)
+{
+	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
+	struct v4l2_ctrl *ctrl;
+	unsigned int i;
+	int ret;
+
+	v4l2_ctrl_handler_init(hdl, rpivid_ctrls_COUNT);
+	if (hdl->error) {
+		v4l2_err(&dev->v4l2_dev,
+			 "Failed to initialize control handler\n");
+		ret = hdl->error;
+		goto err_hdl;
+	}
+
+	/* COUNT + 1 pointers: the last one stays NULL as the terminator */
+	ctx->ctrls = kcalloc(rpivid_ctrls_COUNT + 1, sizeof(*ctx->ctrls),
+			     GFP_KERNEL);
+	if (!ctx->ctrls) {
+		ret = -ENOMEM;
+		goto err_hdl;
+	}
+
+	for (i = 0; i < rpivid_ctrls_COUNT; i++) {
+		ctrl = v4l2_ctrl_new_custom(hdl, &rpivid_ctrls[i].cfg,
+					    NULL);
+		if (hdl->error) {
+			v4l2_err(&dev->v4l2_dev,
+				 "Failed to create new custom control id=%#x\n",
+				 rpivid_ctrls[i].cfg.id);
+
+			/*
+			 * Capture the error now: v4l2_ctrl_handler_free()
+			 * resets hdl->error, so reading it afterwards would
+			 * report success.
+			 */
+			ret = hdl->error;
+			goto err_ctrls;
+		}
+
+		ctx->ctrls[i] = ctrl;
+	}
+
+	ctx->fh.ctrl_handler = hdl;
+	v4l2_ctrl_handler_setup(hdl);
+
+	return 0;
+
+err_ctrls:
+	kfree(ctx->ctrls);
+	ctx->ctrls = NULL;
+err_hdl:
+	v4l2_ctrl_handler_free(hdl);
+
+	return ret;
+}
+
+/*
+ * Media request validation hook.
+ *
+ * Checks that the request carries exactly one buffer and that every control
+ * marked .required in rpivid_ctrls[] is present in the request's control
+ * handler, then defers to vb2_request_validate().
+ *
+ * Returns -ENOENT when no buffer object, no control handler or a required
+ * control is missing, -EINVAL when more than one buffer is attached,
+ * otherwise the result of vb2_request_validate().
+ */
+static int rpivid_request_validate(struct media_request *req)
+{
+	struct media_request_object *obj;
+	struct v4l2_ctrl_handler *parent_hdl, *hdl;
+	struct rpivid_ctx *ctx = NULL;
+	struct v4l2_ctrl *ctrl_test;
+	unsigned int count;
+	unsigned int i;
+
+	/* The first buffer object in the request identifies the context */
+	list_for_each_entry(obj, &req->objects, list) {
+		struct vb2_buffer *vb;
+
+		if (vb2_request_object_is_buffer(obj)) {
+			vb = container_of(obj, struct vb2_buffer, req_obj);
+			ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+			break;
+		}
+	}
+
+	if (!ctx)
+		return -ENOENT;
+
+	count = vb2_request_buffer_cnt(req);
+	if (!count) {
+		v4l2_info(&ctx->dev->v4l2_dev,
+			  "No buffer was provided with the request\n");
+		return -ENOENT;
+	} else if (count > 1) {
+		v4l2_info(&ctx->dev->v4l2_dev,
+			  "More than one buffer was provided with the request\n");
+		return -EINVAL;
+	}
+
+	parent_hdl = &ctx->hdl;
+
+	hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
+	if (!hdl) {
+		v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n");
+		return -ENOENT;
+	}
+
+	for (i = 0; i < rpivid_ctrls_COUNT; i++) {
+		if (!rpivid_ctrls[i].required)
+			continue;
+
+		ctrl_test =
+			v4l2_ctrl_request_hdl_ctrl_find(hdl,
+							rpivid_ctrls[i].cfg.id);
+		if (!ctrl_test) {
+			v4l2_info(&ctx->dev->v4l2_dev,
+				  "Missing required codec control\n");
+			/*
+			 * Drop the handler obtained from
+			 * v4l2_ctrl_request_hdl_find() on this exit too,
+			 * exactly as the success path below does.
+			 */
+			v4l2_ctrl_request_hdl_put(hdl);
+			return -ENOENT;
+		}
+	}
+
+	v4l2_ctrl_request_hdl_put(hdl);
+
+	return vb2_request_validate(req);
+}
+
+/*
+ * open() handler: allocate a decode context for this file handle, create
+ * its controls and mem2mem context, and set the default source format.
+ *
+ * The whole body runs under dev->dev_mutex.
+ *
+ * Returns 0 on success, -ERESTARTSYS if taking the mutex was interrupted,
+ * -ENOMEM if the context cannot be allocated, or the error reported by
+ * control or mem2mem context initialisation.
+ */
+static int rpivid_open(struct file *file)
+{
+	struct rpivid_dev *dev = video_drvdata(file);
+	struct rpivid_ctx *ctx = NULL;
+	int ret;
+
+	if (mutex_lock_interruptible(&dev->dev_mutex))
+		return -ERESTARTSYS;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		mutex_unlock(&dev->dev_mutex);
+		return -ENOMEM;
+	}
+
+	v4l2_fh_init(&ctx->fh, video_devdata(file));
+	file->private_data = &ctx->fh;
+	ctx->dev = dev;
+
+	ret = rpivid_init_ctrls(dev, ctx);
+	if (ret)
+		goto err_free;
+
+	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx,
+					    &rpivid_queue_init);
+	if (IS_ERR(ctx->fh.m2m_ctx)) {
+		ret = PTR_ERR(ctx->fh.m2m_ctx);
+		goto err_ctrls;
+	}
+
+	/* The only bit of format info that we can guess now is H265 src
+	 * Everything else we need more info for
+	 */
+	ctx->src_fmt.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
+	rpivid_prepare_src_format(&ctx->src_fmt);
+
+	v4l2_fh_add(&ctx->fh);
+
+	mutex_unlock(&dev->dev_mutex);
+
+	return 0;
+
+err_ctrls:
+	v4l2_ctrl_handler_free(&ctx->hdl);
+	/* The control pointer array is a separate allocation from ctx */
+	kfree(ctx->ctrls);
+err_free:
+	kfree(ctx);
+	mutex_unlock(&dev->dev_mutex);
+
+	return ret;
+}
+
+/*
+ * release() handler: tear down the per-file decode context.
+ *
+ * Recovers the context from the v4l2_fh stored in file->private_data and,
+ * under dev->dev_mutex, removes the file handle, releases the mem2mem
+ * context, frees the control handler and the control pointer array, and
+ * finally frees the context itself. Always returns 0.
+ */
+static int rpivid_release(struct file *file)
+{
+ struct rpivid_dev *dev = video_drvdata(file);
+ struct rpivid_ctx *ctx = container_of(file->private_data,
+ struct rpivid_ctx, fh);
+
+ mutex_lock(&dev->dev_mutex);
+
+ v4l2_fh_del(&ctx->fh);
+ v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+
+ v4l2_ctrl_handler_free(&ctx->hdl);
+ /* ctx->ctrls is a separate allocation holding the control pointers */
+ kfree(ctx->ctrls);
+
+ v4l2_fh_exit(&ctx->fh);
+
+ kfree(ctx);
+
+ mutex_unlock(&dev->dev_mutex);
+
+ return 0;
+}
+
+static const struct v4l2_file_operations rpivid_fops = {
+ .owner = THIS_MODULE,
+ .open = rpivid_open,
+ .release = rpivid_release,
+ .poll = v4l2_m2m_fop_poll,
+ .unlocked_ioctl = video_ioctl2,
+ .mmap = v4l2_m2m_fop_mmap,
+};
+
+static const struct video_device rpivid_video_device = {
+ .name = RPIVID_NAME,
+ .vfl_dir = VFL_DIR_M2M,
+ .fops = &rpivid_fops,
+ .ioctl_ops = &rpivid_ioctl_ops,
+ .minor = -1,
+ .release = video_device_release_empty,
+ .device_caps = V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
+};
+
+static const struct v4l2_m2m_ops rpivid_m2m_ops = {
+ .device_run = rpivid_device_run,
+};
+
+static const struct media_device_ops rpivid_m2m_media_ops = {
+ .req_validate = rpivid_request_validate,
+ .req_queue = v4l2_m2m_request_queue,
+};
+
+/*
+ * Platform probe: bring up the hardware, then register the V4L2 device,
+ * the mem2mem device, the video node, the mem2mem media controller and the
+ * media device, in that order.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up
+ * so far is undone in reverse order, including the hardware layer and the
+ * initialised media device.
+ */
+static int rpivid_probe(struct platform_device *pdev)
+{
+	struct rpivid_dev *dev;
+	struct video_device *vfd;
+	int ret;
+
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->vfd = rpivid_video_device;
+	dev->dev = &pdev->dev;
+	dev->pdev = pdev;
+
+	ret = rpivid_hw_probe(dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to probe hardware\n");
+		return ret;
+	}
+
+	dev->dec_ops = &rpivid_dec_ops_h265;
+
+	mutex_init(&dev->dev_mutex);
+
+	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to register V4L2 device\n");
+		goto err_hw;
+	}
+
+	vfd = &dev->vfd;
+	vfd->lock = &dev->dev_mutex;
+	vfd->v4l2_dev = &dev->v4l2_dev;
+
+	snprintf(vfd->name, sizeof(vfd->name), "%s", rpivid_video_device.name);
+	video_set_drvdata(vfd, dev);
+
+	dev->m2m_dev = v4l2_m2m_init(&rpivid_m2m_ops);
+	if (IS_ERR(dev->m2m_dev)) {
+		v4l2_err(&dev->v4l2_dev,
+			 "Failed to initialize V4L2 M2M device\n");
+		ret = PTR_ERR(dev->m2m_dev);
+
+		goto err_v4l2;
+	}
+
+	dev->mdev.dev = &pdev->dev;
+	strscpy(dev->mdev.model, RPIVID_NAME, sizeof(dev->mdev.model));
+	strscpy(dev->mdev.bus_info, "platform:" RPIVID_NAME,
+		sizeof(dev->mdev.bus_info));
+
+	media_device_init(&dev->mdev);
+	dev->mdev.ops = &rpivid_m2m_media_ops;
+	dev->v4l2_dev.mdev = &dev->mdev;
+
+	ret = video_register_device(vfd, VFL_TYPE_GRABBER, video_nr);
+	if (ret) {
+		v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
+		goto err_m2m;
+	}
+
+	v4l2_info(&dev->v4l2_dev,
+		  "Device registered as /dev/video%d\n", vfd->num);
+
+	ret = v4l2_m2m_register_media_controller(dev->m2m_dev, vfd,
+						 MEDIA_ENT_F_PROC_VIDEO_DECODER);
+	if (ret) {
+		v4l2_err(&dev->v4l2_dev,
+			 "Failed to initialize V4L2 M2M media controller\n");
+		goto err_video;
+	}
+
+	ret = media_device_register(&dev->mdev);
+	if (ret) {
+		v4l2_err(&dev->v4l2_dev, "Failed to register media device\n");
+		goto err_m2m_mc;
+	}
+
+	platform_set_drvdata(pdev, dev);
+
+	return 0;
+
+err_m2m_mc:
+	v4l2_m2m_unregister_media_controller(dev->m2m_dev);
+err_video:
+	video_unregister_device(&dev->vfd);
+err_m2m:
+	/* Every jump to this label happens after media_device_init() */
+	media_device_cleanup(&dev->mdev);
+	v4l2_m2m_release(dev->m2m_dev);
+err_v4l2:
+	v4l2_device_unregister(&dev->v4l2_dev);
+err_hw:
+	/* Undo rpivid_hw_probe(), as rpivid_remove() does on unbind */
+	rpivid_hw_remove(dev);
+
+	return ret;
+}
+
+/*
+ * Platform remove: undo rpivid_probe().
+ *
+ * The media device is unregistered and cleaned up only if its devnode is
+ * still registered; the mem2mem device, the video node, the V4L2 device
+ * and the hardware layer are then released. Always returns 0.
+ */
+static int rpivid_remove(struct platform_device *pdev)
+{
+ struct rpivid_dev *dev = platform_get_drvdata(pdev);
+
+ if (media_devnode_is_registered(dev->mdev.devnode)) {
+ media_device_unregister(&dev->mdev);
+ v4l2_m2m_unregister_media_controller(dev->m2m_dev);
+ media_device_cleanup(&dev->mdev);
+ }
+
+ v4l2_m2m_release(dev->m2m_dev);
+ video_unregister_device(&dev->vfd);
+ v4l2_device_unregister(&dev->v4l2_dev);
+
+ rpivid_hw_remove(dev);
+
+ return 0;
+}
+
+static const struct of_device_id rpivid_dt_match[] = {
+ {
+ .compatible = "raspberrypi,rpivid-vid-decoder",
+ },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rpivid_dt_match);
+
+static struct platform_driver rpivid_driver = {
+ .probe = rpivid_probe,
+ .remove = rpivid_remove,
+ .driver = {
+ .name = RPIVID_NAME,
+ .of_match_table = of_match_ptr(rpivid_dt_match),
+ },
+};
+module_platform_driver(rpivid_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("John Cox <jc@kynesim.co.uk>");
+MODULE_DESCRIPTION("Raspberry Pi HEVC V4L2 driver");
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#ifndef _RPIVID_H_
+#define _RPIVID_H_
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-v4l2.h>
+#include <media/videobuf2-dma-contig.h>
+
+#define OPT_DEBUG_POLL_IRQ 0
+
+#define RPIVID_NAME "rpivid"
+
+#define RPIVID_CAPABILITY_UNTILED BIT(0)
+#define RPIVID_CAPABILITY_H265_DEC BIT(1)
+
+#define RPIVID_QUIRK_NO_DMA_OFFSET BIT(0)
+
+#define RPIVID_SRC_PIXELFORMAT_DEFAULT V4L2_PIX_FMT_HEVC_SLICE
+
+enum rpivid_irq_status {
+ RPIVID_IRQ_NONE,
+ RPIVID_IRQ_ERROR,
+ RPIVID_IRQ_OK,
+};
+
+/* One entry of the driver's table of V4L2 controls */
+struct rpivid_control {
+ /* Configuration handed to v4l2_ctrl_new_custom() */
+ struct v4l2_ctrl_config cfg;
+ /* Non-zero if request validation must find this control in a request */
+ unsigned char required:1;
+};
+
+struct rpivid_h265_run {
+ const struct v4l2_ctrl_hevc_sps *sps;
+ const struct v4l2_ctrl_hevc_pps *pps;
+ const struct v4l2_ctrl_hevc_slice_params *slice_params;
+ const struct v4l2_ctrl_hevc_scaling_matrix *scaling_matrix;
+};
+
+struct rpivid_run {
+ struct vb2_v4l2_buffer *src;
+ struct vb2_v4l2_buffer *dst;
+
+ struct rpivid_h265_run h265;
+};
+
+struct rpivid_buffer {
+ struct v4l2_m2m_buffer m2m_buf;
+};
+
+struct rpivid_dec_state;
+struct rpivid_dec_env;
+#define RPIVID_DEC_ENV_COUNT 3
+
+/* Descriptor for one buffer obtained from dma_alloc_attrs() */
+struct rpivid_gptr {
+ /* Size in bytes passed to dma_alloc_attrs() / dma_free_attrs() */
+ size_t size;
+ /* Pointer returned by dma_alloc_attrs(); NULL when nothing is allocated */
+ __u8 *ptr;
+ /* DMA address filled in by dma_alloc_attrs() */
+ dma_addr_t addr;
+ /* DMA_ATTR_* flags used for both allocation and free */
+ unsigned long attrs;
+};
+
+struct rpivid_dev;
+typedef void (*rpivid_irq_callback)(struct rpivid_dev *dev, void *ctx);
+
+struct rpivid_q_aux;
+#define RPIVID_AUX_ENT_COUNT VB2_MAX_FRAME
+
+#define RPIVID_P2BUF_COUNT 2
+
+struct rpivid_ctx {
+ struct v4l2_fh fh;
+ struct rpivid_dev *dev;
+
+ struct v4l2_pix_format src_fmt;
+ struct v4l2_pix_format dst_fmt;
+ int dst_fmt_set;
+
+ struct v4l2_ctrl_handler hdl;
+ struct v4l2_ctrl **ctrls;
+
+ /* Decode state - kept per context even though the decoder is nominally stateless */
+ /* state contains stuff that is only needed in phase0
+ * it could be held in dec_env but that would be wasteful
+ */
+ struct rpivid_dec_state *state;
+ struct rpivid_dec_env *dec0;
+
+ /* Spinlock protecting dec_free */
+ spinlock_t dec_lock;
+ struct rpivid_dec_env *dec_free;
+
+ struct rpivid_dec_env *dec_pool;
+
+ /* Some of these should be in dev */
+ struct rpivid_gptr bitbufs[1]; /* Will be 2 */
+ struct rpivid_gptr cmdbufs[1]; /* Will be 2 */
+ unsigned int p2idx;
+ atomic_t p2out;
+ struct rpivid_gptr pu_bufs[RPIVID_P2BUF_COUNT];
+ struct rpivid_gptr coeff_bufs[RPIVID_P2BUF_COUNT];
+
+ /* Spinlock protecting aux_free */
+ spinlock_t aux_lock;
+ struct rpivid_q_aux *aux_free;
+
+ struct rpivid_q_aux *aux_ents[RPIVID_AUX_ENT_COUNT];
+
+ unsigned int colmv_stride;
+ unsigned int colmv_picsize;
+};
+
+struct rpivid_dec_ops {
+ void (*setup)(struct rpivid_ctx *ctx, struct rpivid_run *run);
+ int (*start)(struct rpivid_ctx *ctx);
+ void (*stop)(struct rpivid_ctx *ctx);
+ void (*trigger)(struct rpivid_ctx *ctx);
+};
+
+struct rpivid_variant {
+ unsigned int capabilities;
+ unsigned int quirks;
+ unsigned int mod_rate;
+};
+
+struct rpivid_hw_irq_ent;
+
+struct rpivid_hw_irq_ctrl {
+ /* Spinlock protecting claim and tail */
+ spinlock_t lock;
+ struct rpivid_hw_irq_ent *claim;
+ struct rpivid_hw_irq_ent *tail;
+
+ /* Ent for pending irq - also prevents sched */
+ struct rpivid_hw_irq_ent *irq;
+ /* Non-zero => do not start a new job - outer layer sched pending */
+ int no_sched;
+ /* Thread CB requested */
+ bool thread_reqed;
+};
+
+struct rpivid_dev {
+ struct v4l2_device v4l2_dev;
+ struct video_device vfd;
+ struct media_device mdev;
+ struct media_pad pad[2];
+ struct platform_device *pdev;
+ struct device *dev;
+ struct v4l2_m2m_dev *m2m_dev;
+ struct rpivid_dec_ops *dec_ops;
+
+ /* Device file mutex */
+ struct mutex dev_mutex;
+
+ void __iomem *base_irq;
+ void __iomem *base_h265;
+
+ struct clk *clock;
+
+ struct rpivid_hw_irq_ctrl ic_active1;
+ struct rpivid_hw_irq_ctrl ic_active2;
+};
+
+extern struct rpivid_dec_ops rpivid_dec_ops_h265;
+
+void *rpivid_find_control_data(struct rpivid_ctx *ctx, u32 id);
+
+#endif
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_dec.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "rpivid.h"
+#include "rpivid_dec.h"
+
+/*
+ * mem2mem device_run callback: start decoding the next source buffer.
+ *
+ * @priv is the struct rpivid_ctx for the job. The function picks the next
+ * source and destination buffers, applies the controls attached to the
+ * source buffer's media request (if any), gathers the HEVC control
+ * payloads into a struct rpivid_run and passes it to the decoder's setup
+ * op, completes the request's controls, and finally calls the trigger op.
+ */
+void rpivid_device_run(void *priv)
+{
+ struct rpivid_ctx *ctx = priv;
+ struct rpivid_dev *dev = ctx->dev;
+ struct rpivid_run run = {};
+ struct media_request *src_req;
+
+ run.src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+ run.dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+
+ if (!run.src || !run.dst) {
+ v4l2_err(&dev->v4l2_dev, "%s: Missing buffer: src=%p, dst=%p\n",
+ __func__, run.src, run.dst);
+ /* We are stuffed - this probably won't dig us out of our
+ * current situation but it is better than nothing
+ */
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_ERROR);
+ return;
+ }
+
+ /* Apply request(s) controls if needed. */
+ src_req = run.src->vb2_buf.req_obj.req;
+
+ if (src_req)
+ v4l2_ctrl_request_setup(src_req, &ctx->hdl);
+
+ /* Only HEVC slice input has controls to collect; any other source
+ * format leaves run.h265 zeroed
+ */
+ switch (ctx->src_fmt.pixelformat) {
+ case V4L2_PIX_FMT_HEVC_SLICE:
+ run.h265.sps =
+ rpivid_find_control_data(ctx,
+ V4L2_CID_MPEG_VIDEO_HEVC_SPS);
+ run.h265.pps =
+ rpivid_find_control_data(ctx,
+ V4L2_CID_MPEG_VIDEO_HEVC_PPS);
+ run.h265.slice_params =
+ rpivid_find_control_data(ctx,
+ V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
+ run.h265.scaling_matrix =
+ rpivid_find_control_data(ctx,
+ V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX);
+ break;
+
+ default:
+ break;
+ }
+
+ v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
+
+ dev->dec_ops->setup(ctx, &run);
+
+ /* Complete request(s) controls if needed. */
+
+ if (src_req)
+ v4l2_ctrl_request_complete(src_req, &ctx->hdl);
+
+ dev->dec_ops->trigger(ctx);
+}
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_dec.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#ifndef _RPIVID_DEC_H_
+#define _RPIVID_DEC_H_
+
+void rpivid_device_run(void *priv);
+
+#endif
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_h265.c
@@ -0,0 +1,2275 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#include <linux/delay.h>
+#include <linux/types.h>
+
+#include <media/videobuf2-dma-contig.h>
+
+#include "rpivid.h"
+#include "rpivid_hw.h"
+
+#define DEBUG_TRACE_P1_CMD 0
+#define DEBUG_TRACE_EXECUTION 0
+
+#if DEBUG_TRACE_EXECUTION
+#define xtrace_in(dev_, de_)\
+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: in\n", __func__,\
+ (de_) == NULL ? -1 : (de_)->decode_order)
+#define xtrace_ok(dev_, de_)\
+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: ok\n", __func__,\
+ (de_) == NULL ? -1 : (de_)->decode_order)
+#define xtrace_fin(dev_, de_)\
+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: finish\n", __func__,\
+ (de_) == NULL ? -1 : (de_)->decode_order)
+#define xtrace_fail(dev_, de_)\
+ v4l2_info(&(dev_)->v4l2_dev, "%s[%d]: FAIL\n", __func__,\
+ (de_) == NULL ? -1 : (de_)->decode_order)
+#else
+#define xtrace_in(dev_, de_)
+#define xtrace_ok(dev_, de_)
+#define xtrace_fin(dev_, de_)
+#define xtrace_fail(dev_, de_)
+#endif
+
+enum hevc_slice_type {
+ HEVC_SLICE_B = 0,
+ HEVC_SLICE_P = 1,
+ HEVC_SLICE_I = 2,
+};
+
+enum hevc_layer { L0 = 0, L1 = 1 };
+
+/*
+ * Allocate a DMA buffer of @size bytes with the given DMA attributes and
+ * describe it in @gptr.
+ *
+ * Returns 0 on success or -ENOMEM. On failure the descriptor is left empty
+ * (ptr NULL, size 0, addr 0) so that a later gptr_realloc_new() for the
+ * same size does not mistake it for an existing buffer; attrs is kept so
+ * that such a retry uses the same attributes.
+ */
+static int gptr_alloc(struct rpivid_dev *const dev, struct rpivid_gptr *gptr,
+		      size_t size, unsigned long attrs)
+{
+	gptr->size = size;
+	gptr->attrs = attrs;
+	gptr->addr = 0;
+	gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size, &gptr->addr,
+				    GFP_KERNEL, gptr->attrs);
+	if (!gptr->ptr) {
+		gptr->size = 0;
+		gptr->addr = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the DMA buffer described by @gptr, if there is one, and reset
+ * every field of the descriptor to zero / NULL.
+ */
+static void gptr_free(struct rpivid_dev *const dev,
+		      struct rpivid_gptr *const gptr)
+{
+	if (gptr->ptr) {
+		dma_free_attrs(dev->dev, gptr->size, gptr->ptr, gptr->addr,
+			       gptr->attrs);
+		gptr->ptr = NULL;
+	}
+
+	gptr->attrs = 0;
+	gptr->addr = 0;
+	gptr->size = 0;
+}
+
+/*
+ * Realloc but do not copy.
+ *
+ * If @size differs from the current size, the existing buffer (if any) is
+ * freed and a fresh one of @size bytes is allocated with the descriptor's
+ * existing attrs; the old contents are not preserved.
+ *
+ * Returns 0 on success (including the no-op case where the size already
+ * matches) or -ENOMEM. On failure the descriptor is left empty with size 0,
+ * so a retry with the same size attempts the allocation again instead of
+ * hitting the "size unchanged" shortcut with a NULL buffer.
+ */
+static int gptr_realloc_new(struct rpivid_dev * const dev,
+			    struct rpivid_gptr * const gptr, size_t size)
+{
+	if (size == gptr->size)
+		return 0;
+
+	if (gptr->ptr)
+		dma_free_attrs(dev->dev, gptr->size, gptr->ptr,
+			       gptr->addr, gptr->attrs);
+
+	gptr->addr = 0;
+	gptr->size = size;
+	gptr->ptr = dma_alloc_attrs(dev->dev, gptr->size,
+				    &gptr->addr, GFP_KERNEL, gptr->attrs);
+	if (!gptr->ptr) {
+		gptr->addr = 0;
+		gptr->size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * floor(log2(x)), found by testing progressively narrower shifts
+ * (16, 8, 4, 2, 1 bits) and accumulating those that leave a non-zero
+ * value. Both 0 and 1 give 0.
+ */
+static unsigned int log2_size(size_t x)
+{
+	static const unsigned int widths[] = { 16, 8, 4, 2, 1 };
+	unsigned int result = 0;
+	unsigned int k;
+
+	for (k = 0; k < sizeof(widths) / sizeof(widths[0]); k++) {
+		if (x >> widths[k]) {
+			result += widths[k];
+			x >>= widths[k];
+		}
+	}
+
+	return result;
+}
+
+/*
+ * Pick an allocation size of the form 3 << n or 4 << n for @x.
+ *
+ * n is 8 for any x below 256 (so all such inputs yield 768); otherwise it
+ * is floor(log2(x)) - 1. The result is 4 << n when x has reached 3 << n,
+ * else 3 << n.
+ */
+static size_t round_up_size(const size_t x)
+{
+	unsigned int n;
+
+	/* Admit no size < 256 */
+	if (x < 256)
+		n = 8;
+	else
+		n = log2_size(x) - 1;
+
+	if (x >= (3 << n))
+		return 4 << n;
+
+	return 3 << n;
+}
+
+/* Size step above @x: round_up_size() applied to x + 1 */
+static size_t next_size(const size_t x)
+{
+	const size_t bumped = x + 1;
+
+	return round_up_size(bumped);
+}
+
+#define NUM_SCALING_FACTORS 4064 /* Not a typo = 0xbe0 + 0x400 */
+
+#define AXI_BASE64 0
+
+#define PROB_BACKUP ((20 << 12) + (20 << 6) + (0 << 0))
+#define PROB_RELOAD ((20 << 12) + (20 << 0) + (0 << 6))
+
+#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
+
+//////////////////////////////////////////////////////////////////////////////
+
+struct rpi_cmd {
+ u32 addr;
+ u32 data;
+} __packed;
+
+/* Reference-counted auxiliary entry holding one "col" DMA buffer */
+struct rpivid_q_aux {
+ /* Reference count; when it drops to zero the entry goes on ctx->aux_free */
+ unsigned int refcount;
+ /* Index of this entry in ctx->aux_ents[] */
+ unsigned int q_index;
+ /* Link used while the entry sits on the ctx->aux_free list */
+ struct rpivid_q_aux *next;
+ /* DMA buffer allocated with size ctx->colmv_picsize */
+ struct rpivid_gptr col;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+
+enum rpivid_decode_state {
+ RPIVID_DECODE_SLICE_START,
+ RPIVID_DECODE_SLICE_CONTINUE,
+ RPIVID_DECODE_ERROR_CONTINUE,
+ RPIVID_DECODE_ERROR_DONE,
+ RPIVID_DECODE_PHASE1,
+ RPIVID_DECODE_END,
+};
+
+struct rpivid_dec_env {
+ struct rpivid_ctx *ctx;
+ struct rpivid_dec_env *next;
+
+ enum rpivid_decode_state state;
+ unsigned int decode_order;
+ int p1_status; /* P1 status - what to realloc */
+
+ struct rpivid_dec_env *phase_wait_q_next;
+
+ struct rpi_cmd *cmd_fifo;
+ unsigned int cmd_len, cmd_max;
+ unsigned int num_slice_msgs;
+ unsigned int pic_width_in_ctbs_y;
+ unsigned int pic_height_in_ctbs_y;
+ unsigned int dpbno_col;
+ u32 reg_slicestart;
+ int collocated_from_l0_flag;
+ unsigned int wpp_entry_x;
+ unsigned int wpp_entry_y;
+
+ u32 rpi_config2;
+ u32 rpi_framesize;
+ u32 rpi_currpoc;
+
+ struct vb2_v4l2_buffer *frame_buf; // Detached dest buffer
+ unsigned int frame_c_offset;
+ unsigned int frame_stride;
+ dma_addr_t frame_addr;
+ dma_addr_t ref_addrs[16];
+ struct rpivid_q_aux *frame_aux;
+ struct rpivid_q_aux *col_aux;
+
+ dma_addr_t pu_base_vc;
+ dma_addr_t coeff_base_vc;
+ u32 pu_stride;
+ u32 coeff_stride;
+
+ struct rpivid_gptr *bit_copy_gptr;
+ size_t bit_copy_len;
+ struct rpivid_gptr *cmd_copy_gptr;
+
+ u16 slice_msgs[2 * HEVC_MAX_REFS * 8 + 3];
+ u8 scaling_factors[NUM_SCALING_FACTORS];
+
+ struct rpivid_hw_irq_ent irq_ent;
+};
+
+#define member_size(type, member) sizeof(((type *)0)->member)
+
+struct rpivid_dec_state {
+ struct v4l2_ctrl_hevc_sps sps;
+ struct v4l2_ctrl_hevc_pps pps;
+
+ // Helper vars & tables derived from sps/pps
+ unsigned int log2_ctb_size; /* log2 width of a CTB */
+ unsigned int ctb_width; /* Width in CTBs */
+ unsigned int ctb_height; /* Height in CTBs */
+ unsigned int ctb_size; /* Pic area in CTBs */
+ unsigned int num_tile_columns;
+ unsigned int num_tile_rows;
+ u8 column_width[member_size(struct v4l2_ctrl_hevc_pps,
+ column_width_minus1)];
+ u8 row_height[member_size(struct v4l2_ctrl_hevc_pps,
+ row_height_minus1)];
+
+ int *col_bd;
+ int *row_bd;
+ int *ctb_addr_rs_to_ts;
+ int *ctb_addr_ts_to_rs;
+ int *tile_id;
+
+ // Aux storage for DPB
+ // Hold refs
+ struct rpivid_q_aux *ref_aux[HEVC_MAX_REFS];
+ struct rpivid_q_aux *frame_aux;
+
+ // Slice vars
+ unsigned int slice_idx;
+ bool frame_end;
+ bool slice_temporal_mvp; /* Slice flag but constant for frame */
+
+ // Temp vars per run - don't actually need to persist
+ u8 *src_buf;
+ dma_addr_t src_addr;
+ const struct v4l2_ctrl_hevc_slice_params *sh;
+ unsigned int nb_refs[2];
+ unsigned int slice_qp;
+ unsigned int max_num_merge_cand; // 0 if I-slice
+ bool dependent_slice_segment_flag;
+};
+
+/*
+ * Clamp @x to the range [lo, hi]. The lower bound is tested first, so lo
+ * wins if the bounds are inverted and x is below lo.
+ */
+static inline int clip_int(const int x, const int lo, const int hi)
+{
+	if (x < lo)
+		return lo;
+	if (x > hi)
+		return hi;
+	return x;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Phase 1 command and bit FIFOs
+
+#if DEBUG_TRACE_P1_CMD
+static int p1_z;
+#endif
+
+/*
+ * Append one (addr, data) command to the phase 1 command FIFO, growing the
+ * FIFO when it is full.
+ *
+ * @addr is taken as a u16 but stored in the u32 addr field of the command.
+ *
+ * Returns the FIFO index the command was stored at, or -ENOMEM if the FIFO
+ * could not be grown. On -ENOMEM the existing FIFO and its bookkeeping are
+ * left untouched and the command is not recorded.
+ */
+static int p1_apb_write(struct rpivid_dec_env *const de, const u16 addr,
+			const u32 data)
+{
+	if (de->cmd_len >= de->cmd_max) {
+		/* Double the capacity; a zero capacity grows to one entry */
+		const unsigned int new_max =
+			de->cmd_max ? de->cmd_max * 2 : 1;
+		/*
+		 * Go through a temporary: krealloc() returns NULL on
+		 * failure and leaves the old buffer allocated.
+		 */
+		struct rpi_cmd *const new_fifo =
+			krealloc(de->cmd_fifo,
+				 new_max * sizeof(struct rpi_cmd),
+				 GFP_KERNEL);
+
+		if (!new_fifo)
+			return -ENOMEM;
+
+		de->cmd_fifo = new_fifo;
+		de->cmd_max = new_max;
+	}
+
+	de->cmd_fifo[de->cmd_len].addr = addr;
+	de->cmd_fifo[de->cmd_len].data = data;
+
+#if DEBUG_TRACE_P1_CMD
+	if (++p1_z < 256) {
+		v4l2_info(&de->ctx->dev->v4l2_dev, "[%02x] %x %x\n",
+			  de->cmd_len, addr, data);
+	}
+#endif
+
+	return de->cmd_len++;
+}
+
+/*
+ * Map a CTB coordinate to the index of the tile column/row containing it.
+ *
+ * @bd holds num + 1 ascending boundaries with bd[0] == 0; tile t covers
+ * bd[t] <= ctb < bd[t + 1].
+ *
+ * The scan is bounded by @num, so a ctb at or beyond bd[num] is clamped to
+ * the last tile (num - 1) rather than reading past the end of bd[].
+ */
+static int ctb_to_tile(unsigned int ctb, unsigned int *bd, int num)
+{
+	int i;
+
+	for (i = 1; i < num && ctb >= bd[i]; i++)
+		;
+	return i - 1;
+}
+
+/*
+ * Size to use for the CTB at position @ctb along one axis.
+ *
+ * A CTB before bd[num - 1] gets the full @ctb_size. A CTB at or after it
+ * gets the remainder of @width modulo @ctb_size, or the full @ctb_size
+ * when that remainder is zero.
+ */
+static int ctb_to_slice_w_h(unsigned int ctb, int ctb_size, int width,
+			    unsigned int *bd, int num)
+{
+	if (ctb >= bd[num - 1]) {
+		const int partial = width % ctb_size;
+
+		if (partial)
+			return partial;
+	}
+
+	return ctb_size;
+}
+
+/* Free an aux entry: release its col DMA buffer, then the entry itself */
+static void aux_q_free(struct rpivid_ctx *const ctx,
+		       struct rpivid_q_aux *const aq)
+{
+	gptr_free(ctx->dev, &aq->col);
+	kfree(aq);
+}
+
+/*
+ * Allocate a new aux entry with a reference count of 1 and a col DMA
+ * buffer of ctx->colmv_picsize bytes.
+ *
+ * Returns the entry, or NULL if either allocation fails.
+ */
+static struct rpivid_q_aux *aux_q_alloc(struct rpivid_ctx *const ctx)
+{
+	struct rpivid_q_aux *const entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return NULL;
+
+	entry->refcount = 1;
+
+	if (gptr_alloc(ctx->dev, &entry->col, ctx->colmv_picsize,
+		       DMA_ATTR_FORCE_CONTIGUOUS |
+		       DMA_ATTR_NO_KERNEL_MAPPING) != 0) {
+		kfree(entry);
+		return NULL;
+	}
+
+	return entry;
+}
+
+/*
+ * Get an aux entry for queue slot @q_index.
+ *
+ * An entry is popped from ctx->aux_free under aux_lock if one is
+ * available (its refcount is reset to 1); otherwise a new one is
+ * allocated. The entry is recorded in ctx->aux_ents[q_index].
+ *
+ * Returns the entry, or NULL if a new one was needed and could not be
+ * allocated.
+ */
+static struct rpivid_q_aux *aux_q_new(struct rpivid_ctx *const ctx,
+				      const unsigned int q_index)
+{
+	unsigned long irq_state;
+	struct rpivid_q_aux *entry;
+
+	spin_lock_irqsave(&ctx->aux_lock, irq_state);
+	entry = ctx->aux_free;
+	if (entry) {
+		ctx->aux_free = entry->next;
+		entry->next = NULL;
+		entry->refcount = 1;
+	}
+	spin_unlock_irqrestore(&ctx->aux_lock, irq_state);
+
+	/* Free list was empty: fall back to a fresh allocation */
+	if (!entry)
+		entry = aux_q_alloc(ctx);
+	if (!entry)
+		return NULL;
+
+	entry->q_index = q_index;
+	ctx->aux_ents[q_index] = entry;
+	return entry;
+}
+
+/*
+ * Take an additional reference on @aq under aux_lock.
+ *
+ * A NULL @aq is accepted and passed through. Returns @aq.
+ */
+static struct rpivid_q_aux *aux_q_ref(struct rpivid_ctx *const ctx,
+				      struct rpivid_q_aux *const aq)
+{
+	unsigned long irq_state;
+
+	if (!aq)
+		return NULL;
+
+	spin_lock_irqsave(&ctx->aux_lock, irq_state);
+	aq->refcount++;
+	spin_unlock_irqrestore(&ctx->aux_lock, irq_state);
+
+	return aq;
+}
+
+/*
+ * Drop the reference held through *@paq and clear *@paq.
+ *
+ * When the last reference goes away the entry is pushed onto
+ * ctx->aux_free and its slot in ctx->aux_ents[] is cleared, all under
+ * aux_lock. A NULL *@paq is a no-op.
+ */
+static void aux_q_release(struct rpivid_ctx *const ctx,
+			  struct rpivid_q_aux **const paq)
+{
+	struct rpivid_q_aux *const entry = *paq;
+	unsigned long irq_state;
+
+	*paq = NULL;
+
+	if (!entry)
+		return;
+
+	spin_lock_irqsave(&ctx->aux_lock, irq_state);
+
+	entry->refcount--;
+	if (entry->refcount == 0) {
+		ctx->aux_ents[entry->q_index] = NULL;
+		entry->next = ctx->aux_free;
+		ctx->aux_free = entry;
+	}
+
+	spin_unlock_irqrestore(&ctx->aux_lock, irq_state);
+}
+
+/* Prepare the context's aux bookkeeping: init aux_lock, empty the free list */
+static void aux_q_init(struct rpivid_ctx *const ctx)
+{
+ spin_lock_init(&ctx->aux_lock);
+ ctx->aux_free = NULL;
+}
+
+/*
+ * Reset the context's colmv geometry to zero and free every entry on the
+ * ctx->aux_free list. The list is walked without taking aux_lock.
+ */
+static void aux_q_uninit(struct rpivid_ctx *const ctx)
+{
+	ctx->colmv_stride = 0;
+	ctx->colmv_picsize = 0;
+
+	while (ctx->aux_free) {
+		struct rpivid_q_aux *const head = ctx->aux_free;
+
+		ctx->aux_free = head->next;
+		aux_q_free(ctx, head);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Initialisation process for context variables (CABAC init)
+ * see H.265 9.3.2.2
+ *
+ * N.B. If comparing with FFmpeg note that this h/w uses slightly different
+ * offsets to FFmpegs array
+ */
+
+/* Actual number of values */
+#define RPI_PROB_VALS 154U
+/* Rounded up as we copy words */
+#define RPI_PROB_ARRAY_SIZE ((154 + 3) & ~3)
+
+/*
+ * Initialiser values - see tables H.265 9-4 through 9-42
+ *
+ * One row per init_type as computed in write_prob(). Each row holds
+ * RPI_PROB_VALS (154) initialisers followed by two zero bytes of padding,
+ * giving 156 entries per row.
+ */
+static const u8 prob_init[3][156] = {
+ {
+ 153, 200, 139, 141, 157, 154, 154, 154, 154, 154, 184, 154, 154,
+ 154, 184, 63, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
+ 154, 154, 154, 153, 138, 138, 111, 141, 94, 138, 182, 154, 154,
+ 154, 140, 92, 137, 138, 140, 152, 138, 139, 153, 74, 149, 92,
+ 139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, 110,
+ 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
+ 79, 108, 123, 63, 110, 110, 124, 125, 140, 153, 125, 127, 140,
+ 109, 111, 143, 127, 111, 79, 108, 123, 63, 91, 171, 134, 141,
+ 138, 153, 136, 167, 152, 152, 139, 139, 111, 111, 125, 110, 110,
+ 94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141,
+ 179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182,
+ 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, 0, 0,
+ },
+ {
+ 153, 185, 107, 139, 126, 197, 185, 201, 154, 149, 154, 139, 154,
+ 154, 154, 152, 110, 122, 95, 79, 63, 31, 31, 153, 153, 168,
+ 140, 198, 79, 124, 138, 94, 153, 111, 149, 107, 167, 154, 154,
+ 154, 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136,
+ 153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, 125,
+ 110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95,
+ 94, 108, 123, 108, 125, 110, 94, 110, 95, 79, 125, 111, 110,
+ 78, 110, 111, 111, 95, 94, 108, 123, 108, 121, 140, 61, 154,
+ 107, 167, 91, 122, 107, 167, 139, 139, 155, 154, 139, 153, 139,
+ 123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140,
+ 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123,
+ 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
+ },
+ {
+ 153, 160, 107, 139, 126, 197, 185, 201, 154, 134, 154, 139, 154,
+ 154, 183, 152, 154, 137, 95, 79, 63, 31, 31, 153, 153, 168,
+ 169, 198, 79, 224, 167, 122, 153, 111, 149, 92, 167, 154, 154,
+ 154, 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136,
+ 153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, 125,
+ 110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111,
+ 79, 108, 123, 93, 125, 110, 124, 110, 95, 94, 125, 111, 111,
+ 79, 125, 126, 111, 111, 79, 108, 123, 93, 121, 140, 61, 154,
+ 107, 167, 91, 107, 107, 167, 139, 139, 170, 154, 139, 153, 139,
+ 123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140,
+ 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138,
+ 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, 0, 0,
+ },
+};
+
+/*
+ * Build the initial CABAC context state table for the current slice
+ * (H.265 9.3.2.2) and queue it as 32-bit writes from 0x1000 onwards.
+ *
+ * The initialiser row is chosen from the slice type and the cabac_init
+ * flag, each initialiser is combined with the slice QP (clipped to 0..51),
+ * and the bytes between RPI_PROB_VALS and RPI_PROB_ARRAY_SIZE are zero.
+ */
+static void write_prob(struct rpivid_dec_env *const de,
+		       const struct rpivid_dec_state *const s)
+{
+	const struct v4l2_ctrl_hevc_slice_params *const hdr = s->sh;
+	u8 state[RPI_PROB_ARRAY_SIZE];
+	unsigned int init_type;
+	const u8 *init_row;
+	unsigned int k;
+	int qp;
+
+	if ((hdr->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT) != 0 &&
+	    hdr->slice_type != HEVC_SLICE_I)
+		init_type = hdr->slice_type + 1;
+	else
+		init_type = 2 - hdr->slice_type;
+
+	init_row = prob_init[init_type];
+	qp = clip_int(s->slice_qp, 0, 51);
+
+	for (k = 0; k != RPI_PROB_ARRAY_SIZE; ++k) {
+		int slope, offset, val;
+
+		if (k >= RPI_PROB_VALS) {
+			/* Padding up to a whole number of words */
+			state[k] = 0;
+			continue;
+		}
+
+		slope = (init_row[k] >> 4) * 5 - 45;
+		offset = ((init_row[k] & 15) << 3) - 16;
+		val = 2 * (((slope * qp) >> 4) + offset) - 127;
+
+		/* Negative values are bit-inverted, so val is >= 0 after this */
+		val ^= val >> 31;
+		if (val > 124)
+			val = 124 + (val & 1);
+		state[k] = val;
+	}
+
+	/* Every state byte is <= 125, so OR-ing the shifted bytes is exact */
+	for (k = 0; k < RPI_PROB_ARRAY_SIZE; k += 4)
+		p1_apb_write(de, 0x1000 + k,
+			     state[k] | (state[k + 1] << 8) |
+			     (state[k + 2] << 16) | (state[k + 3] << 24));
+}
+
+/*
+ * Queue the expanded scaling factor table held in de->scaling_factors as
+ * 32-bit writes from 0x2000 onwards, four table bytes per word with the
+ * lowest-addressed byte in the least significant position.
+ */
+static void write_scaling_factors(struct rpivid_dec_env *const de)
+{
+	const u8 *p = (const u8 *)de->scaling_factors;
+	int i;
+
+	for (i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4) {
+		/*
+		 * Widen to u32 before shifting: a u8 promotes to int, so
+		 * p[3] << 24 would shift into the sign bit for values >= 128.
+		 */
+		const u32 word = (u32)p[0] | ((u32)p[1] << 8) |
+				 ((u32)p[2] << 16) | ((u32)p[3] << 24);
+
+		p1_apb_write(de, 0x2000 + i, word);
+	}
+}
+
+/*
+ * Drop the low 6 bits of a DMA address (i.e. express it in units of
+ * 64 bytes) and truncate the result to 32 bits.
+ */
+static inline __u32 dma_to_axi_addr(dma_addr_t a)
+{
+	const dma_addr_t in_64byte_units = a >> 6;
+
+	return (__u32)in_64byte_units;
+}
+
+/*
+ * Point the bitstream fetcher at the slice data of the current slice.
+ *
+ * The data starts data_bit_offset / 8 + 1 bytes into the source buffer.
+ * If the source has a DMA address (s->src_addr != 0) it is used directly;
+ * otherwise the data is copied into the bit copy buffer, whose fill level
+ * is then advanced by the length rounded up to a multiple of 64.
+ */
+static void write_bitstream(struct rpivid_dec_env *const de,
+			    const struct rpivid_dec_state *const s)
+{
+	// Note that FFmpeg removes emulation prevention bytes, so this is
+	// matched in the configuration here.
+	// Whether that is the correct behaviour or not is not clear in the
+	// spec.
+	const int rpi_use_emu = 1;
+	const unsigned int skip = s->sh->data_bit_offset / 8 + 1;
+	const unsigned int len = (s->sh->bit_size + 7) / 8 - skip;
+	unsigned int misalign;
+	dma_addr_t addr;
+
+	if (s->src_addr == 0) {
+		memcpy(de->bit_copy_gptr->ptr + de->bit_copy_len,
+		       s->src_buf + skip, len);
+		addr = de->bit_copy_gptr->addr + de->bit_copy_len;
+		de->bit_copy_len += (len + 63) & ~63;
+	} else {
+		addr = s->src_addr + skip;
+	}
+
+	// The base register takes addr >> 6; the low 6 bits go in BFCONTROL
+	misalign = addr & 63;
+
+	p1_apb_write(de, RPI_BFBASE, dma_to_axi_addr(addr));
+	p1_apb_write(de, RPI_BFNUM, len);
+	p1_apb_write(de, RPI_BFCONTROL, misalign + (1 << 7)); // Stop
+	p1_apb_write(de, RPI_BFCONTROL, misalign + (rpi_use_emu << 6));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Queue the RPI_SLICE register for the current slice.
+ *
+ * Packs max_num_merge_cand (bit 0 up), the L0/L1 reference counts (bits 4
+ * and 8 up), slice type (bit 12 up), the SAO luma/chroma flags (bits 14/15),
+ * mvd_l1_zero (bit 16, B slices only) and the caller's slice_w / slice_h
+ * (bits 17 and 24 up).
+ */
+static void write_slice(struct rpivid_dec_env *const de,
+			const struct rpivid_dec_state *const s,
+			const unsigned int slice_w,
+			const unsigned int slice_h)
+{
+	const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
+	const int sao_luma =
+		(sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA) != 0;
+	const int sao_chroma =
+		(sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA) != 0;
+	u32 reg;
+
+	reg = (sh->slice_type << 12) + (sao_luma << 14) + (sao_chroma << 15) +
+	      (slice_w << 17) + (slice_h << 24);
+
+	reg |= (s->max_num_merge_cand << 0) + (s->nb_refs[L0] << 4) +
+	       (s->nb_refs[L1] << 8);
+
+	if (sh->slice_type == HEVC_SLICE_B) {
+		const int mvd_l1_zero =
+			(sh->flags &
+			 V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO) != 0;
+
+		reg |= mvd_l1_zero << 16;
+	}
+
+	p1_apb_write(de, RPI_SLICE, reg);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Tiles mode
+
+/*
+ * Queue the register writes that start decoding at a tile entry point.
+ *
+ * ctb_addr_ts is the tile-scan address of the first CTB.  The tile that
+ * contains it supplies TILESTART/TILEEND (and BEGINTILEEND when do_bte is
+ * set); RPI_QP is only written when reset_qp_y is set.  The final write to
+ * RPI_CONTROL carries the CTB column and row.
+ */
+static void new_entry_point(struct rpivid_dec_env *const de,
+			    const struct rpivid_dec_state *const s,
+			    const int do_bte,
+			    const int reset_qp_y, const int ctb_addr_ts)
+{
+	const int rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
+	const int col = rs % de->pic_width_in_ctbs_y;
+	const int row = rs / de->pic_width_in_ctbs_y;
+
+	const int tx = ctb_to_tile(col, s->col_bd, s->num_tile_columns);
+	const int ty = ctb_to_tile(row, s->row_bd, s->num_tile_rows);
+
+	/* Last CTB column / row inside this tile */
+	const int endx = s->col_bd[tx + 1] - 1;
+	const int endy = s->row_bd[ty + 1] - 1;
+
+	const u8 slice_w = ctb_to_slice_w_h(col, 1 << s->log2_ctb_size,
+					    s->sps.pic_width_in_luma_samples,
+					    s->col_bd, s->num_tile_columns);
+	const u8 slice_h = ctb_to_slice_w_h(row, 1 << s->log2_ctb_size,
+					    s->sps.pic_height_in_luma_samples,
+					    s->row_bd, s->num_tile_rows);
+
+	p1_apb_write(de, RPI_TILESTART,
+		     s->col_bd[tx] + (s->row_bd[ty] << 16));
+	p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
+
+	if (do_bte)
+		p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
+
+	write_slice(de, s, slice_w, slice_h);
+
+	if (reset_qp_y) {
+		const unsigned int qp_bd_offset =
+			6 * s->sps.bit_depth_luma_minus8;
+
+		p1_apb_write(de, RPI_QP, qp_bd_offset + s->slice_qp);
+	}
+
+	/* Bits 17 / 18 flag the last tile column / last tile row */
+	p1_apb_write(de, RPI_MODE,
+		     (0xFFFF << 0) + (0x0 << 16) +
+		     ((tx == s->num_tile_columns - 1) << 17) +
+		     ((ty == s->num_tile_rows - 1) << 18));
+
+	p1_apb_write(de, RPI_CONTROL, (col << 0) + (row << 16));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+/*
+ * Queue the per-slice-segment parameter registers: RPI_SPS0, RPI_SPS1 and
+ * RPI_PPS are packed from the cached SPS/PPS (s->sps, s->pps) and the
+ * current slice header, the scaling factor table is written when the SPS
+ * enables scaling lists, and RPI_SLICESTART is written last.
+ *
+ * For a dependent slice segment de->reg_slicestart is not recalculated, so
+ * the value stored by the preceding independent segment is written again.
+ */
+static void new_slice_segment(struct rpivid_dec_env *const de,
+ const struct rpivid_dec_state *const s)
+{
+ const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
+ const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
+
+ /*
+ * SPS0 fields by bit position: 0 log2 min coding block size,
+ * 4 log2 CTB size, 8 log2 min transform block size,
+ * 12 log2 max transform block size, 16 luma bit depth,
+ * 20 chroma bit depth, 24 / 28 max transform hierarchy depth
+ * intra / inter.
+ */
+ p1_apb_write(de,
+ RPI_SPS0,
+ ((sps->log2_min_luma_coding_block_size_minus3 + 3) << 0) |
+ (s->log2_ctb_size << 4) |
+ ((sps->log2_min_luma_transform_block_size_minus2 + 2)
+ << 8) |
+ ((sps->log2_min_luma_transform_block_size_minus2 + 2 +
+ sps->log2_diff_max_min_luma_transform_block_size)
+ << 12) |
+ ((sps->bit_depth_luma_minus8 + 8) << 16) |
+ ((sps->bit_depth_chroma_minus8 + 8) << 20) |
+ (sps->max_transform_hierarchy_depth_intra << 24) |
+ (sps->max_transform_hierarchy_depth_inter << 28));
+
+ /*
+ * SPS1 fields by bit position: 0 / 4 PCM luma / chroma sample bit depth,
+ * 8 / 12 log2 min / max PCM coding block size, 16 chroma_format_idc
+ * (forced to 0 when separate colour planes are flagged), 18 AMP,
+ * 19 PCM, 20 scaling lists, 21 strong intra smoothing.
+ */
+ p1_apb_write(de,
+ RPI_SPS1,
+ ((sps->pcm_sample_bit_depth_luma_minus1 + 1) << 0) |
+ ((sps->pcm_sample_bit_depth_chroma_minus1 + 1) << 4) |
+ ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3)
+ << 8) |
+ ((sps->log2_min_pcm_luma_coding_block_size_minus3 + 3 +
+ sps->log2_diff_max_min_pcm_luma_coding_block_size)
+ << 12) |
+ (((sps->flags & V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE) ?
+ 0 : sps->chroma_format_idc) << 16) |
+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED)) << 18) |
+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) << 19) |
+ ((!!(sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED))
+ << 20) |
+ ((!!(sps->flags &
+ V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED))
+ << 21));
+
+ /*
+ * PPS fields by bit position: 0 log2 CTB size minus
+ * diff_cu_qp_delta_depth, 4 cu_qp_delta enabled, 5 transquant bypass,
+ * 6 transform skip, 7 sign data hiding, 8 / 16 combined PPS + slice
+ * Cb / Cr QP offset (low 8 bits each), 24 constrained intra pred.
+ */
+ p1_apb_write(de,
+ RPI_PPS,
+ ((s->log2_ctb_size - pps->diff_cu_qp_delta_depth) << 0) |
+ ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
+ << 4) |
+ ((!!(pps->flags &
+ V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED))
+ << 5) |
+ ((!!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED))
+ << 6) |
+ ((!!(pps->flags &
+ V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED))
+ << 7) |
+ (((pps->pps_cb_qp_offset + s->sh->slice_cb_qp_offset) & 255)
+ << 8) |
+ (((pps->pps_cr_qp_offset + s->sh->slice_cr_qp_offset) & 255)
+ << 16) |
+ ((!!(pps->flags &
+ V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED))
+ << 24));
+
+ if ((sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) != 0)
+ write_scaling_factors(de);
+
+ /* Only an independent segment moves the slice start position */
+ if (!s->dependent_slice_segment_flag) {
+ int ctb_col = s->sh->slice_segment_addr %
+ de->pic_width_in_ctbs_y;
+ int ctb_row = s->sh->slice_segment_addr /
+ de->pic_width_in_ctbs_y;
+
+ de->reg_slicestart = (ctb_col << 0) + (ctb_row << 16);
+ }
+
+ p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Slice messages
+
+/*
+ * Append one 16-bit word to the pending slice message list.
+ * No bounds check is made against the capacity of de->slice_msgs.
+ */
+static void msg_slice(struct rpivid_dec_env *const de, const u16 msg)
+{
+	de->slice_msgs[de->num_slice_msgs] = msg;
+	de->num_slice_msgs++;
+}
+
+/*
+ * Queue the accumulated slice messages: RPI_SLICECMDS receives the message
+ * count with sliceid in bits 8 and up, then each message is written to
+ * consecutive 32-bit slots starting at 0x4000.
+ */
+static void program_slicecmds(struct rpivid_dec_env *const de,
+			      const int sliceid)
+{
+	int slot_addr = 0x4000;
+	int n;
+
+	p1_apb_write(de, RPI_SLICECMDS, de->num_slice_msgs + (sliceid << 8));
+
+	for (n = 0; n < de->num_slice_msgs; n++, slot_addr += 4)
+		p1_apb_write(de, slot_addr, de->slice_msgs[n] & 0xffff);
+}
+
+// NoBackwardPredictionFlag 8.3.5
+// Simply checks POCs
+// N.B. Despite the name, this returns 1 when none of the n listed
+// references has a POC later than cur_poc, and 0 as soon as one does.
+static int has_backward(const struct v4l2_hevc_dpb_entry *const dpb,
+			const __u8 *const idx, const unsigned int n,
+			const unsigned int cur_poc)
+{
+	const __u8 *const end = idx + n;
+	const __u8 *ref;
+
+	for (ref = idx; ref != end; ++ref) {
+		// Compare mod 2^16
+		// We only get u16 pocs & 8.3.1 says
+		// "The bitstream shall not contain data that result in values
+		// of DiffPicOrderCnt( picA, picB ) used in the decoding
+		// process that are not in the range of -2^15 to 2^15 - 1,
+		// inclusive."
+		const unsigned int diff =
+			cur_poc - dpb[*ref].pic_order_cnt[0];
+
+		// Bit 15 set => difference is negative mod 2^16
+		if ((diff & 0x8000) != 0)
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Build the slice message list (de->slice_msgs) for the current slice.
+ *
+ * The list is restarted from empty and then filled with:
+ *  - the slice command word: type code in bits 0-1 (I = 1, P = 2, B = 3),
+ *    L0 / L1 reference counts at bits 2 / 6, the no-backward-prediction
+ *    flag at bit 10 (P and B only), max_num_merge_cand at bit 11 and the
+ *    collocated-from-L0 flag at bit 14;
+ *  - for P and B slices, for each L0 then each L1 reference: a descriptor
+ *    word (DPB index, bit 4 for a long-term reference, bits 5-6 when
+ *    weighted prediction applies) and its POC, followed by six weight /
+ *    offset words when weighted prediction applies;
+ *  - a deblocking / loop filter word;
+ *  - the slice Cb / Cr QP offset word.
+ *
+ * As a side effect de->dpbno_col is set to the DPB index of the collocated
+ * picture when slice temporal MVP is in use on a P or B slice.
+ *
+ * NOTE(review): reference indices and collocated_ref_idx come straight from
+ * the slice parameters and are not range checked here - confirm they are
+ * validated before this point.
+ */
+static void pre_slice_decode(struct rpivid_dec_env *const de,
+ const struct rpivid_dec_state *const s)
+{
+ const struct v4l2_ctrl_hevc_slice_params *const sh = s->sh;
+ int weighted_pred_flag, idx;
+ u16 cmd_slice;
+ unsigned int collocated_from_l0_flag;
+
+ de->num_slice_msgs = 0;
+
+ cmd_slice = 0;
+ if (sh->slice_type == HEVC_SLICE_I)
+ cmd_slice = 1;
+ if (sh->slice_type == HEVC_SLICE_P)
+ cmd_slice = 2;
+ if (sh->slice_type == HEVC_SLICE_B)
+ cmd_slice = 3;
+
+ cmd_slice |= (s->nb_refs[L0] << 2) | (s->nb_refs[L1] << 6) |
+ (s->max_num_merge_cand << 11);
+
+ // Treated as set unless this is a B slice using temporal MVP, in which
+ // case the slice header flag decides
+ collocated_from_l0_flag =
+ !s->slice_temporal_mvp ||
+ sh->slice_type != HEVC_SLICE_B ||
+ (sh->flags & V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0);
+ cmd_slice |= collocated_from_l0_flag << 14;
+
+ if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
+ // Flag to say all reference pictures are from the past
+ const int no_backward_pred_flag =
+ has_backward(sh->dpb, sh->ref_idx_l0, s->nb_refs[L0],
+ sh->slice_pic_order_cnt) &&
+ has_backward(sh->dpb, sh->ref_idx_l1, s->nb_refs[L1],
+ sh->slice_pic_order_cnt);
+ cmd_slice |= no_backward_pred_flag << 10;
+ msg_slice(de, cmd_slice);
+
+ if (s->slice_temporal_mvp) {
+ const __u8 *const rpl = collocated_from_l0_flag ?
+ sh->ref_idx_l0 : sh->ref_idx_l1;
+ de->dpbno_col = rpl[sh->collocated_ref_idx];
+ //v4l2_info(&de->ctx->dev->v4l2_dev,
+ // "L0=%d col_ref_idx=%d,
+ // dpb_no=%d\n", collocated_from_l0_flag,
+ // sh->collocated_ref_idx, de->dpbno_col);
+ }
+
+ // Write reference picture descriptions
+ // P slices use the PPS weighted_pred flag, B slices weighted_bipred
+ weighted_pred_flag =
+ sh->slice_type == HEVC_SLICE_P ?
+ !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) :
+ !!(s->pps.flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED);
+
+ for (idx = 0; idx < s->nb_refs[L0]; ++idx) {
+ unsigned int dpb_no = sh->ref_idx_l0[idx];
+ //v4l2_info(&de->ctx->dev->v4l2_dev,
+ // "L0[%d]=dpb[%d]\n", idx, dpb_no);
+
+ msg_slice(de,
+ dpb_no |
+ (sh->dpb[dpb_no].rps ==
+ V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
+ (1 << 4) : 0) |
+ (weighted_pred_flag ? (3 << 5) : 0));
+ msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
+
+ if (weighted_pred_flag) {
+ const struct v4l2_hevc_pred_weight_table
+ *const w = &sh->pred_weight_table;
+ const int luma_weight_denom =
+ (1 << w->luma_log2_weight_denom);
+ const unsigned int chroma_log2_weight_denom =
+ (w->luma_log2_weight_denom +
+ w->delta_chroma_log2_weight_denom);
+ const int chroma_weight_denom =
+ (1 << chroma_log2_weight_denom);
+
+ // Each weight word: log2 denom in bits 0-2, 9-bit
+ // weight (delta + denom) from bit 3; each offset
+ // word: low 8 bits of the offset.
+ // Order: luma, chroma[0], chroma[1]
+ msg_slice(de,
+ w->luma_log2_weight_denom |
+ (((w->delta_luma_weight_l0[idx] +
+ luma_weight_denom) & 0x1ff)
+ << 3));
+ msg_slice(de, w->luma_offset_l0[idx] & 0xff);
+ msg_slice(de,
+ chroma_log2_weight_denom |
+ (((w->delta_chroma_weight_l0[idx][0] +
+ chroma_weight_denom) & 0x1ff)
+ << 3));
+ msg_slice(de,
+ w->chroma_offset_l0[idx][0] & 0xff);
+ msg_slice(de,
+ chroma_log2_weight_denom |
+ (((w->delta_chroma_weight_l0[idx][1] +
+ chroma_weight_denom) & 0x1ff)
+ << 3));
+ msg_slice(de,
+ w->chroma_offset_l0[idx][1] & 0xff);
+ }
+ }
+
+ // Same layout again for list L1
+ for (idx = 0; idx < s->nb_refs[L1]; ++idx) {
+ unsigned int dpb_no = sh->ref_idx_l1[idx];
+ //v4l2_info(&de->ctx->dev->v4l2_dev,
+ // "L1[%d]=dpb[%d]\n", idx, dpb_no);
+ msg_slice(de,
+ dpb_no |
+ (sh->dpb[dpb_no].rps ==
+ V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR ?
+ (1 << 4) : 0) |
+ (weighted_pred_flag ? (3 << 5) : 0));
+ msg_slice(de, sh->dpb[dpb_no].pic_order_cnt[0]);
+ if (weighted_pred_flag) {
+ const struct v4l2_hevc_pred_weight_table
+ *const w = &sh->pred_weight_table;
+ const int luma_weight_denom =
+ (1 << w->luma_log2_weight_denom);
+ const unsigned int chroma_log2_weight_denom =
+ (w->luma_log2_weight_denom +
+ w->delta_chroma_log2_weight_denom);
+ const int chroma_weight_denom =
+ (1 << chroma_log2_weight_denom);
+
+ msg_slice(de,
+ w->luma_log2_weight_denom |
+ (((w->delta_luma_weight_l1[idx] +
+ luma_weight_denom) & 0x1ff) << 3));
+ msg_slice(de, w->luma_offset_l1[idx] & 0xff);
+ msg_slice(de,
+ chroma_log2_weight_denom |
+ (((w->delta_chroma_weight_l1[idx][0] +
+ chroma_weight_denom) & 0x1ff)
+ << 3));
+ msg_slice(de,
+ w->chroma_offset_l1[idx][0] & 0xff);
+ msg_slice(de,
+ chroma_log2_weight_denom |
+ (((w->delta_chroma_weight_l1[idx][1] +
+ chroma_weight_denom) & 0x1ff)
+ << 3));
+ msg_slice(de,
+ w->chroma_offset_l1[idx][1] & 0xff);
+ }
+ }
+ } else {
+ // I slice: the command word is the only slice-type dependent message
+ msg_slice(de, cmd_slice);
+ }
+
+ // Deblocking word: beta offset / 2 in bits 0-3, tc offset / 2 in bits
+ // 4-7, bit 8 deblocking disabled, bit 9 loop filter across slices,
+ // bit 10 loop filter across tiles
+ msg_slice(de,
+ (sh->slice_beta_offset_div2 & 15) |
+ ((sh->slice_tc_offset_div2 & 15) << 4) |
+ ((sh->flags &
+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED) ?
+ 1 << 8 : 0) |
+ ((sh->flags &
+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED) ?
+ 1 << 9 : 0) |
+ ((s->pps.flags &
+ V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED) ?
+ 1 << 10 : 0));
+
+ // Cb offset in bits 0-4, Cr offset in bits 5-9
+ msg_slice(de, ((sh->slice_cr_qp_offset & 31) << 5) +
+ (sh->slice_cb_qp_offset & 31)); // CMD_QPOFF
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Write STATUS register with expected end CTU address of previous slice
+
+/*
+ * Queue the RPI_STATUS write giving the position at which the previous
+ * slice is expected to end: the raster x / y of the CTB that immediately
+ * precedes ctb_addr_ts in tile-scan order.
+ *
+ * Reads index ctb_addr_ts - 1, so ctb_addr_ts must not be 0.
+ */
+static void end_previous_slice(struct rpivid_dec_env *const de,
+			       const struct rpivid_dec_state *const s,
+			       const int ctb_addr_ts)
+{
+	const int prev_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
+	const int last_x = prev_rs % de->pic_width_in_ctbs_y;
+	const int last_y = prev_rs / de->pic_width_in_ctbs_y;
+
+	p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
+}
+
+/*
+ * Queue the four-write wavefront "pause" sequence for CTB row ctb_row:
+ * STATUS, a PROB_BACKUP transfer, MODE (a different value is used for the
+ * last CTB row of the picture) and CONTROL.
+ */
+static void wpp_pause(struct rpivid_dec_env *const de, int ctb_row)
+{
+	const int on_last_row = (ctb_row == de->pic_height_in_ctbs_y - 1);
+	const int mode = on_last_row ? 0x70000 : 0x30000;
+
+	p1_apb_write(de, RPI_STATUS, (ctb_row << 18) + 0x25);
+	p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
+	p1_apb_write(de, RPI_MODE, mode);
+	p1_apb_write(de, RPI_CONTROL, (ctb_row << 16) + 2);
+}
+
+/*
+ * Wavefront variant of end_previous_slice().
+ *
+ * Besides the RPI_STATUS write for the last CTB of the previous slice this
+ * may first queue a wpp_pause() on that CTB's row, and may afterwards queue
+ * a PROB_BACKUP transfer, depending on where the previous entry point was
+ * (de->wpp_entry_x / y), where the new slice segment starts and how many
+ * CTBs wide the picture is.
+ *
+ * Reads index ctb_addr_ts - 1, so ctb_addr_ts must not be 0.
+ */
+static void wpp_end_previous_slice(struct rpivid_dec_env *const de,
+				   const struct rpivid_dec_state *const s,
+				   int ctb_addr_ts)
+{
+	const int prev_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
+	const int last_x = prev_rs % de->pic_width_in_ctbs_y;
+	const int last_y = prev_rs / de->pic_width_in_ctbs_y;
+	const int new_x = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
+	const int new_y = s->sh->slice_segment_addr / de->pic_width_in_ctbs_y;
+	/* New segment starts on a later row than the previous entry point */
+	const int row_advanced = de->wpp_entry_y < new_y;
+
+	if (de->wpp_entry_x < 2 && (row_advanced || new_x > 2) &&
+	    de->pic_width_in_ctbs_y > 2)
+		wpp_pause(de, last_y);
+
+	p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
+
+	if (new_x == 2 || (de->pic_width_in_ctbs_y == 2 && row_advanced))
+		p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Wavefront mode
+
+/*
+ * Queue the register writes that start decoding at a wavefront entry point.
+ *
+ * ctb_addr_ts is the tile-scan address of the first CTB.  Its column and
+ * row are recorded in de->wpp_entry_x / de->wpp_entry_y.  The tile window
+ * programmed is the whole width of that single CTB row; BEGINTILEEND is
+ * only written when do_bte is set and RPI_QP only when reset_qp_y is set.
+ */
+static void wpp_entry_point(struct rpivid_dec_env *const de,
+			    const struct rpivid_dec_state *const s,
+			    const int do_bte,
+			    const int reset_qp_y, const int ctb_addr_ts)
+{
+	const int ctb_size = 1 << s->log2_ctb_size;
+	const int rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
+	int col, row, endx, endy, on_last_row;
+	u8 slice_w, slice_h;
+
+	de->wpp_entry_x = rs % de->pic_width_in_ctbs_y;
+	col = de->wpp_entry_x;
+	de->wpp_entry_y = rs / de->pic_width_in_ctbs_y;
+	row = de->wpp_entry_y;
+
+	endx = de->pic_width_in_ctbs_y - 1;
+	endy = row;
+	on_last_row = (row == de->pic_height_in_ctbs_y - 1);
+
+	slice_w = ctb_to_slice_w_h(col, ctb_size,
+				   s->sps.pic_width_in_luma_samples,
+				   s->col_bd, s->num_tile_columns);
+	slice_h = ctb_to_slice_w_h(row, ctb_size,
+				   s->sps.pic_height_in_luma_samples,
+				   s->row_bd, s->num_tile_rows);
+
+	p1_apb_write(de, RPI_TILESTART, 0);
+	p1_apb_write(de, RPI_TILEEND, endx + (endy << 16));
+
+	if (do_bte)
+		p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy << 16));
+
+	/* Only the last row uses the computed height; others use ctb_size */
+	if (on_last_row)
+		write_slice(de, s, slice_w, slice_h);
+	else
+		write_slice(de, s, slice_w, ctb_size);
+
+	if (reset_qp_y) {
+		const unsigned int qp_bd_offset =
+			6 * s->sps.bit_depth_luma_minus8;
+
+		p1_apb_write(de, RPI_QP, qp_bd_offset + s->slice_qp);
+	}
+
+	p1_apb_write(de, RPI_MODE, on_last_row ? 0x60001 : 0x20001);
+	p1_apb_write(de, RPI_CONTROL, (col << 0) + (row << 16));
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Wavefront mode
+
+/*
+ * Queue all commands for one slice segment when entropy coding sync
+ * (wavefront) is in use.
+ *
+ * Sequence: close off the previous slice (unless this is the first CTB),
+ * build the slice messages, set up the bitstream, initialise or reload the
+ * CABAC state, program the slice messages and parameter registers, then
+ * start at the first entry point.  Each further entry point listed in the
+ * slice header is then handled by finishing the current CTB row and
+ * starting the next one.
+ *
+ * The sh parameter is not used; the slice header is taken from s->sh.
+ */
+static void wpp_decode_slice(struct rpivid_dec_env *const de,
+ const struct rpivid_dec_state *const s,
+ const struct v4l2_ctrl_hevc_slice_params *sh,
+ int ctb_addr_ts)
+{
+ int i, reset_qp_y = 1;
+ int indep = !s->dependent_slice_segment_flag;
+ int ctb_col = s->sh->slice_segment_addr % de->pic_width_in_ctbs_y;
+
+ if (ctb_addr_ts)
+ wpp_end_previous_slice(de, s, ctb_addr_ts);
+ pre_slice_decode(de, s);
+ write_bitstream(de, s);
+ // Fresh CABAC state at picture start, for an independent segment or
+ // when the picture is one CTB wide; a dependent segment starting in
+ // column 0 reloads the saved state; any other dependent segment keeps
+ // the current state and does not reset QP
+ if (ctb_addr_ts == 0 || indep || de->pic_width_in_ctbs_y == 1)
+ write_prob(de, s);
+ else if (ctb_col == 0)
+ p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
+ else
+ reset_qp_y = 0;
+ program_slicecmds(de, s->slice_idx);
+ new_slice_segment(de, s);
+ wpp_entry_point(de, s, indep, reset_qp_y, ctb_addr_ts);
+
+ for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
+ int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
+ int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
+ int last_x = de->pic_width_in_ctbs_y - 1;
+
+ // The CABAC state handed to the next row depends on picture width:
+ // > 2 CTBs: backed up by wpp_pause(); == 2: backed up after the
+ // row end status; == 1: regenerated from scratch
+ if (de->pic_width_in_ctbs_y > 2)
+ wpp_pause(de, ctb_row);
+ p1_apb_write(de, RPI_STATUS,
+ (ctb_row << 18) + (last_x << 5) + 2);
+ if (de->pic_width_in_ctbs_y == 2)
+ p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
+ if (de->pic_width_in_ctbs_y == 1)
+ write_prob(de, s);
+ else
+ p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
+ // Advance by the width of tile column 0 to reach the next row
+ // NOTE(review): presumably relies on there being a single tile
+ // column when wavefront is enabled - confirm
+ ctb_addr_ts += s->column_width[0];
+ wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Tiles mode
+
+/*
+ * Queue all commands for one slice segment when wavefront is not in use
+ * (plain or tiled pictures).
+ *
+ * Sequence: close off the previous slice (unless this is the first CTB),
+ * build the slice messages, set up the bitstream, initialise the CABAC
+ * state when required, program the slice messages and parameter registers,
+ * then start at the first entry point.  Each further entry point listed in
+ * the slice header finishes the current tile, re-initialises the CABAC
+ * state and starts at the first CTB of the next tile.
+ *
+ * sh is only referenced by the optional debug trace; the slice header is
+ * otherwise taken from s->sh.
+ */
+static void decode_slice(struct rpivid_dec_env *const de,
+ const struct rpivid_dec_state *const s,
+ const struct v4l2_ctrl_hevc_slice_params *const sh,
+ int ctb_addr_ts)
+{
+ int i, reset_qp_y;
+
+ if (ctb_addr_ts)
+ end_previous_slice(de, s, ctb_addr_ts);
+
+ pre_slice_decode(de, s);
+ write_bitstream(de, s);
+
+ // NOTE(review): with ctb_addr_ts == 0 this trace reads
+ // s->tile_id[-1] - debug builds only
+#if DEBUG_TRACE_P1_CMD
+ if (p1_z < 256) {
+ v4l2_info(&de->ctx->dev->v4l2_dev,
+ "TS=%d, tile=%d/%d, dss=%d, flags=%#llx\n",
+ ctb_addr_ts, s->tile_id[ctb_addr_ts],
+ s->tile_id[ctb_addr_ts - 1],
+ s->dependent_slice_segment_flag, sh->flags);
+ }
+#endif
+
+ // State and QP are reset at picture start, on entering a new tile and
+ // for every independent slice segment
+ reset_qp_y = ctb_addr_ts == 0 ||
+ s->tile_id[ctb_addr_ts] != s->tile_id[ctb_addr_ts - 1] ||
+ !s->dependent_slice_segment_flag;
+ if (reset_qp_y)
+ write_prob(de, s);
+
+ program_slicecmds(de, s->slice_idx);
+ new_slice_segment(de, s);
+ new_entry_point(de, s, !s->dependent_slice_segment_flag, reset_qp_y,
+ ctb_addr_ts);
+
+ for (i = 0; i < s->sh->num_entry_point_offsets; i++) {
+ int ctb_addr_rs = s->ctb_addr_ts_to_rs[ctb_addr_ts];
+ int ctb_col = ctb_addr_rs % de->pic_width_in_ctbs_y;
+ int ctb_row = ctb_addr_rs / de->pic_width_in_ctbs_y;
+ // NOTE(review): new_entry_point() passes num_tile_columns /
+ // num_tile_rows to ctb_to_tile() whereas the count passed here is
+ // one less - confirm against ctb_to_tile() which is intended
+ int tile_x = ctb_to_tile(ctb_col, s->col_bd,
+ s->num_tile_columns - 1);
+ int tile_y =
+ ctb_to_tile(ctb_row, s->row_bd, s->num_tile_rows - 1);
+ int last_x = s->col_bd[tile_x + 1] - 1;
+ int last_y = s->row_bd[tile_y + 1] - 1;
+
+ p1_apb_write(de, RPI_STATUS,
+ 2 + (last_x << 5) + (last_y << 18));
+ write_prob(de, s);
+ // Skip over every CTB of the current tile
+ ctb_addr_ts += s->column_width[tile_x] * s->row_height[tile_y];
+ new_entry_point(de, s, 0, 1, ctb_addr_ts);
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Scaling factors
+
+/*
+ * Expand one scaling list into its full-size matrix at dst0.
+ *
+ * size_id 0 copies 16 bytes and size_id 1 copies 64 bytes unchanged.
+ * size_id 2 replicates each entry of the 8x8 source 2x2 times to give a
+ * 16x16 matrix, and any other size_id replicates 4x4 times to give 32x32;
+ * in both of those cases the first output byte is then replaced by dc.
+ *
+ * matrix_id is currently unused.
+ */
+static void expand_scaling_list(const unsigned int size_id,
+				const unsigned int matrix_id, u8 *const dst0,
+				const u8 *const src0, uint8_t dc)
+{
+	unsigned int rep, row, col, k;
+	u8 *out = dst0;
+
+	// FIXME: matrix_id is unused ?
+	if (size_id == 0) {
+		memcpy(dst0, src0, 16);
+		return;
+	}
+	if (size_id == 1) {
+		memcpy(dst0, src0, 64);
+		return;
+	}
+
+	/* Replication factor in each direction */
+	rep = (size_id == 2) ? 2 : 4;
+
+	for (row = 0; row != 8 * rep; row++) {
+		const u8 *in = src0 + (row / rep) * 8;
+
+		for (col = 0; col != 8; ++col, ++in)
+			for (k = 0; k != rep; ++k)
+				*out++ = *in;
+	}
+	dst0[0] = dc;
+}
+
+/*
+ * Fill de->scaling_factors from the scaling matrix control of this run.
+ *
+ * Every 4x4, 8x8 and 16x16 list (matrix ids 0-5) and the two 32x32 lists
+ * (matrix ids 0-1) are expanded into their own, non-overlapping region of
+ * the table.  s is not used.
+ */
+static void populate_scaling_factors(const struct rpivid_run *const run,
+				     struct rpivid_dec_env *const de,
+				     const struct rpivid_dec_state *const s)
+{
+	// Offset of each expanded matrix within de->scaling_factors,
+	// indexed [size id][matrix id]
+	static const u32 offs[4][6] = {
+		// MID0    MID1    MID2    MID3    MID4    MID5
+		// SID0 (4x4)
+		{ 0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050 },
+		// SID1 (8x8)
+		{ 0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0 },
+		// SID2 (16x16)
+		{ 0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0 },
+		// SID3 (32x32)
+		{ 0x07E0, 0x0BE0, 0x0000, 0x0000, 0x0000, 0x0000 }
+	};
+	const struct v4l2_ctrl_hevc_scaling_matrix *const sm =
+		run->h265.scaling_matrix;
+	unsigned int mid;
+
+	for (mid = 0; mid != 6; mid++) {
+		expand_scaling_list(0, mid,
+				    de->scaling_factors + offs[0][mid],
+				    sm->scaling_list_4x4[mid], 0);
+		expand_scaling_list(1, mid,
+				    de->scaling_factors + offs[1][mid],
+				    sm->scaling_list_8x8[mid], 0);
+		expand_scaling_list(2, mid,
+				    de->scaling_factors + offs[2][mid],
+				    sm->scaling_list_16x16[mid],
+				    sm->scaling_list_dc_coef_16x16[mid]);
+
+		// Only two 32x32 matrices exist
+		if (mid < 2)
+			expand_scaling_list(3, mid,
+					    de->scaling_factors +
+						offs[3][mid],
+					    sm->scaling_list_32x32[mid],
+					    sm->scaling_list_dc_coef_32x32[mid]);
+	}
+}
+
+/*
+ * Free all tables derived from the parameter sets and reset their pointers
+ * to NULL, so calling this again is harmless.
+ */
+static void free_ps_info(struct rpivid_dec_state *const s)
+{
+	/* Tile boundary tables */
+	kfree(s->row_bd);
+	kfree(s->col_bd);
+	s->row_bd = NULL;
+	s->col_bd = NULL;
+
+	/* Per-CTB tables */
+	kfree(s->tile_id);
+	kfree(s->ctb_addr_ts_to_rs);
+	kfree(s->ctb_addr_rs_to_ts);
+	s->tile_id = NULL;
+	s->ctb_addr_ts_to_rs = NULL;
+	s->ctb_addr_rs_to_ts = NULL;
+}
+
+/*
+ * Recalculate everything that is derived from the current SPS / PPS held
+ * in s: the CTB geometry, the tile column / row sizes and boundaries, the
+ * raster-scan <-> tile-scan CTB address maps and the per-CTB tile ids.
+ *
+ * Any tables from a previous call are freed first.
+ *
+ * Returns 0 on success or -ENOMEM if a table could not be allocated, in
+ * which case every table pointer in s is left NULL.  Callers must check
+ * the result before using the tables.
+ *
+ * NOTE(review): the tile sizes come from the PPS control and are not
+ * checked against the picture size in CTBs here - confirm they are
+ * validated elsewhere, as the address maps are indexed with values derived
+ * from them.
+ */
+static int updated_ps(struct rpivid_dec_state *const s)
+{
+	unsigned int ctb_addr_rs;
+	int j, x, y, tile_id;
+	unsigned int i;
+
+	free_ps_info(s);
+
+	// Inferred parameters
+	s->log2_ctb_size = s->sps.log2_min_luma_coding_block_size_minus3 + 3 +
+			   s->sps.log2_diff_max_min_luma_coding_block_size;
+
+	s->ctb_width = (s->sps.pic_width_in_luma_samples +
+			(1 << s->log2_ctb_size) - 1) >>
+		       s->log2_ctb_size;
+	s->ctb_height = (s->sps.pic_height_in_luma_samples +
+			 (1 << s->log2_ctb_size) - 1) >>
+			s->log2_ctb_size;
+	s->ctb_size = s->ctb_width * s->ctb_height;
+
+	// Tile layout: a single tile covering the picture unless tiles are
+	// enabled in the PPS
+	if (!(s->pps.flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
+		s->num_tile_columns = 1;
+		s->num_tile_rows = 1;
+		s->column_width[0] = s->ctb_width;
+		s->row_height[0] = s->ctb_height;
+	} else {
+		s->num_tile_columns = s->pps.num_tile_columns_minus1 + 1;
+		s->num_tile_rows = s->pps.num_tile_rows_minus1 + 1;
+		for (i = 0; i < s->num_tile_columns; ++i)
+			s->column_width[i] = s->pps.column_width_minus1[i] + 1;
+		for (i = 0; i < s->num_tile_rows; ++i)
+			s->row_height[i] = s->pps.row_height_minus1[i] + 1;
+	}
+
+	// Allocate all tables up front so that a failure can be reported
+	// before any of them is touched
+	s->col_bd = kmalloc_array(s->num_tile_columns + 1,
+				  sizeof(*s->col_bd), GFP_KERNEL);
+	s->row_bd = kmalloc_array(s->num_tile_rows + 1,
+				  sizeof(*s->row_bd), GFP_KERNEL);
+	s->ctb_addr_rs_to_ts = kmalloc_array(s->ctb_size,
+					     sizeof(*s->ctb_addr_rs_to_ts),
+					     GFP_KERNEL);
+	s->ctb_addr_ts_to_rs = kmalloc_array(s->ctb_size,
+					     sizeof(*s->ctb_addr_ts_to_rs),
+					     GFP_KERNEL);
+	s->tile_id = kmalloc_array(s->ctb_size, sizeof(*s->tile_id),
+				   GFP_KERNEL);
+
+	if (!s->col_bd || !s->row_bd || !s->ctb_addr_rs_to_ts ||
+	    !s->ctb_addr_ts_to_rs || !s->tile_id) {
+		free_ps_info(s);
+		return -ENOMEM;
+	}
+
+	// Tile boundaries: col_bd[i] / row_bd[i] is the first CTB column /
+	// row of tile i, with one extra entry marking the end
+	s->col_bd[0] = 0;
+	for (i = 0; i < s->num_tile_columns; i++)
+		s->col_bd[i + 1] = s->col_bd[i] + s->column_width[i];
+
+	s->row_bd[0] = 0;
+	for (i = 0; i < s->num_tile_rows; i++)
+		s->row_bd[i + 1] = s->row_bd[i] + s->row_height[i];
+
+	// Raster scan <-> tile scan address maps
+	for (ctb_addr_rs = 0; ctb_addr_rs < s->ctb_size; ctb_addr_rs++) {
+		int tb_x = ctb_addr_rs % s->ctb_width;
+		int tb_y = ctb_addr_rs / s->ctb_width;
+		int tile_x = 0;
+		int tile_y = 0;
+		int val = 0;
+
+		for (i = 0; i < s->num_tile_columns; i++) {
+			if (tb_x < s->col_bd[i + 1]) {
+				tile_x = i;
+				break;
+			}
+		}
+
+		for (i = 0; i < s->num_tile_rows; i++) {
+			if (tb_y < s->row_bd[i + 1]) {
+				tile_y = i;
+				break;
+			}
+		}
+
+		// CTBs in the tiles to the left in this tile row, plus all
+		// CTBs in the tile rows above, plus the offset inside the tile
+		for (i = 0; i < tile_x; i++)
+			val += s->row_height[tile_y] * s->column_width[i];
+		for (i = 0; i < tile_y; i++)
+			val += s->ctb_width * s->row_height[i];
+
+		val += (tb_y - s->row_bd[tile_y]) * s->column_width[tile_x] +
+		       tb_x - s->col_bd[tile_x];
+
+		s->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
+		s->ctb_addr_ts_to_rs[val] = ctb_addr_rs;
+	}
+
+	// Tile id of every CTB, indexed by tile-scan address
+	for (j = 0, tile_id = 0; j < s->num_tile_rows; j++)
+		for (i = 0; i < s->num_tile_columns; i++, tile_id++)
+			for (y = s->row_bd[j]; y < s->row_bd[j + 1]; y++)
+				for (x = s->col_bd[i];
+				     x < s->col_bd[i + 1];
+				     x++)
+					s->tile_id[s->ctb_addr_rs_to_ts
+							   [y * s->ctb_width +
+							    x]] = tile_id;
+
+	return 0;
+}
+
+/*
+ * Finish the command list for a frame and copy it to the DMA-able command
+ * buffer.
+ *
+ * Queues the final RPI_STATUS write for the last CTB of the picture
+ * (preceded by a wpp_pause() in wavefront mode when the last entry point
+ * was in column 0 or 1 of a picture more than two CTBs wide), grows the
+ * command buffer if it is missing or too small, and copies the queued
+ * commands into it.
+ *
+ * Returns 0 on success or -ENOMEM if the command buffer could not be
+ * (re)allocated.
+ */
+static int frame_end(struct rpivid_dev *const dev,
+		     struct rpivid_dec_env *const de,
+		     const struct rpivid_dec_state *const s)
+{
+	const unsigned int last_x = s->col_bd[s->num_tile_columns] - 1;
+	const unsigned int last_y = s->row_bd[s->num_tile_rows] - 1;
+	size_t cmd_size;
+
+	if (s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED) {
+		if (de->wpp_entry_x < 2 && de->pic_width_in_ctbs_y > 2)
+			wpp_pause(de, last_y);
+	}
+	p1_apb_write(de, RPI_STATUS, 1 + (last_x << 5) + (last_y << 18));
+
+	// Copy commands out to dma buf
+	cmd_size = de->cmd_len * sizeof(de->cmd_fifo[0]);
+
+	if (!de->cmd_copy_gptr->ptr || cmd_size > de->cmd_copy_gptr->size) {
+		size_t cmd_alloc = round_up_size(cmd_size);
+
+		// cmd_alloc is a size_t, so it has to be printed with %zu
+		if (gptr_realloc_new(dev, de->cmd_copy_gptr, cmd_alloc)) {
+			v4l2_err(&dev->v4l2_dev,
+				 "Alloc cmd buffer (%zu): FAILED\n", cmd_alloc);
+			return -ENOMEM;
+		}
+		v4l2_info(&dev->v4l2_dev, "Alloc cmd buffer (%zu): OK\n",
+			  cmd_alloc);
+	}
+
+	memcpy(de->cmd_copy_gptr->ptr, de->cmd_fifo, cmd_size);
+	return 0;
+}
+
+/*
+ * Derive the colmv buffer geometry from the picture size in the SPS:
+ * the stride is the luma width rounded up to a multiple of 64, and the
+ * picture size is that stride times one sixteenth of the luma height
+ * rounded up to a multiple of 64.  run is not used.
+ */
+static void setup_colmv(struct rpivid_ctx *const ctx, struct rpivid_run *run,
+			struct rpivid_dec_state *const s)
+{
+	const unsigned int rows =
+		ALIGN(s->sps.pic_height_in_luma_samples, 64) >> 4;
+
+	ctx->colmv_stride = ALIGN(s->sps.pic_width_in_luma_samples, 64);
+	ctx->colmv_picsize = ctx->colmv_stride * rows;
+}
+
+// Take a decode env off the context's free list.
+// The env is returned unlinked and in state RPIVID_DECODE_SLICE_START;
+// NULL is returned if the free list is empty.
+// Can be called from irq context
+static struct rpivid_dec_env *dec_env_new(struct rpivid_ctx *const ctx)
+{
+	struct rpivid_dec_env *head;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&ctx->dec_lock, irq_flags);
+
+	head = ctx->dec_free;
+	if (!head)
+		goto unlock;
+
+	ctx->dec_free = head->next;
+	head->state = RPIVID_DECODE_SLICE_START;
+	head->next = NULL;
+
+unlock:
+	spin_unlock_irqrestore(&ctx->dec_lock, irq_flags);
+	return head;
+}
+
+// Hand a decode env back to its context.
+// aux_q_release() is called on the env's frame_aux and col_aux slots, then
+// the env is marked RPIVID_DECODE_END and pushed on the head of the free
+// list.
+// Can be called from irq context
+static void dec_env_delete(struct rpivid_dec_env *const de)
+{
+	struct rpivid_ctx *const owner = de->ctx;
+	unsigned long irq_flags;
+
+	aux_q_release(owner, &de->frame_aux);
+	aux_q_release(owner, &de->col_aux);
+
+	spin_lock_irqsave(&owner->dec_lock, irq_flags);
+
+	de->state = RPIVID_DECODE_END;
+	de->next = owner->dec_free;
+	owner->dec_free = de;
+
+	spin_unlock_irqrestore(&owner->dec_lock, irq_flags);
+}
+
+/*
+ * Free the decode env pool, including the command fifo of every env, and
+ * clear the pool and free list pointers.  Safe to call when no pool has
+ * been allocated.
+ */
+static void dec_env_uninit(struct rpivid_ctx *const ctx)
+{
+	struct rpivid_dec_env *const pool = ctx->dec_pool;
+	struct rpivid_dec_env *de;
+
+	if (pool) {
+		for (de = pool; de != pool + RPIVID_DEC_ENV_COUNT; ++de)
+			kfree(de->cmd_fifo);
+
+		kfree(pool);
+	}
+
+	ctx->dec_free = NULL;
+	ctx->dec_pool = NULL;
+}
+
+/*
+ * Allocate the pool of RPIVID_DEC_ENV_COUNT decode envs for a context,
+ * chain them all onto the free list and give each one a command fifo of
+ * 1024 entries.
+ *
+ * Returns 0 on success and -1 if any allocation fails; on failure nothing
+ * is left allocated.
+ */
+static int dec_env_init(struct rpivid_ctx *const ctx)
+{
+	unsigned int i;
+
+	// kcalloc zeroes the pool and checks count * size for overflow
+	ctx->dec_pool = kcalloc(RPIVID_DEC_ENV_COUNT, sizeof(*ctx->dec_pool),
+				GFP_KERNEL);
+	if (!ctx->dec_pool)
+		return -1;
+
+	spin_lock_init(&ctx->dec_lock);
+
+	// Build free chain
+	// (the last env's next pointer is already NULL as the pool is zeroed)
+	ctx->dec_free = ctx->dec_pool;
+	for (i = 0; i != RPIVID_DEC_ENV_COUNT - 1; ++i)
+		ctx->dec_pool[i].next = ctx->dec_pool + i + 1;
+
+	// Fill in other bits
+	for (i = 0; i != RPIVID_DEC_ENV_COUNT; ++i) {
+		struct rpivid_dec_env *const de = ctx->dec_pool + i;
+
+		de->ctx = ctx;
+		de->decode_order = i;
+		de->cmd_max = 1024;
+		de->cmd_fifo = kmalloc_array(de->cmd_max,
+					     sizeof(*de->cmd_fifo),
+					     GFP_KERNEL);
+		if (!de->cmd_fifo)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	// Fifos not yet allocated are still NULL, which kfree() accepts
+	dec_env_uninit(ctx);
+	return -1;
+}
+
+// Assume that we get exactly the same DPB for every slice
+// it makes no real sense otherwise
+#if V4L2_HEVC_DPB_ENTRIES_NUM_MAX > 16
+#error HEVC_DPB_ENTRIES > h/w slots
+#endif
+
+/*
+ * Assemble a 32-bit configuration word from the cached SPS / PPS and the
+ * slice temporal MVP state.
+ *
+ * Bit layout: 0-3 luma bit depth, 4-7 chroma bit depth, 8 / 9 set when the
+ * luma / chroma depth is above 8, 10 up log2 CTB size, 13 constrained intra
+ * pred, 14 strong intra smoothing, 15 SPS temporal MVP, 16 up log2 parallel
+ * merge level, 19 slice temporal MVP, 20 PCM loop filter disabled,
+ * 21-25 / 26-30 PPS Cb / Cr QP offset (low 5 bits each).
+ */
+static u32 mk_config2(const struct rpivid_dec_state *const s)
+{
+	const struct v4l2_ctrl_hevc_sps *const sps = &s->sps;
+	const struct v4l2_ctrl_hevc_pps *const pps = &s->pps;
+	u32 cfg;
+
+	/* Multi-bit fields */
+	// BitDepthY
+	cfg = (sps->bit_depth_luma_minus8 + 8) << 0;
+	// BitDepthC
+	cfg |= (sps->bit_depth_chroma_minus8 + 8) << 4;
+	cfg |= s->log2_ctb_size << 10;
+	cfg |= (pps->log2_parallel_merge_level_minus2 + 2) << 16;
+	cfg |= (pps->pps_cb_qp_offset & 31) << 21;
+	cfg |= (pps->pps_cr_qp_offset & 31) << 26;
+
+	/* Single-bit flags */
+	// BitDepthY > 8
+	cfg |= sps->bit_depth_luma_minus8 ? BIT(8) : 0;
+	// BitDepthC > 8
+	cfg |= sps->bit_depth_chroma_minus8 ? BIT(9) : 0;
+	cfg |= (pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED) ?
+		BIT(13) : 0;
+	cfg |= (sps->flags &
+		V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED) ?
+		BIT(14) : 0;
+	/* Write motion vectors to external memory */
+	cfg |= (sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) ?
+		BIT(15) : 0;
+	cfg |= s->slice_temporal_mvp ? BIT(19) : 0;
+	cfg |= (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED) ?
+		BIT(20) : 0;
+
+	return cfg;
+}
+
+// Per-slice setup. On the first slice of a frame a new decode env is claimed
+// and validated; on the last slice the DPB refs are resolved and the env is
+// moved to RPIVID_DECODE_PHASE1. Failures are left in de->state for trigger.
+static void rpivid_h265_setup(struct rpivid_ctx *ctx, struct rpivid_run *run)
+{
+ struct rpivid_dev *const dev = ctx->dev;
+ const struct v4l2_ctrl_hevc_slice_params *const sh =
+ run->h265.slice_params;
+ struct rpivid_q_aux *dpb_q_aux[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+ struct rpivid_dec_state *const s = ctx->state;
+ struct vb2_queue *vq;
+ struct rpivid_dec_env *de;
+ int ctb_addr_ts;
+ unsigned int i;
+ int use_aux;
+ bool slice_temporal_mvp;
+
+ s->frame_end =
+ ((run->src->flags & V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF) == 0);
+
+ de = ctx->dec0;
+ slice_temporal_mvp = (sh->flags &
+ V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED);
+
+ if (de && de->state != RPIVID_DECODE_END) {
+ ++s->slice_idx;
+
+ switch (de->state) {
+ case RPIVID_DECODE_SLICE_CONTINUE:
+ // Expected state
+ break;
+ default:
+ v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n",
+ __func__, de->state);
+ /* FALLTHRU */
+ case RPIVID_DECODE_ERROR_CONTINUE:
+ // Uncleared error - fail now
+ goto fail;
+ }
+
+ if (s->slice_temporal_mvp != slice_temporal_mvp) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Slice Temporal MVP non-constant\n");
+ goto fail;
+ }
+ } else {
+ /* Frame start */
+ unsigned int ctb_size_y;
+ bool sps_changed = false;
+
+ if (memcmp(&s->sps, run->h265.sps, sizeof(s->sps)) != 0) {
+ /* SPS changed */
+ v4l2_info(&dev->v4l2_dev, "SPS changed\n");
+ memcpy(&s->sps, run->h265.sps, sizeof(s->sps));
+ sps_changed = true;
+ }
+ if (sps_changed ||
+ memcmp(&s->pps, run->h265.pps, sizeof(s->pps)) != 0) {
+ /* PPS changed, or SPS changed so PPS-derived data is stale */
+ v4l2_info(&dev->v4l2_dev, "PPS changed\n");
+ memcpy(&s->pps, run->h265.pps, sizeof(s->pps));
+
+ /* Recalc stuff as required */
+ updated_ps(s);
+ }
+
+ de = dec_env_new(ctx);
+ if (!de) {
+ v4l2_err(&dev->v4l2_dev,
+ "Failed to find free decode env\n");
+ goto fail;
+ }
+ ctx->dec0 = de;
+
+ ctb_size_y =
+ 1U << (s->sps.log2_min_luma_coding_block_size_minus3 +
+ 3 +
+ s->sps.log2_diff_max_min_luma_coding_block_size);
+
+ de->pic_width_in_ctbs_y =
+ (s->sps.pic_width_in_luma_samples + ctb_size_y - 1) /
+ ctb_size_y; // 7-15
+ de->pic_height_in_ctbs_y =
+ (s->sps.pic_height_in_luma_samples + ctb_size_y - 1) /
+ ctb_size_y; // 7-17
+ de->cmd_len = 0;
+ de->dpbno_col = ~0U;
+
+ de->bit_copy_gptr = ctx->bitbufs + 0;
+ de->bit_copy_len = 0;
+ de->cmd_copy_gptr = ctx->cmdbufs + 0;
+
+ de->frame_c_offset = ctx->dst_fmt.height * 128;
+ de->frame_stride = ctx->dst_fmt.bytesperline * 128;
+ de->frame_addr =
+ vb2_dma_contig_plane_dma_addr(&run->dst->vb2_buf, 0);
+ de->frame_aux = NULL;
+
+ if (s->sps.bit_depth_luma_minus8 !=
+ s->sps.bit_depth_chroma_minus8) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Chroma depth (%d) != Luma depth (%d)\n",
+ s->sps.bit_depth_chroma_minus8 + 8,
+ s->sps.bit_depth_luma_minus8 + 8);
+ goto fail;
+ }
+ if (s->sps.bit_depth_luma_minus8 == 0) {
+ if (ctx->dst_fmt.pixelformat !=
+ V4L2_PIX_FMT_NV12_COL128) {
+ v4l2_err(&dev->v4l2_dev,
+ "Pixel format %#x != NV12_COL128 for 8-bit output\n",
+ ctx->dst_fmt.pixelformat);
+ goto fail;
+ }
+ } else if (s->sps.bit_depth_luma_minus8 == 2) {
+ if (ctx->dst_fmt.pixelformat !=
+ V4L2_PIX_FMT_NV12_10_COL128) {
+ v4l2_err(&dev->v4l2_dev,
+ "Pixel format %#x != NV12_10_COL128 for 10-bit output\n",
+ ctx->dst_fmt.pixelformat);
+ goto fail;
+ }
+ } else {
+ v4l2_warn(&dev->v4l2_dev,
+ "Luma depth (%d) unsupported\n",
+ s->sps.bit_depth_luma_minus8 + 8);
+ goto fail;
+ }
+ if (run->dst->vb2_buf.num_planes != 1) {
+ v4l2_warn(&dev->v4l2_dev, "Capture planes (%d) != 1\n",
+ run->dst->vb2_buf.num_planes);
+ goto fail;
+ }
+ if (run->dst->planes[0].length <
+ ctx->dst_fmt.sizeimage) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Capture plane[0] length (%d) < sizeimage (%d)\n",
+ run->dst->planes[0].length,
+ ctx->dst_fmt.sizeimage);
+ goto fail;
+ }
+
+ if (s->sps.pic_width_in_luma_samples > 4096 ||
+ s->sps.pic_height_in_luma_samples > 4096) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Pic dimension (%dx%d) exeeds 4096\n",
+ s->sps.pic_width_in_luma_samples,
+ s->sps.pic_height_in_luma_samples);
+ goto fail;
+ }
+
+ // Fill in ref planes with our address s.t. if we mess
+ // up refs somehow then we still have a valid address
+ // entry
+ for (i = 0; i != 16; ++i)
+ de->ref_addrs[i] = de->frame_addr;
+
+ /*
+ * Stash initial temporal_mvp flag
+ * This must be the same for all pic slices (7.4.7.1)
+ */
+ s->slice_temporal_mvp = slice_temporal_mvp;
+
+ // Phase 2 reg pre-calc
+ de->rpi_config2 = mk_config2(s);
+ de->rpi_framesize = (s->sps.pic_height_in_luma_samples << 16) |
+ s->sps.pic_width_in_luma_samples;
+ de->rpi_currpoc = sh->slice_pic_order_cnt;
+
+ if (s->sps.flags &
+ V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) {
+ setup_colmv(ctx, run, s);
+ }
+
+ s->slice_idx = 0;
+
+ if (sh->slice_segment_addr != 0) {
+ v4l2_warn(&dev->v4l2_dev,
+ "New frame but segment_addr=%d\n",
+ sh->slice_segment_addr);
+ goto fail;
+ }
+
+ /* Allocate a bitbuf if we need one - don't need one if single
+ * slice as we can use the src buf directly
+ */
+ if (!s->frame_end && !de->bit_copy_gptr->ptr) {
+ const size_t wxh = s->sps.pic_width_in_luma_samples *
+ s->sps.pic_height_in_luma_samples;
+ size_t bits_alloc;
+
+ /* Annex A gives a min compression of 2 @ lvl 3.1
+ * (wxh <= 983040) and min 4 thereafter but avoid
+ * the oddity of 983041 having a lower limit than
+ * 983040.
+ * Multiply by 3/2 for 4:2:0
+ */
+ bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
+ wxh < 983040 * 2 ? 983040 * 3 / 4 :
+ wxh * 3 / 8;
+ bits_alloc = round_up_size(bits_alloc);
+
+ if (gptr_alloc(dev, de->bit_copy_gptr,
+ bits_alloc,
+ DMA_ATTR_FORCE_CONTIGUOUS) != 0) {
+ v4l2_err(&dev->v4l2_dev,
+ "Unable to alloc buf (%zu) for bit copy\n",
+ bits_alloc);
+ goto fail;
+ }
+ v4l2_info(&dev->v4l2_dev,
+ "Alloc buf (%zu) for bit copy OK\n",
+ bits_alloc);
+ }
+ }
+
+ // Pre calc a few things
+ s->src_addr =
+ !s->frame_end ?
+ 0 :
+ vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);
+ s->src_buf = s->src_addr != 0 ? NULL :
+ vb2_plane_vaddr(&run->src->vb2_buf, 0);
+ if (!s->src_addr && !s->src_buf) {
+ v4l2_err(&dev->v4l2_dev, "Failed to map src buffer\n");
+ goto fail;
+ }
+
+ s->sh = sh;
+ s->slice_qp = 26 + s->pps.init_qp_minus26 + s->sh->slice_qp_delta;
+ s->max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ?
+ 0 :
+ (5 - sh->five_minus_max_num_merge_cand);
+ // * SH DSS flag invented by me - but clearly needed
+ s->dependent_slice_segment_flag =
+ ((sh->flags &
+ V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT) != 0);
+
+ s->nb_refs[0] = (sh->slice_type == HEVC_SLICE_I) ?
+ 0 :
+ sh->num_ref_idx_l0_active_minus1 + 1;
+ s->nb_refs[1] = (sh->slice_type != HEVC_SLICE_B) ?
+ 0 :
+ sh->num_ref_idx_l1_active_minus1 + 1;
+
+ if (s->sps.flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
+ populate_scaling_factors(run, de, s);
+ // NOTE(review): slice_segment_addr is not range-checked in this function
+ ctb_addr_ts = s->ctb_addr_rs_to_ts[sh->slice_segment_addr];
+
+ if ((s->pps.flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED))
+ wpp_decode_slice(de, s, sh, ctb_addr_ts);
+ else
+ decode_slice(de, s, sh, ctb_addr_ts);
+
+ if (!s->frame_end)
+ return;
+
+ // Frame end
+ memset(dpb_q_aux, 0,
+ sizeof(*dpb_q_aux) * V4L2_HEVC_DPB_ENTRIES_NUM_MAX);
+ /*
+ * Need Aux ents for all (ref) DPB ents if temporal MV could
+ * be enabled for any pic
+ * ** At the moment we have aux ents for all pics whether or not
+ * they are ref
+ */
+ use_aux = ((s->sps.flags &
+ V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) != 0);
+
+ // Locate ref frames
+ // At least in the current implementation this is constant across all
+ // slices. If this changes we will need idx mapping code.
+ // Uses sh so here rather than trigger
+
+ vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
+ if (!vq) {
+ v4l2_err(&dev->v4l2_dev, "VQ gone!\n");
+ goto fail;
+ }
+
+ // v4l2_info(&dev->v4l2_dev, "rpivid_h265_end of frame\n");
+ if (frame_end(dev, de, s))
+ goto fail;
+
+ for (i = 0; i < sh->num_active_dpb_entries; ++i) {
+ int buffer_index =
+ vb2_find_timestamp(vq, sh->dpb[i].timestamp, 0);
+ struct vb2_buffer *buf = buffer_index < 0 ?
+ NULL :
+ vb2_get_buffer(vq, buffer_index);
+
+ if (!buf) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Missing DPB ent %d, timestamp=%lld, index=%d\n",
+ i, (long long)sh->dpb[i].timestamp,
+ buffer_index);
+ continue;
+ }
+
+ if (use_aux) {
+ dpb_q_aux[i] = aux_q_ref(ctx,
+ ctx->aux_ents[buffer_index]);
+ if (!dpb_q_aux[i])
+ v4l2_warn(&dev->v4l2_dev,
+ "Missing DPB AUX ent %d index=%d\n",
+ i, buffer_index);
+ }
+
+ de->ref_addrs[i] =
+ vb2_dma_contig_plane_dma_addr(buf, 0);
+ }
+
+ // Move DPB from temp
+ for (i = 0; i != V4L2_HEVC_DPB_ENTRIES_NUM_MAX; ++i) {
+ aux_q_release(ctx, &s->ref_aux[i]);
+ s->ref_aux[i] = dpb_q_aux[i];
+ }
+ // Unref the old frame aux too - it is either in the DPB or not
+ // now
+ aux_q_release(ctx, &s->frame_aux);
+
+ if (use_aux) {
+ // New frame so new aux ent
+ // ??? Do we need this if non-ref ??? can we tell
+ s->frame_aux = aux_q_new(ctx, run->dst->vb2_buf.index);
+
+ if (!s->frame_aux) {
+ v4l2_err(&dev->v4l2_dev,
+ "Failed to obtain aux storage for frame\n");
+ goto fail;
+ }
+
+ de->frame_aux = aux_q_ref(ctx, s->frame_aux);
+ }
+
+ if (de->dpbno_col != ~0U) {
+ if (de->dpbno_col >= sh->num_active_dpb_entries) {
+ v4l2_err(&dev->v4l2_dev,
+ "Col ref index %d >= %d\n",
+ de->dpbno_col,
+ sh->num_active_dpb_entries);
+ } else {
+ // Standard requires that the col pic is
+ // constant for the duration of the pic
+ // (text of collocated_ref_idx in H265-2 2018
+ // 7.4.7.1)
+
+ // Spot the collocated ref in passing
+ de->col_aux = aux_q_ref(ctx,
+ dpb_q_aux[de->dpbno_col]);
+
+ if (!de->col_aux) {
+ v4l2_warn(&dev->v4l2_dev,
+ "Missing DPB ent for col\n");
+ // Probably need to abort if this fails
+ // as P2 may explode on bad data
+ goto fail;
+ }
+ }
+ }
+
+ de->state = RPIVID_DECODE_PHASE1;
+ return;
+
+fail:
+ if (de)
+ // Actual error reporting happens in Trigger
+ de->state = s->frame_end ? RPIVID_DECODE_ERROR_DONE :
+ RPIVID_DECODE_ERROR_CONTINUE;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Handle PU and COEFF stream overflow
+
+// Returns:
+// -1 Phase 1 decode error
+// 0 OK
+// >0 Out of space (bitmask)
+
+#define STATUS_COEFF_EXHAUSTED 8
+#define STATUS_PU_EXHAUSTED 16
+
+static int check_status(const struct rpivid_dev *const dev)
+{
+ const u32 cfstatus = apb_read(dev, RPI_CFSTATUS);
+ const u32 cfnum = apb_read(dev, RPI_CFNUM);
+ u32 status = apb_read(dev, RPI_STATUS);
+
+ // Classify the phase 1 result: 0 = OK, >0 = exhausted-buffer bitmask, -1 = error
+
+ // this is the definition of successful completion of phase 1
+ // it assures that status register is zero and all blocks in each tile
+ // have completed
+ if (cfstatus == cfnum)
+ return 0; //No error
+
+ status &= (STATUS_PU_EXHAUSTED | STATUS_COEFF_EXHAUSTED);
+ if (status)
+ return status; /* caller reallocates the flagged buffer(s) and retries */
+
+ return -1; /* incomplete and no exhaustion flag set: treated as a decode error */
+}
+
+static void cb_phase2(struct rpivid_dev *const dev, void *v)
+{
+ struct rpivid_dec_env *const de = v;
+ struct rpivid_ctx *const ctx = de->ctx;
+
+ xtrace_in(dev, de);
+
+ v4l2_m2m_cap_buf_return(dev->m2m_dev, ctx->fh.m2m_ctx, de->frame_buf,
+ VB2_BUF_STATE_DONE); /* hand back the buffer detached in cb_phase1 */
+ de->frame_buf = NULL;
+
+ /* Delete de before finish as finish might immediately trigger a reuse
+ * of de
+ */
+ dec_env_delete(de);
+ // If p2out had reached RPIVID_P2BUF_COUNT, cb_phase1 withheld the job finish
+ if (atomic_add_return(-1, &ctx->p2out) >= RPIVID_P2BUF_COUNT - 1) {
+ xtrace_fin(dev, de);
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_DONE);
+ }
+
+ xtrace_ok(dev, de); /* NOTE(review): de was deleted above - confirm xtrace_* never dereferences it */
+}
+
+static void phase2_claimed(struct rpivid_dev *const dev, void *v)
+{
+ struct rpivid_dec_env *const de = v;
+ unsigned int i;
+
+ xtrace_in(dev, de);
+ // Phase 2 reads the PU/coeff buffers whose addresses phase1_claimed stored in de
+ apb_write_vc_addr(dev, RPI_PURBASE, de->pu_base_vc);
+ apb_write_vc_len(dev, RPI_PURSTRIDE, de->pu_stride);
+ apb_write_vc_addr(dev, RPI_COEFFRBASE, de->coeff_base_vc);
+ apb_write_vc_len(dev, RPI_COEFFRSTRIDE, de->coeff_stride);
+
+ apb_write_vc_addr(dev, RPI_OUTYBASE, de->frame_addr);
+ apb_write_vc_addr(dev, RPI_OUTCBASE,
+ de->frame_addr + de->frame_c_offset);
+ apb_write_vc_len(dev, RPI_OUTYSTRIDE, de->frame_stride);
+ apb_write_vc_len(dev, RPI_OUTCSTRIDE, de->frame_stride);
+
+ // v4l2_info(&dev->v4l2_dev, "Frame: Y=%llx, C=%llx, Stride=%x\n",
+ // de->frame_addr, de->frame_addr + de->frame_c_offset,
+ // de->frame_stride);
+
+ for (i = 0; i < 16; i++) { /* one 16-byte register group per reference slot */
+ // Strides are in fact unused but fill in anyway
+ apb_write_vc_addr(dev, 0x9000 + 16 * i, de->ref_addrs[i]);
+ apb_write_vc_len(dev, 0x9004 + 16 * i, de->frame_stride);
+ apb_write_vc_addr(dev, 0x9008 + 16 * i,
+ de->ref_addrs[i] + de->frame_c_offset);
+ apb_write_vc_len(dev, 0x900C + 16 * i, de->frame_stride);
+ }
+
+ apb_write(dev, RPI_CONFIG2, de->rpi_config2);
+ apb_write(dev, RPI_FRAMESIZE, de->rpi_framesize);
+ apb_write(dev, RPI_CURRPOC, de->rpi_currpoc);
+ // v4l2_info(&dev->v4l2_dev, "Config2=%#x, FrameSize=%#x, POC=%#x\n",
+ // de->rpi_config2, de->rpi_framesize, de->rpi_currpoc);
+
+ // collocated reads/writes
+ apb_write_vc_len(dev, RPI_COLSTRIDE,
+ de->ctx->colmv_stride); // Read vals
+ apb_write_vc_len(dev, RPI_MVSTRIDE,
+ de->ctx->colmv_stride); // Write vals
+ apb_write_vc_addr(dev, RPI_MVBASE,
+ !de->frame_aux ? 0 : de->frame_aux->col.addr); /* 0 if no aux ent */
+ apb_write_vc_addr(dev, RPI_COLBASE,
+ !de->col_aux ? 0 : de->col_aux->col.addr); /* 0 if no collocated ref */
+
+ //v4l2_info(&dev->v4l2_dev,
+ // "Mv=%llx, Col=%llx, Stride=%x, Buf=%llx->%llx\n",
+ // de->rpi_mvbase, de->rpi_colbase, de->ctx->colmv_stride,
+ // de->ctx->colmvbuf.addr, de->ctx->colmvbuf.addr +
+ // de->ctx->colmvbuf.size);
+ // Register the completion callback before the write that starts phase 2
+ rpivid_hw_irq_active2_irq(dev, &de->irq_ent, cb_phase2, de);
+
+ apb_write_final(dev, RPI_NUMROWS, de->pic_height_in_ctbs_y);
+
+ xtrace_ok(dev, de);
+}
+
+static void phase1_claimed(struct rpivid_dev *const dev, void *v);
+
+static void phase1_thread(struct rpivid_dev *const dev, void *v)
+{
+ struct rpivid_dec_env *const de = v;
+ struct rpivid_ctx *const ctx = de->ctx;
+
+ struct rpivid_gptr *const pu_gptr = ctx->pu_bufs + ctx->p2idx;
+ struct rpivid_gptr *const coeff_gptr = ctx->coeff_bufs + ctx->p2idx;
+
+ xtrace_in(dev, de);
+ // Grow whichever buffer(s) check_status() flagged as exhausted, then rerun phase 1
+ if (de->p1_status & STATUS_PU_EXHAUSTED) {
+ if (gptr_realloc_new(dev, pu_gptr, next_size(pu_gptr->size))) {
+ v4l2_err(&dev->v4l2_dev,
+ "%s: PU realloc (%#x) failed\n",
+ __func__, pu_gptr->size);
+ goto fail;
+ }
+ v4l2_info(&dev->v4l2_dev, "%s: PU realloc (%#x) OK\n",
+ __func__, pu_gptr->size);
+ }
+
+ if (de->p1_status & STATUS_COEFF_EXHAUSTED) {
+ if (gptr_realloc_new(dev, coeff_gptr,
+ next_size(coeff_gptr->size))) {
+ v4l2_err(&dev->v4l2_dev,
+ "%s: Coeff realloc (%#x) failed\n",
+ __func__, coeff_gptr->size);
+ goto fail;
+ }
+ v4l2_info(&dev->v4l2_dev, "%s: Coeff realloc (%#x) OK\n",
+ __func__, coeff_gptr->size);
+ }
+
+ phase1_claimed(dev, de); /* reprogram the registers and restart phase 1 */
+ xtrace_ok(dev, de);
+ return;
+
+fail:
+ dec_env_delete(de); /* realloc failed: drop the env and finish the job as an error */
+ xtrace_fin(dev, de);
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_ERROR);
+ xtrace_fail(dev, de);
+}
+
+/* Always called in irq context (this is good) */
+static void cb_phase1(struct rpivid_dev *const dev, void *v)
+{
+ struct rpivid_dec_env *const de = v;
+ struct rpivid_ctx *const ctx = de->ctx;
+
+ xtrace_in(dev, de);
+
+ de->p1_status = check_status(dev);
+ if (de->p1_status != 0) {
+ v4l2_info(&dev->v4l2_dev, "%s: Post wait: %#x\n",
+ __func__, de->p1_status);
+
+ if (de->p1_status < 0) /* -1 from check_status(): decode error */
+ goto fail;
+
+ /* Need to realloc - push onto a thread rather than IRQ */
+ rpivid_hw_irq_active1_thread(dev, &de->irq_ent,
+ phase1_thread, de);
+ return;
+ }
+
+ /* After the frame-buf is detached it must be returned but from
+ * this point onward (phase2_claimed, cb_phase2) there are no error
+ * paths so the return at the end of cb_phase2 is all that is needed
+ */
+ de->frame_buf = v4l2_m2m_cap_buf_detach(dev->m2m_dev, ctx->fh.m2m_ctx);
+ if (!de->frame_buf) {
+ v4l2_err(&dev->v4l2_dev, "%s: No detached buffer\n", __func__);
+ goto fail;
+ }
+ // Step to the next PU/coeff buffer pair, wrapping at RPIVID_P2BUF_COUNT
+ ctx->p2idx =
+ (ctx->p2idx + 1 >= RPIVID_P2BUF_COUNT) ? 0 : ctx->p2idx + 1;
+
+ // Enable the next setup if our Q isn't too big
+ if (atomic_add_return(1, &ctx->p2out) < RPIVID_P2BUF_COUNT) {
+ xtrace_fin(dev, de);
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_DONE);
+ } /* otherwise cb_phase2 finishes the job when it decrements p2out */
+
+ rpivid_hw_irq_active2_claim(dev, &de->irq_ent, phase2_claimed, de);
+
+ xtrace_ok(dev, de);
+ return;
+
+fail:
+ dec_env_delete(de);
+ xtrace_fin(dev, de);
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_ERROR);
+ xtrace_fail(dev, de);
+}
+
+static void phase1_claimed(struct rpivid_dev *const dev, void *v)
+{
+ struct rpivid_dec_env *const de = v;
+ struct rpivid_ctx *const ctx = de->ctx;
+
+ const struct rpivid_gptr * const pu_gptr = ctx->pu_bufs + ctx->p2idx;
+ const struct rpivid_gptr * const coeff_gptr = ctx->coeff_bufs +
+ ctx->p2idx;
+
+ xtrace_in(dev, de);
+ // Each buffer is split evenly between CTB rows, rounded down to a multiple of 64
+ de->pu_base_vc = pu_gptr->addr;
+ de->pu_stride =
+ ALIGN_DOWN(pu_gptr->size / de->pic_height_in_ctbs_y, 64);
+
+ de->coeff_base_vc = coeff_gptr->addr;
+ de->coeff_stride =
+ ALIGN_DOWN(coeff_gptr->size / de->pic_height_in_ctbs_y, 64);
+
+ apb_write_vc_addr(dev, RPI_PUWBASE, de->pu_base_vc);
+ apb_write_vc_len(dev, RPI_PUWSTRIDE, de->pu_stride);
+ apb_write_vc_addr(dev, RPI_COEFFWBASE, de->coeff_base_vc);
+ apb_write_vc_len(dev, RPI_COEFFWSTRIDE, de->coeff_stride);
+
+ // Trigger command FIFO
+ apb_write(dev, RPI_CFNUM, de->cmd_len); /* check_status() later compares CFSTATUS with this */
+
+ // Claim irq
+ rpivid_hw_irq_active1_irq(dev, &de->irq_ent, cb_phase1, de);
+
+ // And start the h/w
+ apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_copy_gptr->addr);
+
+ xtrace_ok(dev, de);
+}
+
+static void dec_state_delete(struct rpivid_ctx *const ctx)
+{
+ unsigned int i;
+ struct rpivid_dec_state *const s = ctx->state;
+
+ if (!s) /* nothing allocated (or already deleted) */
+ return;
+ ctx->state = NULL; /* detach before freeing so ctx never holds a stale pointer */
+
+ free_ps_info(s);
+ // Drop the state's references on the DPB aux entries and the current frame's
+ for (i = 0; i != HEVC_MAX_REFS; ++i)
+ aux_q_release(ctx, &s->ref_aux[i]);
+ aux_q_release(ctx, &s->frame_aux);
+
+ kfree(s);
+}
+
+static void rpivid_h265_stop(struct rpivid_ctx *ctx)
+{
+ struct rpivid_dev *const dev = ctx->dev;
+ unsigned int i;
+
+ v4l2_info(&dev->v4l2_dev, "%s\n", __func__);
+ // Also used as the error-unwind path of rpivid_h265_start()
+ dec_env_uninit(ctx);
+ dec_state_delete(ctx);
+
+ // dec_env & state must be killed before this to release the buffer to
+ // the free pool
+ aux_q_uninit(ctx);
+
+ for (i = 0; i != ARRAY_SIZE(ctx->bitbufs); ++i)
+ gptr_free(dev, ctx->bitbufs + i);
+ for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i)
+ gptr_free(dev, ctx->cmdbufs + i);
+ for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i)
+ gptr_free(dev, ctx->pu_bufs + i);
+ for (i = 0; i != ARRAY_SIZE(ctx->coeff_bufs); ++i)
+ gptr_free(dev, ctx->coeff_bufs + i);
+}
+
+// Allocate the decode state, decode envs and cmd/PU/coeff buffers for a stream.
+// Returns 0, or -ENOMEM after undoing any partial setup via rpivid_h265_stop().
+static int rpivid_h265_start(struct rpivid_ctx *ctx)
+{
+ struct rpivid_dev *const dev = ctx->dev;
+ unsigned int i;
+ unsigned int w = ctx->dst_fmt.width;
+ unsigned int h = ctx->dst_fmt.height;
+ unsigned int wxh;
+ size_t pu_alloc, coeff_alloc;
+
+ // Generate a sanitised WxH for memory alloc
+ // Assume HD if unset
+ if (w == 0)
+ w = 1920;
+ if (w > 4096)
+ w = 4096;
+ if (h == 0)
+ h = 1088; /* default the height; the width must not be touched here */
+ if (h > 4096)
+ h = 4096;
+ wxh = w * h;
+
+ v4l2_info(&dev->v4l2_dev, "%s: (%dx%d)\n", __func__,
+ ctx->dst_fmt.width, ctx->dst_fmt.height);
+
+ ctx->dec0 = NULL;
+ ctx->state = kzalloc(sizeof(*ctx->state), GFP_KERNEL);
+ if (!ctx->state) {
+ v4l2_err(&dev->v4l2_dev, "Failed to allocate decode state\n");
+ goto fail;
+ }
+
+ if (dec_env_init(ctx) != 0) {
+ v4l2_err(&dev->v4l2_dev, "Failed to allocate decode envs\n");
+ goto fail;
+ }
+
+ // 16k is plenty for most purposes but we will realloc if needed
+ for (i = 0; i != ARRAY_SIZE(ctx->cmdbufs); ++i) {
+ if (gptr_alloc(dev, ctx->cmdbufs + i, 0x4000,
+ DMA_ATTR_FORCE_CONTIGUOUS))
+ goto fail;
+ }
+
+ // Finger in the air PU & Coeff alloc
+ // Will be realloced if too small
+ coeff_alloc = round_up_size(wxh);
+ pu_alloc = round_up_size(wxh / 4);
+ for (i = 0; i != ARRAY_SIZE(ctx->pu_bufs); ++i) {
+ // Don't actually need a kernel mapping here
+ if (gptr_alloc(dev, ctx->pu_bufs + i, pu_alloc,
+ DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NO_KERNEL_MAPPING))
+ goto fail;
+ if (gptr_alloc(dev, ctx->coeff_bufs + i, coeff_alloc,
+ DMA_ATTR_FORCE_CONTIGUOUS |
+ DMA_ATTR_NO_KERNEL_MAPPING))
+ goto fail;
+ }
+ aux_q_init(ctx);
+
+ return 0;
+
+fail:
+ rpivid_h265_stop(ctx);
+ return -ENOMEM;
+}
+
+static void rpivid_h265_trigger(struct rpivid_ctx *ctx)
+{
+ struct rpivid_dev *const dev = ctx->dev;
+ struct rpivid_dec_env *const de = ctx->dec0;
+
+ xtrace_in(dev, de);
+ // A missing decode env is handled as ERROR_CONTINUE, so de is non-NULL in all other cases
+ switch (!de ? RPIVID_DECODE_ERROR_CONTINUE : de->state) {
+ case RPIVID_DECODE_SLICE_START:
+ de->state = RPIVID_DECODE_SLICE_CONTINUE;
+ /* FALLTHRU */
+ case RPIVID_DECODE_SLICE_CONTINUE:
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_DONE); /* mid-frame slice: no h/w work started here */
+ break;
+ default:
+ v4l2_err(&dev->v4l2_dev, "%s: Unexpected state: %d\n", __func__,
+ de->state);
+ /* FALLTHRU */
+ case RPIVID_DECODE_ERROR_DONE:
+ ctx->dec0 = NULL;
+ dec_env_delete(de);
+ /* FALLTHRU */
+ case RPIVID_DECODE_ERROR_CONTINUE:
+ xtrace_fin(dev, de);
+ v4l2_m2m_buf_done_and_job_finish(dev->m2m_dev, ctx->fh.m2m_ctx,
+ VB2_BUF_STATE_ERROR);
+ break;
+ case RPIVID_DECODE_PHASE1:
+ ctx->dec0 = NULL; /* de is now owned by the phase 1/2 callbacks */
+ rpivid_hw_irq_active1_claim(dev, &de->irq_ent, phase1_claimed,
+ de);
+ break;
+ }
+
+ xtrace_ok(dev, de); /* NOTE(review): de may be NULL or already deleted here - confirm xtrace_* is safe */
+}
+
+struct rpivid_dec_ops rpivid_dec_ops_h265 = {
+ .setup = rpivid_h265_setup,
+ .start = rpivid_h265_start,
+ .stop = rpivid_h265_stop,
+ .trigger = rpivid_h265_trigger,
+};
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_hw.c
@@ -0,0 +1,321 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include <media/videobuf2-core.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "rpivid.h"
+#include "rpivid_hw.h"
+
+static void pre_irq(struct rpivid_dev *dev, struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback cb, void *v,
+ struct rpivid_hw_irq_ctrl *ictl)
+{
+ unsigned long flags;
+
+ if (ictl->irq) { /* NOTE(review): tested without ictl->lock held */
+ v4l2_err(&dev->v4l2_dev, "Attempt to claim IRQ when already claimed\n");
+ return;
+ }
+
+ ient->cb = cb;
+ ient->v = v;
+ // Publish ient as the entry do_irq() will run on the next interrupt
+ // Not sure this lock is actually required
+ spin_lock_irqsave(&ictl->lock, flags);
+ ictl->irq = ient;
+ spin_unlock_irqrestore(&ictl->lock, flags);
+}
+
+// Drop one scheduling hold, then run queued claim callbacks one at a time.
+// An entry is only taken, and its callback run, when no IRQ is outstanding.
+static void sched_claim(struct rpivid_dev * const dev,
+ struct rpivid_hw_irq_ctrl * const ictl)
+{
+ for (;;) {
+ struct rpivid_hw_irq_ent *ient = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ictl->lock, flags);
+
+ if (--ictl->no_sched <= 0 && !ictl->irq && ictl->claim) {
+ ient = ictl->claim; /* pop the head of the claim queue */
+ ictl->claim = ient->next;
+ ictl->no_sched = 1; /* hold scheduling while its callback runs */
+ }
+
+ spin_unlock_irqrestore(&ictl->lock, flags);
+
+ if (!ient) /* held, busy or empty: leave the queue for a later call */
+ break;
+
+ ient->cb(dev, ient->v);
+ }
+}
+
+/* Should only ever be called from its own IRQ cb so no lock required */
+static void pre_thread(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback cb, void *v,
+ struct rpivid_hw_irq_ctrl *ictl)
+{
+ ient->cb = cb;
+ ient->v = v;
+ ictl->irq = ient; /* do_thread() picks the entry up from here */
+ ictl->thread_reqed = true; /* makes rpivid_irq_irq() return IRQ_WAKE_THREAD */
+ ictl->no_sched++; /* extra hold, dropped by do_thread()'s sched_claim() */
+}
+
+// Called in irq context
+static void do_irq(struct rpivid_dev * const dev,
+ struct rpivid_hw_irq_ctrl * const ictl)
+{
+ struct rpivid_hw_irq_ent *ient;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ictl->lock, flags);
+ ient = ictl->irq;
+ if (ient) {
+ ictl->no_sched++; /* hold claim scheduling while the callback runs */
+ ictl->irq = NULL; /* the callback may register a new entry */
+ }
+ spin_unlock_irqrestore(&ictl->lock, flags);
+
+ if (ient) {
+ ient->cb(dev, ient->v);
+
+ sched_claim(dev, ictl); /* drops the hold taken above */
+ }
+}
+
+static void do_claim(struct rpivid_dev * const dev,
+ struct rpivid_hw_irq_ent *ient,
+ const rpivid_irq_callback cb, void * const v,
+ struct rpivid_hw_irq_ctrl * const ictl)
+{
+ unsigned long flags;
+
+ ient->next = NULL;
+ ient->cb = cb;
+ ient->v = v;
+
+ spin_lock_irqsave(&ictl->lock, flags);
+
+ if (ictl->claim) {
+ // If we have a Q then add to end
+ ictl->tail->next = ient;
+ ictl->tail = ient;
+ ient = NULL; /* queued: sched_claim() will run it later */
+ } else if (ictl->no_sched || ictl->irq) {
+ // Empty Q but other activity in progress so Q
+ ictl->claim = ient;
+ ictl->tail = ient;
+ ient = NULL; /* queued: sched_claim() will run it later */
+ } else {
+ // Nothing else going on - schedule immediately and
+ // prevent anything else scheduling claims
+ ictl->no_sched = 1;
+ }
+
+ spin_unlock_irqrestore(&ictl->lock, flags);
+
+ if (ient) { /* still set only on the run-immediately path */
+ ient->cb(dev, ient->v);
+
+ sched_claim(dev, ictl);
+ }
+}
+
+static void ictl_init(struct rpivid_hw_irq_ctrl * const ictl)
+{
+ spin_lock_init(&ictl->lock);
+ ictl->claim = NULL; /* head of the queue of pending claims */
+ ictl->tail = NULL;
+ ictl->irq = NULL; /* no entry waiting on an interrupt */
+ ictl->no_sched = 0; /* NOTE(review): thread_reqed is not reset here - presumably dev is zero-allocated; confirm */
+}
+
+static void ictl_uninit(struct rpivid_hw_irq_ctrl * const ictl)
+{
+ // Nothing to do
+}
+
+#if !OPT_DEBUG_POLL_IRQ
+static irqreturn_t rpivid_irq_irq(int irq, void *data)
+{
+ struct rpivid_dev * const dev = data;
+ __u32 ictrl;
+
+ ictrl = irq_read(dev, ARG_IC_ICTRL);
+ if (!(ictrl & ARG_IC_ICTRL_ALL_IRQ_MASK)) {
+ v4l2_warn(&dev->v4l2_dev, "IRQ but no IRQ bits set\n");
+ return IRQ_NONE;
+ }
+
+ // Cancel any/all irqs
+ irq_write(dev, ARG_IC_ICTRL, ictrl & ~ARG_IC_ICTRL_SET_ZERO_MASK);
+
+ // Service Active2 before Active1 so Phase 1 can transition to Phase 2
+ // without delay
+ if (ictrl & ARG_IC_ICTRL_ACTIVE2_INT_SET)
+ do_irq(dev, &dev->ic_active2);
+ if (ictrl & ARG_IC_ICTRL_ACTIVE1_INT_SET)
+ do_irq(dev, &dev->ic_active1);
+ // thread_reqed is set by pre_thread() when a callback deferred work to the thread
+ return dev->ic_active1.thread_reqed || dev->ic_active2.thread_reqed ?
+ IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static void do_thread(struct rpivid_dev * const dev,
+ struct rpivid_hw_irq_ctrl *const ictl)
+{
+ unsigned long flags;
+ struct rpivid_hw_irq_ent *ient = NULL;
+
+ spin_lock_irqsave(&ictl->lock, flags);
+
+ if (ictl->thread_reqed) { /* set by pre_thread() */
+ ient = ictl->irq;
+ ictl->thread_reqed = false;
+ ictl->irq = NULL;
+ }
+
+ spin_unlock_irqrestore(&ictl->lock, flags);
+
+ if (ient) {
+ ient->cb(dev, ient->v);
+
+ sched_claim(dev, ictl); /* drops the hold pre_thread() added */
+ }
+}
+
+static irqreturn_t rpivid_irq_thread(int irq, void *data)
+{
+ struct rpivid_dev * const dev = data;
+ // Threaded half of the handler: run any callback deferred via pre_thread()
+ do_thread(dev, &dev->ic_active1);
+ do_thread(dev, &dev->ic_active2);
+
+ return IRQ_HANDLED;
+}
+#endif
+
+/* May only be called from Active1 CB
+ * IRQs should not be expected until execution continues in the cb
+ */
+void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback thread_cb, void *ctx)
+{
+ pre_thread(dev, ient, thread_cb, ctx, &dev->ic_active1); /* thread_cb runs later from do_thread() */
+}
+
+void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback ready_cb, void *ctx)
+{
+ do_claim(dev, ient, ready_cb, ctx, &dev->ic_active1); /* runs ready_cb now if idle, else queues it */
+}
+
+void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback irq_cb, void *ctx)
+{
+ pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active1); /* irq_cb runs from do_irq() on the Active1 interrupt */
+}
+
+void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback ready_cb, void *ctx)
+{
+ do_claim(dev, ient, ready_cb, ctx, &dev->ic_active2); /* runs ready_cb now if idle, else queues it */
+}
+
+void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback irq_cb, void *ctx)
+{
+ pre_irq(dev, ient, irq_cb, ctx, &dev->ic_active2); /* irq_cb runs from do_irq() on the Active2 interrupt */
+}
+
+/* Map both register blocks, get the clock and install the IRQ handlers */
+int rpivid_hw_probe(struct rpivid_dev *dev)
+{
+ struct resource *res;
+ __u32 irq_stat;
+ int irq_dec;
+ int ret = 0;
+
+ ictl_init(&dev->ic_active1);
+ ictl_init(&dev->ic_active2);
+
+ res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "intc");
+ if (!res)
+ return -ENODEV;
+
+ dev->base_irq = devm_ioremap(dev->dev, res->start, resource_size(res));
+ if (!dev->base_irq) /* devm_ioremap() fails with NULL, not an ERR_PTR */
+ return -ENOMEM;
+
+ res = platform_get_resource_byname(dev->pdev, IORESOURCE_MEM, "hevc");
+ if (!res)
+ return -ENODEV;
+
+ dev->base_h265 = devm_ioremap(dev->dev, res->start, resource_size(res));
+ if (!dev->base_h265) /* devm_ioremap() fails with NULL, not an ERR_PTR */
+ return -ENOMEM;
+
+ dev->clock = devm_clk_get(&dev->pdev->dev, "hevc");
+ if (IS_ERR(dev->clock))
+ return PTR_ERR(dev->clock);
+
+ // Disable IRQs & reset anything pending
+ irq_write(dev, ARG_IC_ICTRL, /* NOTE(review): this sets the EN bits - confirm "disable" */
+ ARG_IC_ICTRL_ACTIVE1_EN_SET | ARG_IC_ICTRL_ACTIVE2_EN_SET);
+ irq_stat = irq_read(dev, ARG_IC_ICTRL);
+ irq_write(dev, ARG_IC_ICTRL, irq_stat); /* latched INT bits are write-1-to-clear */
+
+#if !OPT_DEBUG_POLL_IRQ
+ irq_dec = platform_get_irq(dev->pdev, 0);
+ if (irq_dec <= 0) /* 0 is not a usable IRQ: never report it as success */
+ return irq_dec ? irq_dec : -ENXIO;
+ ret = devm_request_threaded_irq(dev->dev, irq_dec,
+ rpivid_irq_irq,
+ rpivid_irq_thread,
+ 0, dev_name(dev->dev), dev);
+ if (ret) {
+ dev_err(dev->dev, "Failed to request IRQ - %d\n", ret);
+ return ret;
+ }
+#endif
+ return ret;
+}
+
+void rpivid_hw_remove(struct rpivid_dev *dev)
+{
+ // IRQ auto freed on unload so no need to do it here
+ ictl_uninit(&dev->ic_active1);
+ ictl_uninit(&dev->ic_active2);
+}
+
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_hw.h
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#ifndef _RPIVID_HW_H_
+#define _RPIVID_HW_H_
+
+struct rpivid_hw_irq_ent {
+ struct rpivid_hw_irq_ent *next; /* link in a controller's queue of pending claims */
+ rpivid_irq_callback cb; /* called as cb(dev, v) */
+ void *v; /* opaque argument handed back to cb */
+};
+
+/* Phase 1 Register offsets */
+
+#define RPI_SPS0 0
+#define RPI_SPS1 4
+#define RPI_PPS 8
+#define RPI_SLICE 12
+#define RPI_TILESTART 16
+#define RPI_TILEEND 20
+#define RPI_SLICESTART 24
+#define RPI_MODE 28
+#define RPI_LEFT0 32
+#define RPI_LEFT1 36
+#define RPI_LEFT2 40
+#define RPI_LEFT3 44
+#define RPI_QP 48
+#define RPI_CONTROL 52
+#define RPI_STATUS 56
+#define RPI_VERSION 60
+#define RPI_BFBASE 64
+#define RPI_BFNUM 68
+#define RPI_BFCONTROL 72
+#define RPI_BFSTATUS 76
+#define RPI_PUWBASE 80
+#define RPI_PUWSTRIDE 84
+#define RPI_COEFFWBASE 88
+#define RPI_COEFFWSTRIDE 92
+#define RPI_SLICECMDS 96
+#define RPI_BEGINTILEEND 100
+#define RPI_TRANSFER 104
+#define RPI_CFBASE 108
+#define RPI_CFNUM 112
+#define RPI_CFSTATUS 116
+
+/* Phase 2 Register offsets */
+
+#define RPI_PURBASE 0x8000
+#define RPI_PURSTRIDE 0x8004
+#define RPI_COEFFRBASE 0x8008
+#define RPI_COEFFRSTRIDE 0x800C
+#define RPI_NUMROWS 0x8010
+#define RPI_CONFIG2 0x8014
+#define RPI_OUTYBASE 0x8018
+#define RPI_OUTYSTRIDE 0x801C
+#define RPI_OUTCBASE 0x8020
+#define RPI_OUTCSTRIDE 0x8024
+#define RPI_STATUS2 0x8028
+#define RPI_FRAMESIZE 0x802C
+#define RPI_MVBASE 0x8030
+#define RPI_MVSTRIDE 0x8034
+#define RPI_COLBASE 0x8038
+#define RPI_COLSTRIDE 0x803C
+#define RPI_CURRPOC 0x8040
+
+/*
+ * Write a general register value
+ * Order is unimportant
+ */
+static inline void apb_write(const struct rpivid_dev * const dev,
+ const unsigned int offset, const u32 val)
+{
+ writel_relaxed(val, dev->base_h265 + offset);
+}
+
+/* Write the final register value that actually starts the phase */
+static inline void apb_write_final(const struct rpivid_dev * const dev,
+ const unsigned int offset, const u32 val)
+{
+ writel(val, dev->base_h265 + offset);
+}
+
+static inline u32 apb_read(const struct rpivid_dev * const dev,
+ const unsigned int offset)
+{
+ return readl(dev->base_h265 + offset);
+}
+
+static inline void irq_write(const struct rpivid_dev * const dev,
+ const unsigned int offset, const u32 val)
+{
+ writel(val, dev->base_irq + offset);
+}
+
+static inline u32 irq_read(const struct rpivid_dev * const dev,
+ const unsigned int offset)
+{
+ return readl(dev->base_irq + offset);
+}
+
+static inline void apb_write_vc_addr(const struct rpivid_dev * const dev,
+ const unsigned int offset,
+ const dma_addr_t a)
+{
+ apb_write(dev, offset, (u32)(a >> 6)); /* address is written in units of 64; low 6 bits are dropped */
+}
+
+static inline void apb_write_vc_addr_final(const struct rpivid_dev * const dev,
+ const unsigned int offset,
+ const dma_addr_t a)
+{
+ apb_write_final(dev, offset, (u32)(a >> 6)); /* as apb_write_vc_addr() but via the non-relaxed final write */
+}
+
+static inline void apb_write_vc_len(const struct rpivid_dev * const dev,
+ const unsigned int offset,
+ const unsigned int x)
+{
+ apb_write(dev, offset, (x + 63) >> 6); /* length rounded up to whole units of 64 */
+}
+
+/* *ARG_IC_ICTRL - Interrupt control for ARGON Core*
+ * Offset (byte space) = 40'h2b10000
+ * Physical Address (byte space) = 40'h7eb10000
+ * Verilog Macro Address = `ARG_IC_REG_START + `ARGON_INTCTRL_ICTRL
+ * Reset Value = 32'b100x100x_100xxxxx_xxxxxxx0_x100x100
+ * Access = RW (32-bit only)
+ * Interrupt control logic for ARGON Core.
+ */
+#define ARG_IC_ICTRL 0
+
+/* acc=LWC ACTIVE1_INT FIELD ACCESS: LWC
+ *
+ * Interrupt 1
+ * This is set and held when an hevc_active1 interrupt edge is detected
+ * The polarity of the edge is set by the ACTIVE1_EDGE field
+ * Write a 1 to this bit to clear down the latched interrupt
+ * The latched interrupt is only enabled out onto the interrupt line if
+ * ACTIVE1_EN is set
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_ACTIVE1_INT_SET BIT(0)
+
+/* ACTIVE1_EDGE Sets the polarity of the interrupt edge detection logic
+ * This logic detects edges of the hevc_active1 line from the argon core
+ * 0 = negedge, 1 = posedge
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET BIT(1)
+
+/* ACTIVE1_EN Enables ACTIVE1_INT out onto the argon interrupt line.
+ * If this isn't set, the interrupt logic will work but no interrupt will be
+ * sent to the interrupt controller
+ * Reset value is *1* decimal.
+ *
+ * [JC] The above appears to be a lie - if unset then b0 is never set
+ */
+#define ARG_IC_ICTRL_ACTIVE1_EN_SET BIT(2)
+
+/* acc=RO ACTIVE1_STATUS FIELD ACCESS: RO
+ *
+ * The current status of the hevc_active1 signal
+ */
+#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET BIT(3)
+
+/* acc=LWC ACTIVE2_INT FIELD ACCESS: LWC
+ *
+ * Interrupt 2
+ * This is set and held when an hevc_active2 interrupt edge is detected
+ * The polarity of the edge is set by the ACTIVE2_EDGE field
+ * Write a 1 to this bit to clear down the latched interrupt
+ * The latched interrupt is only enabled out onto the interrupt line if
+ * ACTIVE2_EN is set
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_ACTIVE2_INT_SET BIT(4)
+
+/* ACTIVE2_EDGE Sets the polarity of the interrupt edge detection logic
+ * This logic detects edges of the hevc_active2 line from the argon core
+ * 0 = negedge, 1 = posedge
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET BIT(5)
+
+/* ACTIVE2_EN Enables ACTIVE2_INT out onto the argon interrupt line.
+ * If this isn't set, the interrupt logic will work but no interrupt will be
+ * sent to the interrupt controller
+ * Reset value is *1* decimal.
+ */
+#define ARG_IC_ICTRL_ACTIVE2_EN_SET BIT(6)
+
+/* acc=RO ACTIVE2_STATUS FIELD ACCESS: RO
+ *
+ * The current status of the hevc_active2 signal
+ */
+#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET BIT(7)
+
+/* TEST_INT Forces the argon int high for test purposes.
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_TEST_INT BIT(8)
+#define ARG_IC_ICTRL_SPARE BIT(9)
+
+/* acc=RO VP9_INTERRUPT_STATUS FIELD ACCESS: RO
+ *
+ * The current status of the vp9_interrupt signal
+ */
+#define ARG_IC_ICTRL_VP9_INTERRUPT_STATUS BIT(10)
+
+/* AIO_INT_ENABLE 1 = Or the AIO int in with the Argon int so the VPU can see
+ * it
+ * 0 = the AIO int is masked. (It should still be connected to the GIC though).
+ */
+#define ARG_IC_ICTRL_AIO_INT_ENABLE BIT(20)
+#define ARG_IC_ICTRL_H264_ACTIVE_INT BIT(21)
+#define ARG_IC_ICTRL_H264_ACTIVE_EDGE BIT(22)
+#define ARG_IC_ICTRL_H264_ACTIVE_EN BIT(23)
+#define ARG_IC_ICTRL_H264_ACTIVE_STATUS BIT(24)
+#define ARG_IC_ICTRL_H264_INTERRUPT_INT BIT(25)
+#define ARG_IC_ICTRL_H264_INTERRUPT_EDGE BIT(26)
+#define ARG_IC_ICTRL_H264_INTERRUPT_EN BIT(27)
+
+/* acc=RO H264_INTERRUPT_STATUS FIELD ACCESS: RO
+ *
+ * The current status of the h264_interrupt signal
+ */
+#define ARG_IC_ICTRL_H264_INTERRUPT_STATUS BIT(28)
+
+/* acc=LWC VP9_INTERRUPT_INT FIELD ACCESS: LWC
+ *
+ * Interrupt 1
+ * This is set and held when a vp9_interrupt interrupt edge is detected
+ * The polarity of the edge is set by the VP9_INTERRUPT_EDGE field
+ * Write a 1 to this bit to clear down the latched interrupt
+ * The latched interrupt is only enabled out onto the interrupt line if
+ * VP9_INTERRUPT_EN is set
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_VP9_INTERRUPT_INT BIT(29)
+
+/* VP9_INTERRUPT_EDGE Sets the polarity of the interrupt edge detection logic
+ * This logic detects edges of the vp9_interrupt line from the argon h264 core
+ * 0 = negedge, 1 = posedge
+ * Reset value is *0* decimal.
+ */
+#define ARG_IC_ICTRL_VP9_INTERRUPT_EDGE BIT(30)
+
+/* VP9_INTERRUPT_EN Enables VP9_INTERRUPT_INT out onto the argon interrupt line.
+ * If this isn't set, the interrupt logic will work but no interrupt will be
+ * sent to the interrupt controller
+ * Reset value is *1* decimal.
+ */
+#define ARG_IC_ICTRL_VP9_INTERRUPT_EN BIT(31)
+
+/* Bits 19:12, 11 reserved - read ?, write 0 */
+#define ARG_IC_ICTRL_SET_ZERO_MASK ((0xff << 12) | BIT(11))
+
+/* All IRQ bits */
+#define ARG_IC_ICTRL_ALL_IRQ_MASK (\
+ ARG_IC_ICTRL_VP9_INTERRUPT_INT |\
+ ARG_IC_ICTRL_H264_INTERRUPT_INT |\
+ ARG_IC_ICTRL_ACTIVE1_INT_SET |\
+ ARG_IC_ICTRL_ACTIVE2_INT_SET)
+
+/* Auto release once all CBs called */
+void rpivid_hw_irq_active1_claim(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback ready_cb, void *ctx);
+/* May only be called in claim cb */
+void rpivid_hw_irq_active1_irq(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback irq_cb, void *ctx);
+/* May only be called in irq cb */
+void rpivid_hw_irq_active1_thread(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback thread_cb, void *ctx);
+
+/* Auto release once all CBs called */
+void rpivid_hw_irq_active2_claim(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback ready_cb, void *ctx);
+/* May only be called in claim cb */
+void rpivid_hw_irq_active2_irq(struct rpivid_dev *dev,
+ struct rpivid_hw_irq_ent *ient,
+ rpivid_irq_callback irq_cb, void *ctx);
+
+int rpivid_hw_probe(struct rpivid_dev *dev);
+void rpivid_hw_remove(struct rpivid_dev *dev);
+
+#endif
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_video.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#include <media/videobuf2-dma-contig.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "rpivid.h"
+#include "rpivid_video.h"
+#include "rpivid_dec.h"
+
+#define RPIVID_DECODE_SRC BIT(0)
+#define RPIVID_DECODE_DST BIT(1)
+
+#define RPIVID_MIN_WIDTH 16U
+#define RPIVID_MIN_HEIGHT 16U
+#define RPIVID_MAX_WIDTH 4096U
+#define RPIVID_MAX_HEIGHT 4096U
+
+static inline struct rpivid_ctx *rpivid_file2ctx(struct file *file)
+{
+ return container_of(file->private_data, struct rpivid_ctx, fh); /* fh -> ctx */
+}
+
+/* Return x if y <= x <= 2 * y, otherwise fall back to y (the minimum) */
+static inline unsigned int constrain2x(unsigned int x, unsigned int y)
+{
+	if (x < y || x > y * 2)
+		return y;
+	return x;
+}
+
+int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt)
+{
+	/* HEVC slice data is the only coded format accepted */
+	if (pix_fmt->pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
+		return -EINVAL;
+	pix_fmt->field = V4L2_FIELD_NONE;
+	/* Coded data has no line stride; its size must be non-zero (>= 1K) */
+	pix_fmt->bytesperline = 0;
+	if (pix_fmt->sizeimage < SZ_1K)
+		pix_fmt->sizeimage = SZ_1K;
+	return 0;
+}
+
+/*
+ * Adjust a capture (decoded frame) format in place for the column formats.
+ * Width and height are clamped to RPIVID_MAX_* and then aligned; bytesperline
+ * (the column height) and sizeimage are kept if plausible, otherwise set to
+ * the minimum. Returns 0, or -EINVAL for an unsupported pixelformat.
+ */
+int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt)
+{
+	unsigned int width = pix_fmt->width;
+	unsigned int height = pix_fmt->height;
+	unsigned int sizeimage = pix_fmt->sizeimage;
+	unsigned int bytesperline = pix_fmt->bytesperline;
+
+	switch (pix_fmt->pixelformat) {
+	/* For column formats set bytesperline to column height (stride2) */
+	case V4L2_PIX_FMT_NV12_COL128:
+		/* Width rounds up to columns */
+		width = ALIGN(min(width, RPIVID_MAX_WIDTH), 128);
+
+		/*
+		 * 16 aligned height. Bound it like the width: an unbounded
+		 * height lets the size arithmetic below wrap around.
+		 */
+		height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
+
+		/* Column height: keep the caller's value if in [min, 2 * min] */
+		bytesperline = constrain2x(bytesperline, height * 3 / 2);
+
+		/* Image size: likewise allow for plausible added padding */
+		sizeimage = constrain2x(sizeimage, bytesperline * width);
+		break;
+
+	case V4L2_PIX_FMT_NV12_10_COL128:
+		/* width in pixels (3 pels = 4 bytes) rounded to 128 byte
+		 * columns
+		 */
+		width = ALIGN(((min(width, RPIVID_MAX_WIDTH) + 2) / 3), 32) * 3;
+
+		/* 16-aligned height, bounded so the sizes below cannot wrap */
+		height = ALIGN(min(height, RPIVID_MAX_HEIGHT), 16);
+
+		/* Column height: keep the caller's value if in [min, 2 * min] */
+		bytesperline = constrain2x(bytesperline, height * 3 / 2);
+
+		/* Image size: likewise allow for plausible added padding */
+		sizeimage = constrain2x(sizeimage,
+					bytesperline * width * 4 / 3);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	pix_fmt->width = width;
+	pix_fmt->height = height;
+
+	pix_fmt->field = V4L2_FIELD_NONE;
+	pix_fmt->bytesperline = bytesperline;
+	pix_fmt->sizeimage = sizeimage;
+	return 0;
+}
+
+static int rpivid_querycap(struct file *file, void *priv,
+ struct v4l2_capability *cap)
+{
+ /* driver and card are both RPIVID_NAME; bus_info is "platform:" + name */
+ strscpy(cap->driver, RPIVID_NAME, sizeof(cap->driver));
+ strscpy(cap->card, RPIVID_NAME, sizeof(cap->card));
+ snprintf(cap->bus_info, sizeof(cap->bus_info),
+ "platform:%s", RPIVID_NAME);
+ return 0;
+}
+
+static int rpivid_enum_fmt_vid_out(struct file *file, void *priv,
+				   struct v4l2_fmtdesc *f)
+{
+	/*
+	 * Input (coded) formats: H.265 slice data is currently the only
+	 * one, so every index other than 0 ends the enumeration.
+	 */
+	if (f->index != 0)
+		return -EINVAL;
+
+	f->pixelformat = V4L2_PIX_FMT_HEVC_SLICE;
+	return 0;
+}
+
+static int rpivid_hevc_validate_sps(const struct v4l2_ctrl_hevc_sps * const sps)
+{
+ const unsigned int ctb_log2_size_y =
+ sps->log2_min_luma_coding_block_size_minus3 + 3 +
+ sps->log2_diff_max_min_luma_coding_block_size;
+ const unsigned int min_tb_log2_size_y =
+ sps->log2_min_luma_transform_block_size_minus2 + 2;
+ const unsigned int max_tb_log2_size_y = min_tb_log2_size_y +
+ sps->log2_diff_max_min_luma_transform_block_size;
+ /* Returns 1 if every check below passes, 0 on the first that fails */
+ /* Local limitations */
+ if (sps->pic_width_in_luma_samples < 32 ||
+ sps->pic_width_in_luma_samples > 4096)
+ return 0;
+ if (sps->pic_height_in_luma_samples < 32 ||
+ sps->pic_height_in_luma_samples > 4096)
+ return 0;
+ if (!(sps->bit_depth_luma_minus8 == 0 ||
+ sps->bit_depth_luma_minus8 == 2))
+ return 0;
+ if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
+ return 0;
+ if (sps->chroma_format_idc != 1)
+ return 0;
+
+ /* Limits from H.265 7.4.3.2.1 */
+ if (sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
+ return 0;
+ if (sps->sps_max_dec_pic_buffering_minus1 > 15)
+ return 0;
+ if (sps->sps_max_num_reorder_pics >
+ sps->sps_max_dec_pic_buffering_minus1)
+ return 0;
+ if (ctb_log2_size_y > 6)
+ return 0;
+ if (max_tb_log2_size_y > 5)
+ return 0;
+ if (max_tb_log2_size_y > ctb_log2_size_y)
+ return 0;
+ if (sps->max_transform_hierarchy_depth_inter >
+ (ctb_log2_size_y - min_tb_log2_size_y))
+ return 0;
+ if (sps->max_transform_hierarchy_depth_intra >
+ (ctb_log2_size_y - min_tb_log2_size_y))
+ return 0;
+ /* NOTE(review): PCM fields are not checked; bound ref pic set counts */
+ if (sps->num_short_term_ref_pic_sets > 64)
+ return 0;
+ if (sps->num_long_term_ref_pics_sps > 32)
+ return 0;
+ return 1;
+}
+
+/* Width 0 is used as the signifier of an SPS that has not been set */
+static inline int is_sps_set(const struct v4l2_ctrl_hevc_sps * const sps)
+{
+	return sps && sps->pic_width_in_luma_samples != 0;
+}
+
+/* Capture pixelformat number @index for @sps, or 0 if there is none */
+static u32 pixelformat_from_sps(const struct v4l2_ctrl_hevc_sps * const sps,
+				const int index)
+{
+	if (!is_sps_set(sps)) {
+		/* Treat this as an error? For now return both */
+		if (index == 0)
+			return V4L2_PIX_FMT_NV12_COL128;
+		return index == 1 ? V4L2_PIX_FMT_NV12_10_COL128 : 0;
+	}
+
+	/* A set SPS yields a single format chosen by its bit depth */
+	if (index != 0 || !rpivid_hevc_validate_sps(sps))
+		return 0;
+	if (sps->bit_depth_luma_minus8 == 0)
+		return V4L2_PIX_FMT_NV12_COL128;
+	if (sps->bit_depth_luma_minus8 == 2)
+		return V4L2_PIX_FMT_NV12_10_COL128;
+	return 0;
+}
+
+static struct v4l2_pix_format
+rpivid_hevc_default_dst_fmt(struct rpivid_ctx * const ctx)
+{
+	const struct v4l2_ctrl_hevc_sps * const sps =
+		rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
+	/* Default capture format from the SPS; a NULL sps gives a 0x0 size */
+	struct v4l2_pix_format pix_fmt = {
+		.width = sps ? sps->pic_width_in_luma_samples : 0,
+		.height = sps ? sps->pic_height_in_luma_samples : 0,
+		.pixelformat = pixelformat_from_sps(sps, 0)
+	};
+	rpivid_prepare_dst_format(&pix_fmt);
+	return pix_fmt;
+}
+
+static u32 rpivid_hevc_get_dst_pixelformat(struct rpivid_ctx * const ctx,
+					   const int index)
+{
+	/* Capture format @index for whatever SPS control is currently set */
+	return pixelformat_from_sps(
+		rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS),
+		index);
+}
+
+static int rpivid_enum_fmt_vid_cap(struct file *file, void *priv,
+				   struct v4l2_fmtdesc *f)
+{
+	struct rpivid_ctx * const ctx = rpivid_file2ctx(file);
+	const u32 fourcc = rpivid_hevc_get_dst_pixelformat(ctx, f->index);
+
+	/* A zero pixelformat means the index is past the last format */
+	if (!fourcc)
+		return -EINVAL;
+
+	f->pixelformat = fourcc;
+	return 0;
+}
+
+static int rpivid_g_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct rpivid_ctx *ctx = rpivid_file2ctx(file);
+
+ if (!ctx->dst_fmt_set) /* nothing set explicitly: use the SPS default */
+ ctx->dst_fmt = rpivid_hevc_default_dst_fmt(ctx);
+ f->fmt.pix = ctx->dst_fmt;
+ return 0;
+}
+
+static int rpivid_g_fmt_vid_out(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct rpivid_ctx *ctx = rpivid_file2ctx(file);
+
+ f->fmt.pix = ctx->src_fmt; /* the context's stored coded-data format */
+ return 0;
+}
+
+static inline void copy_color(struct v4l2_pix_format *d,
+ const struct v4l2_pix_format *s)
+{
+ d->colorspace = s->colorspace; /* copies only these four colour fields */
+ d->xfer_func = s->xfer_func;
+ d->ycbcr_enc = s->ycbcr_enc;
+ d->quantization = s->quantization;
+}
+
+static int rpivid_try_fmt_vid_cap(struct file *file, void *priv,
+				  struct v4l2_format *f)
+{
+	struct rpivid_ctx *ctx = rpivid_file2ctx(file);
+	const struct v4l2_ctrl_hevc_sps * const sps =
+		rpivid_find_control_data(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
+	struct v4l2_pix_format * const pix = &f->fmt.pix;
+	u32 fourcc;
+	int idx = 0;
+
+	if (f->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return -EINVAL;
+
+	/* Look for the requested format among those valid for the SPS */
+	do {
+		fourcc = pixelformat_from_sps(sps, idx++);
+	} while (fourcc != 0 && fourcc != pix->pixelformat);
+
+	/* Not on offer: substitute the default, or give up if there is none */
+	if (fourcc == 0) {
+		fourcc = pixelformat_from_sps(sps, 0);
+		if (fourcc == 0)
+			return -EINVAL;
+	}
+
+	/* Colourspace cannot be discovered here so believe what we are
+	 * told, taking anything set on the coded format as the default
+	 */
+	if (pix->colorspace == V4L2_COLORSPACE_DEFAULT)
+		copy_color(pix, &ctx->src_fmt);
+
+	pix->pixelformat = fourcc;
+	return rpivid_prepare_dst_format(pix);
+}
+
+static int rpivid_try_fmt_vid_out(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+ return -EINVAL;
+
+ if (rpivid_prepare_src_format(&f->fmt.pix)) {
+ // Unsupported pixelformat: substitute the default and prepare again
+ f->fmt.pix.pixelformat = RPIVID_SRC_PIXELFORMAT_DEFAULT;
+ rpivid_prepare_src_format(&f->fmt.pix);
+ }
+ return 0;
+}
+
+static int rpivid_s_fmt_vid_cap(struct file *file, void *priv,
+				struct v4l2_format *f)
+{
+	struct rpivid_ctx *ctx = rpivid_file2ctx(file);
+	int ret;
+
+	/* Refuse to change the format while the queue is busy */
+	if (vb2_is_busy(v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type)))
+		return -EBUSY;
+
+	ret = rpivid_try_fmt_vid_cap(file, priv, f);
+	if (ret)
+		return ret;
+
+	/* Store the adjusted format; the flag stops G_FMT regenerating it */
+	ctx->dst_fmt = f->fmt.pix;
+	ctx->dst_fmt_set = 1;
+
+	return 0;
+}
+
+static int rpivid_s_fmt_vid_out(struct file *file, void *priv,
+				struct v4l2_format *f)
+{
+	struct rpivid_ctx *ctx = rpivid_file2ctx(file);
+	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
+	int ret;
+
+	if (vb2_is_busy(vq))
+		return -EBUSY;
+
+	ret = rpivid_try_fmt_vid_out(file, priv, f);
+	if (ret)
+		return ret;
+
+	/* A new coded format invalidates any capture format set earlier */
+	ctx->src_fmt = f->fmt.pix;
+	ctx->dst_fmt_set = 0;
+
+	/* Propagate colorspace information to capture. */
+	copy_color(&ctx->dst_fmt, &f->fmt.pix);
+
+	vq->subsystem_flags |=
+		VB2_V4L2_FL_SUPPORTS_M2M_HOLD_CAPTURE_BUF;
+	return 0;
+}
+
+const struct v4l2_ioctl_ops rpivid_ioctl_ops = {
+ .vidioc_querycap = rpivid_querycap,
+ /* Capture queue formats (decoded frames) */
+ .vidioc_enum_fmt_vid_cap = rpivid_enum_fmt_vid_cap,
+ .vidioc_g_fmt_vid_cap = rpivid_g_fmt_vid_cap,
+ .vidioc_try_fmt_vid_cap = rpivid_try_fmt_vid_cap,
+ .vidioc_s_fmt_vid_cap = rpivid_s_fmt_vid_cap,
+ /* Output queue formats (coded H.265 slice data) */
+ .vidioc_enum_fmt_vid_out = rpivid_enum_fmt_vid_out,
+ .vidioc_g_fmt_vid_out = rpivid_g_fmt_vid_out,
+ .vidioc_try_fmt_vid_out = rpivid_try_fmt_vid_out,
+ .vidioc_s_fmt_vid_out = rpivid_s_fmt_vid_out,
+ /* Buffer handling: generic mem2mem helpers */
+ .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
+ .vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
+ .vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
+ .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
+ .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
+ .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
+ .vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
+ /* Streaming control */
+ .vidioc_streamon = v4l2_m2m_ioctl_streamon,
+ .vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
+ /* Stateless decoder commands */
+ .vidioc_try_decoder_cmd = v4l2_m2m_ioctl_stateless_try_decoder_cmd,
+ .vidioc_decoder_cmd = v4l2_m2m_ioctl_stateless_decoder_cmd,
+ /* Control events */
+ .vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+ .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+/*
+ * vb2 queue_setup: one plane of sizeimage bytes, taken from the format
+ * currently stored for this queue's direction. Plane sizes already chosen
+ * by the caller (*nplanes != 0) are only checked, never changed.
+ */
+static int rpivid_queue_setup(struct vb2_queue *vq, unsigned int *nbufs,
+			      unsigned int *nplanes, unsigned int sizes[],
+			      struct device *alloc_devs[])
+{
+	struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
+	const struct v4l2_pix_format * const pix_fmt =
+		V4L2_TYPE_IS_OUTPUT(vq->type) ? &ctx->src_fmt : &ctx->dst_fmt;
+
+	/* Caller-chosen size: accept it only if a whole image fits */
+	if (*nplanes)
+		return sizes[0] < pix_fmt->sizeimage ? -EINVAL : 0;
+
+	sizes[0] = pix_fmt->sizeimage;
+	*nplanes = 1;
+
+	return 0;
+}
+
+/* Remove every buffer queued on @vq's side of the m2m context, in @state */
+static void rpivid_queue_cleanup(struct vb2_queue *vq, u32 state)
+{
+	struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
+	const bool is_src = V4L2_TYPE_IS_OUTPUT(vq->type);
+	struct vb2_v4l2_buffer *vbuf;
+
+	for (;;) {
+		vbuf = is_src ? v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx) :
+				v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+		if (!vbuf)
+			break;
+
+		/* Complete the buffer's request controls, then return it */
+		v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
+					   &ctx->hdl);
+		v4l2_m2m_buf_done(vbuf, state);
+	}
+}
+
+static int rpivid_buf_out_validate(struct vb2_buffer *vb)
+{
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+ vbuf->field = V4L2_FIELD_NONE; /* field is overwritten, never rejected */
+ return 0;
+}
+
+static int rpivid_buf_prepare(struct vb2_buffer *vb)
+{
+	struct vb2_queue *vq = vb->vb2_queue;
+	struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
+	const struct v4l2_pix_format * const pix_fmt =
+		V4L2_TYPE_IS_OUTPUT(vq->type) ? &ctx->src_fmt : &ctx->dst_fmt;
+
+	/* The plane must be able to hold a full image of the queue's format */
+	if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): the payload is forced to sizeimage for coded (OUTPUT)
+	 * buffers as well as capture ones - confirm that is intended.
+	 */
+	vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage);
+	return 0;
+}
+
+static int rpivid_start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+	struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
+	struct rpivid_dev *dev = ctx->dev;
+	int ret = 0;
+
+	if (ctx->src_fmt.pixelformat != V4L2_PIX_FMT_HEVC_SLICE)
+		return -EINVAL;
+
+	/* A decoder start failure must not be masked by the clock setup */
+	if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->start)
+		ret = dev->dec_ops->start(ctx);
+	if (ret)
+		goto out;
+
+	ret = clk_set_rate(dev->clock, 500 * 1000 * 1000);
+	if (ret) {
+		dev_err(dev->dev, "Failed to set clock rate\n");
+		goto out;
+	}
+	ret = clk_prepare_enable(dev->clock);
+	if (ret)
+		dev_err(dev->dev, "Failed to enable clock\n");
+out:
+	if (ret) /* on failure hand queued buffers back in the QUEUED state */
+		rpivid_queue_cleanup(vq, VB2_BUF_STATE_QUEUED);
+	return ret;
+}
+
+static void rpivid_stop_streaming(struct vb2_queue *vq)
+{
+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vq);
+ struct rpivid_dev *dev = ctx->dev;
+
+ if (V4L2_TYPE_IS_OUTPUT(vq->type) && dev->dec_ops->stop)
+ dev->dec_ops->stop(ctx);
+ /* Give back whatever is still queued, marked as failed */
+ rpivid_queue_cleanup(vq, VB2_BUF_STATE_ERROR);
+ /* Pairs with clk_prepare_enable() in rpivid_start_streaming() */
+ clk_disable_unprepare(dev->clock);
+}
+
+static void rpivid_buf_queue(struct vb2_buffer *vb)
+{
+ struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+ v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); /* hand over to the m2m context */
+}
+
+static void rpivid_buf_request_complete(struct vb2_buffer *vb)
+{
+ struct rpivid_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+ /* Complete the context's control handler for the request attached to vb */
+ v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
+}
+
+static const struct vb2_ops rpivid_qops = { /* read-only ops table */
+	.queue_setup = rpivid_queue_setup,
+	.buf_prepare = rpivid_buf_prepare,
+	.buf_queue = rpivid_buf_queue,
+	.buf_out_validate = rpivid_buf_out_validate,
+	.buf_request_complete = rpivid_buf_request_complete,
+	.start_streaming = rpivid_start_streaming,
+	.stop_streaming = rpivid_stop_streaming,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+};
+
+int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
+ struct vb2_queue *dst_vq)
+{
+ struct rpivid_ctx *ctx = priv;
+ int ret;
+
+ src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; /* coded data in */
+ src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+ src_vq->drv_priv = ctx;
+ src_vq->buf_struct_size = sizeof(struct rpivid_buffer);
+ src_vq->min_buffers_needed = 1;
+ src_vq->ops = &rpivid_qops;
+ src_vq->mem_ops = &vb2_dma_contig_memops;
+ src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+ src_vq->lock = &ctx->dev->dev_mutex;
+ src_vq->dev = ctx->dev->dev;
+ src_vq->supports_requests = true; /* request flags set on src_vq only */
+ src_vq->requires_requests = true;
+
+ ret = vb2_queue_init(src_vq);
+ if (ret)
+ return ret;
+
+ dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; /* decoded frames out */
+ dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+ dst_vq->drv_priv = ctx;
+ dst_vq->buf_struct_size = sizeof(struct rpivid_buffer);
+ dst_vq->min_buffers_needed = 1;
+ dst_vq->ops = &rpivid_qops;
+ dst_vq->mem_ops = &vb2_dma_contig_memops;
+ dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+ dst_vq->lock = &ctx->dev->dev_mutex;
+ dst_vq->dev = ctx->dev->dev;
+
+ return vb2_queue_init(dst_vq);
+}
--- /dev/null
+++ b/drivers/staging/media/rpivid/rpivid_video.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Raspberry Pi HEVC driver
+ *
+ * Copyright (C) 2020 Raspberry Pi (Trading) Ltd
+ *
+ * Based on the Cedrus VPU driver, that is:
+ *
+ * Copyright (C) 2016 Florent Revest <florent.revest@free-electrons.com>
+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
+ * Copyright (C) 2018 Bootlin
+ */
+
+#ifndef _RPIVID_VIDEO_H_
+#define _RPIVID_VIDEO_H_
+
+struct rpivid_format {
+ u32 pixelformat; /* presumably a V4L2_PIX_FMT_* fourcc - TODO confirm */
+ u32 directions; /* presumably RPIVID_DECODE_SRC/DST bits - TODO confirm */
+ unsigned int capabilities; /* meaning not visible here - TODO confirm */
+};
+
+extern const struct v4l2_ioctl_ops rpivid_ioctl_ops;
+
+int rpivid_queue_init(void *priv, struct vb2_queue *src_vq,
+ struct vb2_queue *dst_vq);
+int rpivid_prepare_src_format(struct v4l2_pix_format *pix_fmt);
+int rpivid_prepare_dst_format(struct v4l2_pix_format *pix_fmt);
+
+#endif